{-|
Load unicoder configurations and oerform unicoder transformations on text and strings.

Unicoder replaces simple macros with configured strings, e.g.

> \E.x. \A.y. x \-> y
> \l.x,y. x \of x \of y

becomes

> ∃x ∀y x → y
> λx,y. x ∘ x ∘ y

For more information, see [the documentation](http://zankoku-okuno.viewdocs.io/unicoder/).
-}
module Text.Unicoder (
    -- * Unicoder Algorithm
      unicodize
    , unicodizeLazy
    , unicodizeStr
    -- * Configuration
    , Config
    , loadConfig
    , parseConfig
    ) where

import System.IO
import System.FilePath
import System.Directory

import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.IO as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator

import Data.Either
import Data.Maybe
import Data.List
import Data.Monoid
import Control.Applicative
import Control.Monad


{-| Perform the unicoder transformation on a 'Text' value. -}
unicodize :: Config -> Text -> Text
unicodize config input = case parseOnly (xform config) input of
    Left err -> error "unicoder: internal error"
    Right val -> val

{-| Perform the unicoder transformation on a lazy 'TL.Text' value. -}
unicodizeLazy :: Config -> TL.Text -> TL.Text
unicodizeLazy config = TL.fromStrict . unicodize config . TL.toStrict

{-| Perform the unicoder transformation on a 'String' value. -}
unicodizeStr :: Config -> String -> String
unicodizeStr config = T.unpack . unicodize config . T.pack


{-| Aggregate all settings needed to unicodize. -}
data Config = Config
    { _fromFile     :: FilePath
    , _idChars      :: Char -> Bool
    , _beginMark    :: Text
    , _endMark      :: Maybe Text
    , _betweenMarks :: Maybe (Text, Text)
    , _macros0      :: [(Text, Text)]
    , _macros1      :: [(Text, (Text, Text))]
    }

{-| Parse a config file, possibly failing. -}
parseConfig :: FilePath -> Text -> Maybe Config
parseConfig path contents = case removeBlanks $ T.lines contents of
    [] -> Nothing
    (lexer:raw_macros) -> do
        let empty = Config
                    { _fromFile = path
                    , _idChars = undefined
                    , _beginMark = "\\"
                    , _endMark = Nothing
                    , _betweenMarks = Nothing
                    , _macros0 = [], _macros1 = []
                    }
        lexerConfig <- case removeBlanks $ T.splitOn " " lexer of
            [idChars] -> return $
                empty { _idChars = inClass (T.unpack idChars)
                      }
            [begin, idChars] -> return $
                empty { _idChars = inClass (T.unpack idChars)
                      , _beginMark = begin
                      }
            [begin, end, idChars] -> return $
                empty { _idChars = inClass (T.unpack idChars)
                      , _beginMark = begin
                      , _endMark = Just end
                      }
            [begin, end, open, close, idChars] -> return $
                empty { _idChars = inClass (T.unpack idChars)
                      , _beginMark = begin
                      , _endMark = Just end
                      , _betweenMarks = Just (open, close)
                      }
            _ -> Nothing
        let (macros0, macros1) = partitionEithers . catMaybes $ parseMacro <$> raw_macros
        Just $ lexerConfig { _macros0 = macros0, _macros1 = macros1 }
    where
    parseMacro :: T.Text -> Maybe (Either (Text, Text) (Text, (Text, Text)))
    parseMacro input = case T.words input of
        [k, v] -> Just $ Left (k, v)
        [k, v1, v2] -> Just $ Right (k, (v1, v2))
        _ -> Nothing

{-| Parse the contents of the passed file as  unicoder `Config` -}
loadConfig :: FilePath -> IO (Maybe Config)
loadConfig path = do
    contents <- T.readFile path
    return $ parseConfig path contents


{-| The unicoder algorithm, implemented as an Attoparsec combinator. -}
xform :: Config -> Parser Text
xform config = mconcat <$> many (passthrough <|> macro <|> strayBegin) <* endOfInput
    where
    (beginStr, beginChr) = (_beginMark config, T.head beginStr)
    passthrough = takeWhile1 (/= beginChr)
    macro = do
        string beginStr
        full <|> half
    strayBegin = T.singleton <$> char beginChr
    full = do
        name <- takeWhile1 (_idChars config)
        mono name <|> di name
        where
        mono name = do
            replace <- name `lookupM` _macros0 config
            endMark
            return replace
        di name = do
            (open, close) <- betweenMarks
            (rOpen, rClose) <- name `lookupM` _macros1 config
            string open
            inner <- T.pack <$> anyChar `manyTill` string close
            return $ rOpen <> recurse inner <> rClose
    half = do
        (open, close) <- betweenMarks
        which <- (const fst <$> string open) <|> (const snd <$> string close)
        name <- takeWhile1 (_idChars config)
        replace <- which <$> name `lookupM` _macros1 config
        endMark
        return replace
    endMark = case _endMark config of
        Nothing -> return ()
        Just end -> option () $ void (string end)
    betweenMarks = case _betweenMarks config of
        Nothing -> fail "" 
        Just x -> return x
    lookupM k v = maybe (fail "") return (k `lookup` v)
    recurse = unicodize config

removeBlanks :: [Text] -> [Text]
removeBlanks = filter (not . T.null)