{-# LANGUAGE OverloadedStrings #-}
module Text.Unicoder (
      unicodize
    , unicodizeStr
    , Config
    , locateConfig
    , loadConfig
    , parseConfig
    ) where

import System.IO
import Paths_unicoder

import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator

import Data.Either
import Data.Maybe
import Data.List
import Data.Monoid
import Control.Applicative
import Control.Monad


{-| Perform the unicoder transformation on a 'Text' value. -}
unicodize :: Config -> Text -> Text
unicodize config input = case parseOnly (xform config) input of
    Left err -> error "unicoder: internal error"
    Right val -> val

{-| Perform the unicoder transformation on a 'String' value. -}
unicodizeStr :: Config -> String -> String
unicodizeStr config = T.unpack . unicodize config . T.pack


{-| Aggregate all settings needed to unicodize. -}
data Config = Config { _fromFile     :: FilePath
                     , _idChars      :: Char -> Bool
                     , _beginMark    :: Text
                     , _endMark      :: Maybe Text
                     , _betweenMarks :: Maybe (Text, Text)
                     , _macros0      :: [(Text, Text)]
                     , _macros1      :: [(Text, (Text, Text))]
                     }

{-| Determine the filesystem location of a config file path.
    If the path does not include a slash, then it is resolved using
    the unicoder built-in locations. If it does include a slash, then
    it is resolved relative to the passed working directory.
-}
locateConfig :: FilePath -- ^ the working directory
             -> FilePath -- ^ the config path to resolve
             -> IO FilePath -- ^ resolved, absolute path to the file
locateConfig cwd path | "/" `isPrefixOf` path = return path
                      | '/' `elem` path = return $
                        if "/" `isSuffixOf` cwd
                            then cwd ++ path
                            else cwd ++ "/" ++ path
                      | otherwise = getDataFileName (path ++ ".conf")

{-| Parse a config file, possibly failing. -}
parseConfig :: FilePath -> Text -> Maybe Config
parseConfig path contents = case removeBlanks $ T.lines contents of
    [] -> Nothing
    (lexer:raw_macros) -> do
        let emptyConfig = Config { _fromFile = path
                                 , _idChars = undefined
                                 , _beginMark = "\\"
                                 , _endMark = Nothing
                                 , _betweenMarks = Nothing
                                 , _macros0 = [], _macros1 = []
                                 }
        lexerConfig <- case removeBlanks $ T.splitOn " " lexer of
            [idChars] -> return $ emptyConfig { _idChars = inClass (T.unpack idChars) }
            [begin, idChars] -> return $
                emptyConfig { _idChars = inClass (T.unpack idChars)
                            , _beginMark = begin
                            }
            [begin, end, idChars] -> return $
                emptyConfig { _idChars = inClass (T.unpack idChars)
                            , _beginMark = begin
                            , _endMark = Just end
                            }
            [begin, end, open, close, idChars] -> return $
                emptyConfig { _idChars = inClass (T.unpack idChars)
                            , _beginMark = begin
                            , _endMark = Just end
                            , _betweenMarks = Just (open, close)
                            }
            _ -> Nothing
        let (macros0, macros1) = partitionEithers . catMaybes $ parseMacro <$> raw_macros
        Just $ lexerConfig { _macros0 = macros0, _macros1 = macros1 }
    where
    parseMacro :: T.Text -> Maybe (Either (Text, Text) (Text, (Text, Text)))
    parseMacro input = case T.words input of
        [k, v] -> Just $ Left (k, v)
        [k, v1, v2] -> Just $ Right (k, (v1, v2))
        _ -> Nothing

{-| Parse the contents of the passed file as  unicoder `Config` -}
loadConfig :: FilePath -> IO (Maybe Config)
loadConfig path = do
    contents <- T.readFile path
    return $ parseConfig path contents


{-| The unicoder algorithm, implemented as an Attoparsec combinator. -}
xform :: Config -> Parser Text
xform config = mconcat <$> many (passthrough <|> macro <|> strayBegin) <* endOfInput
    where
    (beginStr, beginChr) = (_beginMark config, T.head beginStr)
    passthrough = takeWhile1 (/= beginChr)
    macro = do
        string beginStr
        full <|> half
    strayBegin = T.singleton <$> char beginChr
    full = do
        name <- takeWhile1 (_idChars config)
        mono name <|> di name
        where
        mono name = do
            replace <- name `lookupM` _macros0 config
            endMark
            return replace
        di name = do
            (open, close) <- betweenMarks
            (rOpen, rClose) <- name `lookupM` _macros1 config
            string open
            inner <- T.pack <$> anyChar `manyTill` string close
            return $ rOpen <> recurse inner <> rClose
    half = do
        (open, close) <- betweenMarks
        which <- (const fst <$> string open) <|> (const snd <$> string close)
        name <- takeWhile1 (_idChars config)
        replace <- which <$> name `lookupM` _macros1 config
        endMark
        return replace
    endMark = case _endMark config of
        Nothing -> return ()
        Just end -> option () $ void (string end)
    betweenMarks = case _betweenMarks config of
        Nothing -> fail "" 
        Just x -> return x
    lookupM k v = maybe (fail "") return (k `lookup` v)
    recurse = unicodize config

removeBlanks :: [Text] -> [Text]
removeBlanks = filter (not . T.null)

{-TODO
a config lint 
    characters don't appear twice in the lexer
    open and end are distinguishable
    macros are defined only using idChars
    macro lines are word-length 2 or 3
-}