module Text.Unicoder (
unicodize
, unicodizeLazy
, unicodizeStr
, Config
, loadConfig
, parseConfig
) where
import System.IO
import System.FilePath
import System.Directory
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.IO as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator
import Data.Either
import Data.Maybe
import Data.List
import Data.Monoid
import Control.Applicative
import Control.Monad
unicodize :: Config -> Text -> Text
unicodize config input = case parseOnly (xform config) input of
Left err -> error "unicoder: internal error"
Right val -> val
unicodizeLazy :: Config -> TL.Text -> TL.Text
unicodizeLazy config = TL.fromStrict . unicodize config . TL.toStrict
unicodizeStr :: Config -> String -> String
unicodizeStr config = T.unpack . unicodize config . T.pack
data Config = Config
{ _fromFile :: FilePath
, _idChars :: Char -> Bool
, _beginMark :: Text
, _endMark :: Maybe Text
, _betweenMarks :: Maybe (Text, Text)
, _macros0 :: [(Text, Text)]
, _macros1 :: [(Text, (Text, Text))]
}
parseConfig :: FilePath -> Text -> Maybe Config
parseConfig path contents = case removeBlanks $ T.lines contents of
[] -> Nothing
(lexer:raw_macros) -> do
let empty = Config
{ _fromFile = path
, _idChars = undefined
, _beginMark = "\\"
, _endMark = Nothing
, _betweenMarks = Nothing
, _macros0 = [], _macros1 = []
}
lexerConfig <- case removeBlanks $ T.splitOn " " lexer of
[idChars] -> return $
empty { _idChars = inClass (T.unpack idChars)
}
[begin, idChars] -> return $
empty { _idChars = inClass (T.unpack idChars)
, _beginMark = begin
}
[begin, end, idChars] -> return $
empty { _idChars = inClass (T.unpack idChars)
, _beginMark = begin
, _endMark = Just end
}
[begin, end, open, close, idChars] -> return $
empty { _idChars = inClass (T.unpack idChars)
, _beginMark = begin
, _endMark = Just end
, _betweenMarks = Just (open, close)
}
_ -> Nothing
let (macros0, macros1) = partitionEithers . catMaybes $ parseMacro <$> raw_macros
Just $ lexerConfig { _macros0 = macros0, _macros1 = macros1 }
where
parseMacro :: T.Text -> Maybe (Either (Text, Text) (Text, (Text, Text)))
parseMacro input = case T.words input of
[k, v] -> Just $ Left (k, v)
[k, v1, v2] -> Just $ Right (k, (v1, v2))
_ -> Nothing
loadConfig :: FilePath -> IO (Maybe Config)
loadConfig path = do
contents <- T.readFile path
return $ parseConfig path contents
xform :: Config -> Parser Text
xform config = mconcat <$> many (passthrough <|> macro <|> strayBegin) <* endOfInput
where
(beginStr, beginChr) = (_beginMark config, T.head beginStr)
passthrough = takeWhile1 (/= beginChr)
macro = do
string beginStr
full <|> half
strayBegin = T.singleton <$> char beginChr
full = do
name <- takeWhile1 (_idChars config)
mono name <|> di name
where
mono name = do
replace <- name `lookupM` _macros0 config
endMark
return replace
di name = do
(open, close) <- betweenMarks
(rOpen, rClose) <- name `lookupM` _macros1 config
string open
inner <- T.pack <$> anyChar `manyTill` string close
return $ rOpen <> recurse inner <> rClose
half = do
(open, close) <- betweenMarks
which <- (const fst <$> string open) <|> (const snd <$> string close)
name <- takeWhile1 (_idChars config)
replace <- which <$> name `lookupM` _macros1 config
endMark
return replace
endMark = case _endMark config of
Nothing -> return ()
Just end -> option () $ void (string end)
betweenMarks = case _betweenMarks config of
Nothing -> fail ""
Just x -> return x
lookupM k v = maybe (fail "") return (k `lookup` v)
recurse = unicodize config
removeBlanks :: [Text] -> [Text]
removeBlanks = filter (not . T.null)