module NLP.HistPL.Transliter
(
TrRules (..)
, transliter
, Parser
, ciString
, ciChar
, (#>)
, (>#>)
, (>+>)
, (.|)
, (.|.)
) where
import Control.Applicative ((<$>), (<*>), (<*))
import Control.Monad (msum)
import Data.Char (isUpper, toUpper, toLower)
import Text.Parsec hiding (Line)
type Parser = Parsec String ()
ciChar :: Char -> Parser Char
ciChar c = char (toLower c) <|> char (toUpper c)
ciString :: String -> Parser String
ciString = sequence . map ciChar
data Shape = Lower
| Capitalized
| Upper
getShape :: String -> Shape
getShape [] = Lower
getShape xs@(x:_)
| all isUpper xs = Upper
| isUpper x = Capitalized
| otherwise = Lower
applyShape :: Shape -> String -> String
applyShape _ [] = []
applyShape Lower xs = map toLower xs
applyShape Upper xs = map toUpper xs
applyShape Capitalized (x:xs) = toUpper x : map toLower xs
(#>) :: String -> String -> Parser String
(#>) p x = ciString p >#> x
(>#>) :: Parser String -> String -> Parser String
(>#>) p x = p >>= \y -> return (applyShape (getShape y) x)
(>+>) :: Parser String -> Parser String -> Parser String
(>+>) p p' = (++) <$> p <*> p'
(.|) :: String -> String -> Parser String
(.|) x y = try (ciString x) <|> try (ciString y)
(.|.) :: Parser String -> String -> Parser String
(.|.) p y = p <|> try (ciString y)
data TrRules = TrRules {
wordRules :: [Parser String]
, charRules :: [Parser String]
}
wordParser :: TrRules -> Parser String
wordParser rules =
perWord <|> perChar
where
perWord = msum $ map (\p -> try $ p <* eof) (wordRules rules)
perChar = (concat <$> many1 (msum $ map try (charRules rules))) <* eof
parseWord :: TrRules -> String -> String
parseWord rules x =
case parse (wordParser rules) x x of
Left err -> error $ "parseWord: " ++ show err
Right y -> y
transliter :: TrRules -> String -> String
transliter rules x =
case takeWhile (not . eq) ps of
[] -> x
xs -> snd $ last $ xs
where
ys = iterate (parseWord rules) x
ps = zip ys $ tail ys
eq = uncurry (==)