module NLP.DictParser.Internal where
import Control.Applicative hiding (many, (<|>))
import Data.Either
import Data.List.Split (splitOn)
import NLP.DictParser.Types
import Text.ParserCombinators.Parsec
separator = "_____\n\n"
manyTill1 p end = (:) <$> p <*> p `manyTill` end
line1 = (:) <$> noneOf "\n" <*> line
line = manyTill anyChar (try newline)
acceptedHeaders :: [String]
acceptedHeaders = ["short",
"info",
"url"]
headerP = do
string "00-database-"
header <- choice $ map string acceptedHeaders
newline
descriptions <- many dashLine
newline
return (header, unlines descriptions)
dashLine = string "-" *> spaces *> line
equalLine = string "=" *> spaces *> line
parseString :: String -> (Dict String, [String])
parseString input = (Dict (rights $ map (tryParse headerP) headersText) valid,
map show invalid)
where (headersText,body) = span (isRight . tryParse headerP) . map strip $ splitOn separator input
(invalid, valid) = partitionEithers $ map (tryParse defP) body
isHeader x = True
isRight (Right _) = True
isRight _ = False
tryParse p body = case parse p "(none)" body of
Left _ -> Left body
Right x -> Right x
strip = lstrip . rstrip
lstrip = dropWhile (`elem` " \t")
rstrip = reverse . lstrip . reverse
defP :: GenParser Char st (Def String)
defP = Def <$> line1 <*> (many withPOS) <?> "Def"
withPOS :: GenParser Char st (String, [(Translation, [Example])])
withPOS = (,) <$> pos <*> many translation
textline = (:) <$> noneOf "=*-\n_" <*> line
pos = do
string "*"
spaces
line
translation :: GenParser Char st (Translation, [Example])
translation = (,) <$> dashLine <*> many example <?> "example"
example :: GenParser Char st Example
example = do
string "="
spaces
ex <- line
return $ case span (/='+') ex of
(a,[]) -> Untranslated a
(a,(_:b)) -> Translated a b