-- | Handling Prolexbase dictionaries stored as tab-separated text,
-- one entry per line.

module NLP.Nerf.Dict.Prolexbase
( parseProlexbase
, readProlexbase
) where

import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L

import NLP.Nerf.Dict.Base

-- | Parse the dictionary contents into a list of entries.
-- Every line of the input is parsed as one row; the result list is
-- produced lazily, in the same order as the input lines.
parseProlexbase :: L.Text -> [Entry]
parseProlexbase contents = [parseLine ln | ln <- L.lines contents]

-- | Parse a single dictionary row.
--
-- The row is split on tab characters and must consist of exactly four
-- fields (bound here as form, base, tag and category).  Only the first
-- and the fourth field are passed on to 'Entry'; the other two are
-- ignored.  Any other number of fields raises an 'error' quoting the
-- offending row.
parseLine :: L.Text -> Entry
parseLine row = fromFields [L.toStrict field | field <- L.split (== '\t') row]
  where
    -- Exactly four columns: keep the first and the last one.
    fromFields [form, _base, _tag, cat] = Entry form cat
    -- Anything else is reported as a malformed row.
    fromFields _ =
        error (concat ["parseLine: invalid row \"", L.unpack row, "\""])

-- | Read the dictionary from the given file.
-- The file is read with the lazy 'L.readFile', so its contents are
-- consumed on demand as the resulting entries are evaluated.
readProlexbase :: FilePath -> IO [Entry]
readProlexbase path = do
    contents <- L.readFile path
    return (parseProlexbase contents)