{-# LANGUAGE OverloadedStrings #-}
module NLP.Nerf.Dict.PNET
( parsePNET
, readPNET
, Typ (..)
, hasTyp
, Entry (..)
) where
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L
-- | Kind of a dictionary entry, as given by the @int@ / @ext@ marker in the
-- type column of a row.
--
-- NOTE(review): presumably \"internal\" vs. \"external\" refers to where the
-- entry occurs relative to a named entity -- confirm against the
-- dictionary's documentation.
--
-- The derived 'Ord' instance orders 'Internal' before 'External'.
data Typ = Internal  -- ^ Row marked @int@.
         | External  -- ^ Row marked @ext@.
         deriving (Show, Eq, Ord)
-- | Decode the textual type marker of a row into a 'Typ'.
--
-- Recognised markers are exactly @int@ ('Internal') and @ext@ ('External').
-- Any other value aborts with a call to 'error' that quotes the offending
-- marker, so this function is partial.
readTyp :: T.Text -> Typ
readTyp marker = case marker of
    "int" -> Internal
    "ext" -> External
    _     -> error ("readTyp: typ " ++ T.unpack marker ++ " unknown")
-- | A single dictionary row.
--
-- The fields are declared in the same order in which 'parseLine' fills them
-- from the tab-separated columns of a row, so reordering them changes the
-- meaning of the positional constructor.
data Entry = Entry
    { orth    :: T.Text
      -- ^ First column. NOTE(review): presumably the orthographic (surface)
      -- form -- confirm.
    , base    :: T.Text
      -- ^ Second column. NOTE(review): presumably the base form -- confirm.
    , tag     :: T.Text
      -- ^ Third column. NOTE(review): presumably a morphosyntactic tag --
      -- confirm.
    , typ     :: Typ
      -- ^ Fourth column, decoded from its @int@ / @ext@ marker.
    , neTyp   :: T.Text
      -- ^ Fifth column. NOTE(review): presumably the named-entity type --
      -- confirm.
    , example :: T.Text
      -- ^ Sixth column. NOTE(review): presumably a usage example -- confirm.
    }
-- | Check whether an entry is of the given type, i.e. whether its 'typ'
-- field equals the first argument.
--
-- Convenient as a predicate, e.g. @filter (hasTyp Internal)@.
hasTyp :: Typ -> Entry -> Bool
hasTyp wanted entry = typ entry == wanted
-- | Parse one row of the dictionary into an 'Entry'.
--
-- The row is split on tab characters and must yield exactly six columns,
-- which populate the 'Entry' fields in declaration order; the fourth column
-- is decoded with 'readTyp'. Each column is converted to strict text.
--
-- This function is partial: a row with any other number of columns aborts
-- with a call to 'error' that quotes the whole row, and an unrecognised type
-- marker makes 'readTyp' fail once the 'typ' field is demanded.
parseLine :: L.Text -> Entry
parseLine line = fromColumns (L.toStrict <$> L.split (== '\t') line)
  where
    -- Exactly six columns form a valid row; anything else is rejected.
    fromColumns [c1, c2, c3, c4, c5, c6] = Entry
        { orth    = c1
        , base    = c2
        , tag     = c3
        , typ     = readTyp c4
        , neTyp   = c5
        , example = c6
        }
    fromColumns _ =
        error $ "parseLine: invalid row \"" ++ L.unpack line ++ "\""
-- | Parse the contents of a dictionary into a list of entries, one per
-- line of the input, in input order.
--
-- Every line is handed to 'parseLine', so a malformed row makes the
-- corresponding list element fail with 'error' when it is evaluated.
parsePNET :: L.Text -> [Entry]
parsePNET contents = [parseLine row | row <- L.lines contents]
-- | Read the dictionary stored at the given path and parse it with
-- 'parsePNET'.
--
-- The file is read with the lazy 'L.readFile', so its contents (and any
-- parse errors) are only produced as the resulting list is consumed.
readPNET :: FilePath -> IO [Entry]
readPNET path = parsePNET <$> L.readFile path