module NLP.Nerf.Dict.PNEG
( parsePNEG
, readPNEG
) where
import Text.XML.PolySoup
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L
import NLP.Nerf.Dict.Base
-- | Top-level parser for a PNEG document in LMF format.
--
-- Applies 'lexEntryP' through the @##>@ combinator with the @true@ tag
-- predicate and yields the resulting list of entries.
-- NOTE(review): presumably @true ##>@ runs 'lexEntryP' across the elements
-- of the whole document and concatenates the results -- confirm against the
-- polysoup documentation for @##>@.
lmfP :: XmlParser L.Text [Entry]
lmfP = true ##> lexEntryP
-- | Parse one @LexicalEntry@ element into dictionary entries.
--
-- Inside the element, leading @feat@ tags are consumed and their results
-- dropped, then zero or more word forms are read with 'wordP', then a single
-- sense with 'senseP'.  One 'Entry' is produced per word form, each paired
-- with that same sense.
lexEntryP :: XmlParser L.Text [Entry]
lexEntryP = tag "LexicalEntry" `joinR` do
    many_ (cut (tag "feat"))
    forms <- many wordP
    neType <- senseP
    return (map (\form -> Entry form neType) forms)
-- | Parse a @Lemma@ or @WordForm@ element and return the first
-- @writtenForm@ feature value collected from it.
--
-- If no such value was collected, the result is an 'error' carrying a
-- descriptive message rather than the opaque @Prelude.head: empty list@.
-- NOTE(review): assumes @/>@ gathers 'featP' results from the element's
-- children -- confirm against the polysoup documentation.
wordP :: XmlParser L.Text Form
wordP = firstForm <$> (tag "Lemma" <|> tag "WordForm" /> featP "writtenForm")
  where
    -- Explicit match instead of 'head', so that an empty result reports
    -- which parser and which feature were responsible.
    firstForm (form : _) = form
    firstForm []         = error
        "NLP.Nerf.Dict.PNEG.wordP: no \"writtenForm\" feature found"
-- | Parse a @Sense@ element and return the first named-entity type found
-- in it, taken from an @externalReference@ or a @label@ feature.
--
-- If no such value was collected, the result is an 'error' carrying a
-- descriptive message rather than the opaque @Prelude.head: empty list@.
-- NOTE(review): assumes @//>@ gathers the feature values from the elements
-- nested under @Sense@ -- confirm against the polysoup documentation.
senseP :: XmlParser L.Text NeType
senseP = firstType <$> (tag "Sense" //> featP "externalReference" <|> featP "label")
  where
    -- Explicit match instead of 'head', so that an empty result reports
    -- which parser and which features were responsible.
    firstType (neType : _) = neType
    firstType []           = error
        "NLP.Nerf.Dict.PNEG.senseP: no \"externalReference\" or \"label\" feature found"
-- | Parse a @feat@ tag whose @att@ attribute equals the given name and
-- return the content of its @val@ attribute as strict 'T.Text'.
featP :: L.Text -> XmlParser L.Text T.Text
featP attName =
    fmap L.toStrict (cut featValue)
  where
    -- Tag predicate: a "feat" tag with the requested "att", yielding "val".
    featValue = tag "feat" *> hasAttr "att" attName *> getAttr "val"
-- | Parse the textual contents of a PNEG dictionary (LMF format) into a
-- list of entries by running 'lmfP' over the input.
parsePNEG :: L.Text -> [Entry]
parsePNEG input = parseXml lmfP input
-- | Read the file at the given path as lazy text and parse it with
-- 'parsePNEG'.
readPNEG :: FilePath -> IO [Entry]
readPNEG path = do
    contents <- L.readFile path
    return (parsePNEG contents)