{-# LANGUAGE OverloadedStrings #-}

-- | Extraction utilities for various dictionary resources.
--
-- Each @extract*@ function reads one external resource from disk and
-- turns it into a 'Dict'.

module NLP.Nerf.Dict
( extractPoliMorf
, extractPNEG
, extractNELexicon
, extractProlexbase
, extractIntTriggers
, extractExtTriggers
, module NLP.Nerf.Dict.Base
) where

import Control.Applicative ((<$>), (<*>))
import qualified Data.PoliMorf as Poli

import NLP.Nerf.Dict.Base
import NLP.Nerf.Dict.PNEG (readPNEG)
import NLP.Nerf.Dict.NELexicon (readNELexicon)
import NLP.Nerf.Dict.Prolexbase (readProlexbase)
import qualified NLP.Nerf.Dict.PNET as PNET

-- | Is it a single word entry?  True exactly when the orthographic form
-- of the entry is not reported as multi-word by 'isMultiWord'.
atomic :: Entry -> Bool
atomic entry = not (isMultiWord (neOrth entry))

-- | Build a dictionary from the 'atomic' (single word) entries only;
-- all other entries are dropped.
singleWordDict :: [Entry] -> Dict
singleWordDict = fromEntries . filter atomic

-- | Extract NEs dictionary from PNEG.  Only 'atomic' entries are kept.
extractPNEG
    :: FilePath     -- ^ Path to PNEG in the LMF format
    -> IO Dict
extractPNEG lmfPath = singleWordDict <$> readPNEG lmfPath

-- | Extract NEs dictionary from NELexicon.  Only 'atomic' entries are kept.
extractNELexicon
    :: FilePath     -- ^ Path to NELexicon
    -> IO Dict
extractNELexicon nePath = singleWordDict <$> readNELexicon nePath

-- | Extract NEs dictionary from PoliMorf.  Every entry contributes a
-- (form, category) pair, except for entries whose category is
-- @"pospolita"@ or empty, which are skipped.
extractPoliMorf
    :: FilePath     -- ^ Path to PoliMorf
    -> IO Dict
extractPoliMorf poliPath = do
    entries <- Poli.readPoliMorf poliPath
    return $ fromPairs
        [ (Poli.form entry, Poli.cat entry)
        | entry <- entries
        , Poli.cat entry `notElem` ["pospolita", ""] ]

-- | Extract NEs dictionary from Prolexbase.  Only 'atomic' entries are kept.
extractProlexbase
    :: FilePath     -- ^ Path to Prolexbase
    -> IO Dict
extractProlexbase proPath = singleWordDict <$> readProlexbase proPath

-- | Extract internal triggers from PNET dictionary.
extractIntTriggers
    :: FilePath     -- ^ Path to PNET
    -> IO Dict
extractIntTriggers pnetPath = mkTriggers PNET.Internal <$> PNET.readPNET pnetPath

-- | Extract external triggers from PNET dictionary.
extractExtTriggers
    :: FilePath     -- ^ Path to PNET
    -> IO Dict
extractExtTriggers pnetPath = mkTriggers PNET.External <$> PNET.readPNET pnetPath

-- | Build a trigger dictionary from PNET entries: keep the entries
-- accepted by 'PNET.hasTyp' for the given type, pair each orthographic
-- form with its NE type, and finally drop the pairs whose form is
-- multi-word.
mkTriggers :: PNET.Typ -> [PNET.Entry] -> Dict
mkTriggers typ entries = fromPairs (filter singleWord pairs)
  where
    -- (orth, neTyp) pairs of the entries matching the requested type.
    pairs = map ((,) <$> PNET.orth <*> PNET.neTyp) selected
    selected = filter (PNET.hasTyp typ) entries
    singleWord = not . isMultiWord . fst