| Safe Haskell | None |
|---|---|
Text.NKJP.Named
Contents
Description
Parsing the NKJP named entity layer.
- data Cert
- data Ptr t
- data Deriv t = Deriv {}
- data Para t = Para {}
- data Sent t = Sent {}
- data NE t = NE {}
- parseNamed :: Text -> [Para Text]
- readNamed :: FilePath -> IO [Para Text]
- readCorpus :: FilePath -> IO [(FilePath, Maybe [Para Text])]
- readTrees :: FilePath -> IO [Forest (Either (NE Text) (Seg Text))]
- mkForest :: Ord t => [Seg t] -> [NE t] -> Forest (Either (NE t) (Seg t))
Data types
A derivation structure.
A paragraph.
A sentence.
A segment element in a file.
Constructors
NE | |
Parsing
parseNamed :: Text -> [Para Text] (Source)
Parse the textual contents of the ann_named.xml file.
readCorpus :: FilePath -> IO [(FilePath, Maybe [Para Text])] (Source)
Parse all ann_named.xml files from the NCP .tar.gz file.
readTrees :: FilePath -> IO [Forest (Either (NE Text) (Seg Text))] (Source)
Parse the NCP .tar.gz corpus, extract all NEs and translate them
to the tree form using the mkForest function.