Copyright | © 2018 bruno cuconato |
---|---|
License | LGPL-3 |
Maintainer | bruno cuconato <bcclaro+hackage@gmail.com> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Synopsis
- type Parser = Parsec Void String
- parseConlluWith :: Parser Sent -> FilePath -> String -> Either String Doc
- parseConllu :: FilePath -> String -> Either String Doc
- data ParserC = ParserC (Parser Comment) (Parser ID) (Parser FORM) (Parser LEMMA) (Parser UPOS) (Parser XPOS) (Parser FEATS) (Parser DEPREL) (Parser DEPS) (Parser MISC)
- parserC :: ParserC -> Parser Sent
- rawSents :: Parser (RawData String Void)
- sentence :: Parser Sent
- comment :: Parser Comment
- word :: Parser (CW AW)
- emptyField :: Parser (Maybe a)
- idW :: Parser ID
- form :: Parser FORM
- lemma :: Parser LEMMA
- upos :: Parser UPOS
- xpos :: Parser XPOS
- feats :: Parser FEATS
- deprel :: Parser DEPREL
- deps :: Parser DEPS
- misc :: Parser MISC
- commentPair :: Parser Comment
- listPair :: String -> Parser a -> Parser b -> Parser [(a, b)]
- stringNot :: String -> Parser String
- stringWOSpaces :: Parser String
- stringWSpaces :: Parser String
- keyValue :: String -> Parser a -> Parser b -> Parser (a, b)
- maybeEmpty :: Parser a -> Parser (Maybe a)
- orEmpty :: Parser String -> Parser (Maybe String)
- listP :: Parser [a] -> Parser [a]
Documentation
parsers
parseConlluWith
:: Parser Sent | the sentence parser to be used. |
-> FilePath | the source whose stream is being supplied in the next argument (may be "" for no file) |
-> String | stream to be parsed |
-> Either String Doc |
parse a CoNLL-U document using a customized parser.
parseConllu :: FilePath -> String -> Either String Doc Source #
parse a CoNLL-U document using the default parser.
customizable parsers
parserC :: ParserC -> Parser Sent Source #
defines a custom parser of sentences. if you only need to customize one field parser (e.g., to parse special comments or a special MISC field), you can do:
parserC ParserC{_commentP = myCommentsParser }
default parsers
CoNLL-U field parsers
emptyField :: Parser (Maybe a) Source #
parse an empty field.
utility parsers
commentPair :: Parser Comment Source #
parse a comment pair.
stringWOSpaces :: Parser String Source #
parse a string until a space, a tab, or a newline.
stringWSpaces :: Parser String Source #
parse a string until a tab or a newline.
parser combinators
maybeEmpty :: Parser a -> Parser (Maybe a) Source #
a parser combinator for parsers that won't parse "_" (e.g., as `lemma` would).
two combinators are needed for parsing the empty field (without lookahead). this is because if we did
form <|> emptyField
we would parse "_" as a non-empty FORM field. but if we did
emptyField <|> form
we would parse the leading "_" of "_something" as an empty field, and then the parser would fail because it expects a tab next.