Safe Haskell | None |
---|---|
Language | Haskell2010 |
- data Sentence = Sent [Token]
- tokens :: Sentence -> [Token]
- applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t
- data ChunkedSentence chunk tag = ChunkedSent [ChunkOr chunk tag]
- data ChunkOr chunk tag
- data Chunk chunk tag = Chunk chunk [ChunkOr chunk tag]
- showChunkedSent :: (ChunkTag c, Tag t) => ChunkedSentence c t -> Text
- data TaggedSentence tag = TaggedSent [POS tag]
- printTS :: Tag t => TaggedSentence t -> Text
- stripTags :: Tag t => TaggedSentence t -> Sentence
- unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t])
- unzipChunks :: (ChunkTag c, Tag t) => ChunkedSentence c t -> (TaggedSentence t, [c])
- combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t]
- combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t
- pickTag :: Tag t => POS t -> POS t -> POS t
- mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag
- mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag
- data POS tag = POS {}
- showPOStok :: Tag tag => POS tag -> Text
- showPOStag :: Tag tag => POS tag -> Text
- printPOS :: Tag tag => POS tag -> Text
- data Token = Token Text
- showTok :: Token -> Text
- suffix :: Token -> Text
- unTS :: Tag t => TaggedSentence t -> [POS t]
- tsLength :: Tag t => TaggedSentence t -> Int
- tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t
- contains :: Tag t => TaggedSentence t -> Text -> Bool
- containsTag :: Tag t => TaggedSentence t -> t -> Bool
- posTagMatches :: Tag t => t -> POS t -> Bool
- posTokMatches :: Tag t => Text -> POS t -> Bool
- tokenMatches :: Text -> Token -> Bool
Documentation
A sentence of tokens without tags. Generated by the tokenizer. (tokenizer :: Text -> Sentence)
applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t Source
data ChunkedSentence chunk tag Source
A chunked sentence has POS tags and chunk tags. Generated by a chunker.
(chunker :: (ChunkTag chunk, Tag tag) => TaggedSentence tag -> ChunkedSentence chunk tag)
ChunkedSent [ChunkOr chunk tag] |
(Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag) | |
(Read chunk, Read tag) => Read (ChunkedSentence chunk tag) | |
(Show chunk, Show tag) => Show (ChunkedSentence chunk tag) | |
(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t) | |
(Monad m, ChunkTag c, Tag t) => Stream (ChunkedSentence c t) m (ChunkOr c t) |
A data type to represent the portions of a parse tree for Chunks. Note that this part of the parse tree could be a POS tag with no chunk.
A Chunk that strictly contains chunks or POS tags.
showChunkedSent :: (ChunkTag c, Tag t) => ChunkedSentence c t -> Text Source
data TaggedSentence tag Source
A tagged sentence has POS Tags. Generated by a part-of-speech tagger. (tagger :: Tag tag => Sentence -> TaggedSentence tag)
TaggedSent [POS tag] |
Eq tag => Eq (TaggedSentence tag) | |
Read tag => Read (TaggedSentence tag) | |
Show tag => Show (TaggedSentence tag) | |
(Arbitrary t, Tag t) => Arbitrary (TaggedSentence t) | |
(Monad m, Tag t) => Stream (TaggedSentence t) m (POS t) |
printTS :: Tag t => TaggedSentence t -> Text Source
Generate a Text representation of a TaggedSentence in the common tagged format, e.g.:
"the/at dog/nn jumped/vbd ./."
stripTags :: Tag t => TaggedSentence t -> Sentence Source
Remove the tags from a tagged sentence
unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t]) Source
Extract the tags from a tagged sentence, returning a parallel list of tags along with the underlying Sentence.
unzipChunks :: (ChunkTag c, Tag t) => ChunkedSentence c t -> (TaggedSentence t, [c]) Source
combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t] Source
Combine the results of POS taggers, using the second param to fill in tagUNK entries, where possible.
combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t Source
Merge TaggedSentence values, preferring the tags in the first TaggedSentence. Delegates to pickTag.
pickTag :: Tag t => POS t -> POS t -> POS t Source
Returns the first param, unless it is tagged tagUNK.
Throws an error if the text does not match.
mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag Source
Helper to create ChunkOr types.
mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag Source
Helper to create ChunkOr types that just hold POS tagged data.
A POS-tagged token.
showPOStok :: Tag tag => POS tag -> Text Source
Show the underlying text token only.
showPOStag :: Tag tag => POS tag -> Text Source
suffix :: Token -> Text Source
Extract the last three characters of a Token, if the token is long enough, otherwise returns the full token text.
unTS :: Tag t => TaggedSentence t -> [POS t] Source
Extract the list of POS tags from a TaggedSentence.
tsLength :: Tag t => TaggedSentence t -> Int Source
Calculate the length of a TaggedSentence (in terms of the number of tokens).
tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t Source
Brutally concatenate a list of TaggedSentences into a single TaggedSentence.
contains :: Tag t => TaggedSentence t -> Text -> Bool Source
True if the input sentence contains the given text token. Does not do partial or approximate matching, and the comparison is fully case-sensitive.
containsTag :: Tag t => TaggedSentence t -> t -> Bool Source
True if the input sentence contains the given POS tag. Does not do partial matching (such as prefix matching)
posTagMatches :: Tag t => t -> POS t -> Bool Source
Compare the POS-tag token with a supplied tag string.
posTokMatches :: Tag t => Text -> POS t -> Bool Source
Compare the POS-tagged token with a text string.
tokenMatches :: Text -> Token -> Bool Source
Compare a token with a text string.