chatter-0.3.0.0: A library of simple NLP algorithms.

Safe HaskellNone
LanguageHaskell2010

NLP.Types.Tree

Synopsis

Documentation

data Sentence Source

A sentence of tokens without tags. Generated by the tokenizer. (tokenizer :: Text -> Sentence)

Constructors

Sent [Token] 

Instances

data ChunkedSentence chunk tag Source

A chunked sentence has POS tags and chunk tags. Generated by a chunker.

(chunker :: (Chunk chunk, Tag tag) => TaggedSentence tag -> ChunkedSentence chunk tag)

Constructors

ChunkedSent [ChunkOr chunk tag] 

Instances

(Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag) 
(Read chunk, Read tag) => Read (ChunkedSentence chunk tag) 
(Show chunk, Show tag) => Show (ChunkedSentence chunk tag) 
(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t) 

data TaggedSentence tag Source

A tagged sentence has POS Tags. Generated by a part-of-speech tagger. (tagger :: Tag tag => Sentence -> TaggedSentence tag)

Constructors

TaggedSent [POS tag] 

Instances

Eq tag => Eq (TaggedSentence tag) 
Read tag => Read (TaggedSentence tag) 
Show tag => Show (TaggedSentence tag) 
(Arbitrary t, Tag t) => Arbitrary (TaggedSentence t) 
(Monad m, Tag t) => Stream (TaggedSentence t) m (POS t) 

printTS :: Tag t => TaggedSentence t -> Text Source

Generate a Text representation of a TaggedSentence in the common tagged format, e.g.:

"the/at dog/nn jumped/vbd ./."

stripTags :: Tag t => TaggedSentence t -> Sentence Source

Remove the tags from a tagged sentence.

unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t]) Source

Extract the tags from a tagged sentence, returning a parallel list of tags along with the underlying Sentence.

combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t] Source

Combine the results of POS taggers, using the second param to fill in tagUNK entries, where possible.

pickTag :: Tag t => POS t -> POS t -> POS t Source

Returns the first param, unless it is tagged tagUNK, in which case the second param is used instead. Throws an error if the text of the two tokens does not match.

data ChunkOr chunk tag Source

This type seems redundant; it exists only to support the differences between TaggedSentence and ChunkedSentence.

See the t3 example below to see how verbose this becomes.

Constructors

Chunk_CN (Chunk chunk tag) 
POS_CN (POS tag) 

Instances

(Eq chunk, Eq tag) => Eq (ChunkOr chunk tag) 
(Read chunk, Read tag) => Read (ChunkOr chunk tag) 
(Show chunk, Show tag) => Show (ChunkOr chunk tag) 
(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkOr c t) 

mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag Source

mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag Source

data Chunk chunk tag Source

Constructors

Chunk chunk [ChunkOr chunk tag] 

Instances

(Eq chunk, Eq tag) => Eq (Chunk chunk tag) 
(Read chunk, Read tag) => Read (Chunk chunk tag) 
(Show chunk, Show tag) => Show (Chunk chunk tag) 
(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (Chunk c t) 

data POS tag Source

Constructors

POS tag Token 

Instances

Eq tag => Eq (POS tag) 
Read tag => Read (POS tag) 
Show tag => Show (POS tag) 
(Arbitrary t, Tag t) => Arbitrary (POS t) 
(Monad m, Tag t) => Stream (TaggedSentence t) m (POS t) 

showPOS :: Tag tag => POS tag -> Text Source

Show the underlying text token only.

printPOS :: Tag tag => POS tag -> Text Source

Show the text and tag.

data Token Source

Constructors

Token Text 

Instances

showTok :: Token -> Text Source

suffix :: Token -> Text Source

unTS :: Tag t => TaggedSentence t -> [POS t] Source

contains :: Tag t => TaggedSentence t -> Text -> Bool Source

True if the input sentence contains the given text token. Does not do partial or approximate matching, and compares tokens in a fully case-sensitive manner.

containsTag :: Tag t => TaggedSentence t -> t -> Bool Source

True if the input sentence contains the given POS tag. Does not do partial matching (such as prefix matching).

posTagMatches :: Tag t => t -> POS t -> Bool Source

Compare the tag of a POS-tagged token with a supplied tag.

posTokMatches :: Tag t => Text -> POS t -> Bool Source

Compare the POS-tagged token with a text string.

tokenMatches :: Text -> Token -> Bool Source

Compare a token with a text string.