chatter-0.9.1.0: A library of simple NLP algorithms.

Safe HaskellNone
LanguageHaskell2010

NLP.Types.Tree

Synopsis

Documentation

data Sentence Source #

A sentence of tokens without tags. Generated by the tokenizer. (tokenizer :: Text -> Sentence)

Constructors

Sent [Token] 

tokens :: Sentence -> [Token] Source #

Extract the token list from a Sentence

applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t Source #

Apply a parallel list of Tags to a Sentence.

data ChunkedSentence chunk tag Source #

A chunked sentence has POS tags and chunk tags. Generated by a chunker.

(chunker :: (Chunk chunk, Tag tag) => TaggedSentence tag -> ChunkedSentence chunk tag)

Constructors

ChunkedSent [ChunkOr chunk tag] 

Instances

(Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag) Source # 

Methods

(==) :: ChunkedSentence chunk tag -> ChunkedSentence chunk tag -> Bool #

(/=) :: ChunkedSentence chunk tag -> ChunkedSentence chunk tag -> Bool #

(Read chunk, Read tag) => Read (ChunkedSentence chunk tag) Source # 
(Show chunk, Show tag) => Show (ChunkedSentence chunk tag) Source # 

Methods

showsPrec :: Int -> ChunkedSentence chunk tag -> ShowS #

show :: ChunkedSentence chunk tag -> String #

showList :: [ChunkedSentence chunk tag] -> ShowS #

(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t) Source # 

data ChunkOr chunk tag Source #

A data type to represent the portions of a parse tree for Chunks. Note that this part of the parse tree could be a POS tag with no chunk.

Constructors

Chunk_CN (Chunk chunk tag) 
POS_CN (POS tag) 

Instances

(Eq tag, Eq chunk) => Eq (ChunkOr chunk tag) Source # 

Methods

(==) :: ChunkOr chunk tag -> ChunkOr chunk tag -> Bool #

(/=) :: ChunkOr chunk tag -> ChunkOr chunk tag -> Bool #

(Read tag, Read chunk) => Read (ChunkOr chunk tag) Source # 

Methods

readsPrec :: Int -> ReadS (ChunkOr chunk tag) #

readList :: ReadS [ChunkOr chunk tag] #

readPrec :: ReadPrec (ChunkOr chunk tag) #

readListPrec :: ReadPrec [ChunkOr chunk tag] #

(Show tag, Show chunk) => Show (ChunkOr chunk tag) Source # 

Methods

showsPrec :: Int -> ChunkOr chunk tag -> ShowS #

show :: ChunkOr chunk tag -> String #

showList :: [ChunkOr chunk tag] -> ShowS #

(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkOr c t) Source # 

Methods

arbitrary :: Gen (ChunkOr c t) #

shrink :: ChunkOr c t -> [ChunkOr c t] #

data Chunk chunk tag Source #

A Chunk that strictly contains chunks or POS tags.

Constructors

Chunk chunk [ChunkOr chunk tag] 

Instances

(Eq tag, Eq chunk) => Eq (Chunk chunk tag) Source # 

Methods

(==) :: Chunk chunk tag -> Chunk chunk tag -> Bool #

(/=) :: Chunk chunk tag -> Chunk chunk tag -> Bool #

(Read tag, Read chunk) => Read (Chunk chunk tag) Source # 

Methods

readsPrec :: Int -> ReadS (Chunk chunk tag) #

readList :: ReadS [Chunk chunk tag] #

readPrec :: ReadPrec (Chunk chunk tag) #

readListPrec :: ReadPrec [Chunk chunk tag] #

(Show tag, Show chunk) => Show (Chunk chunk tag) Source # 

Methods

showsPrec :: Int -> Chunk chunk tag -> ShowS #

show :: Chunk chunk tag -> String #

showList :: [Chunk chunk tag] -> ShowS #

(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (Chunk c t) Source # 

Methods

arbitrary :: Gen (Chunk c t) #

shrink :: Chunk c t -> [Chunk c t] #

data TaggedSentence tag Source #

A tagged sentence has POS Tags. Generated by a part-of-speech tagger. (tagger :: Tag tag => Sentence -> TaggedSentence tag)

Constructors

TaggedSent [POS tag] 

printTS :: Tag t => TaggedSentence t -> Text Source #

Generate a Text representation of a TaggedSentence in the common tagged format, eg:

"the/at dog/nn jumped/vbd ./."

stripTags :: Tag t => TaggedSentence t -> Sentence Source #

Remove the tags from a tagged sentence

unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t]) Source #

Extract the tags from a tagged sentence, returning a parallel list of tags along with the underlying Sentence.

combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t] Source #

Combine the results of POS taggers, using the second param to fill in tagUNK entries, where possible.

combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t Source #

Merge TaggedSentence values, preffering the tags in the first TaggedSentence. Delegates to pickTag.

pickTag :: Tag t => POS t -> POS t -> POS t Source #

Returns the first param, unless it is tagged tagUNK. Throws an error if the text does not match.

mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag Source #

Helper to create ChunkOr types.

mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag Source #

Helper to create ChunkOr types that just hold POS tagged data.

data POS tag Source #

A POS-tagged token.

Constructors

POS 

Fields

Instances

Eq tag => Eq (POS tag) Source # 

Methods

(==) :: POS tag -> POS tag -> Bool #

(/=) :: POS tag -> POS tag -> Bool #

Read tag => Read (POS tag) Source # 

Methods

readsPrec :: Int -> ReadS (POS tag) #

readList :: ReadS [POS tag] #

readPrec :: ReadPrec (POS tag) #

readListPrec :: ReadPrec [POS tag] #

Show tag => Show (POS tag) Source # 

Methods

showsPrec :: Int -> POS tag -> ShowS #

show :: POS tag -> String #

showList :: [POS tag] -> ShowS #

(Arbitrary t, Tag t) => Arbitrary (POS t) Source # 

Methods

arbitrary :: Gen (POS t) #

shrink :: POS t -> [POS t] #

showPOStok :: Tag tag => POS tag -> Text Source #

Show the underlying text token only.

showPOStag :: Tag tag => POS tag -> Text Source #

printPOS :: Tag tag => POS tag -> Text Source #

Show the text and tag.

data Token Source #

Raw tokenized text.

Token has a IsString instance to simplify use.

Constructors

Token Text 

showTok :: Token -> Text Source #

Extract the text of a Token

suffix :: Token -> Text Source #

Extract the last three characters of a Token, if the token is long enough, otherwise returns the full token text.

unTS :: Tag t => TaggedSentence t -> [POS t] Source #

Extract the list of POS tags from a TaggedSentence

tsLength :: Tag t => TaggedSentence t -> Int Source #

Calculate the length of a TaggedSentence (in terms of the number of tokens).

tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t Source #

Brutally concatenate two TaggedSentences

contains :: Tag t => TaggedSentence t -> Text -> Bool Source #

True if the input sentence contains the given text token. Does not do partial or approximate matching, and compares details in a fully case-sensitive manner.

containsTag :: Tag t => TaggedSentence t -> t -> Bool Source #

True if the input sentence contains the given POS tag. Does not do partial matching (such as prefix matching)

posTagMatches :: Tag t => t -> POS t -> Bool Source #

Compare the POS-tag token with a supplied tag string.

posTokMatches :: Tag t => Text -> POS t -> Bool Source #

Compare the POS-tagged token with a text string.

tokenMatches :: Text -> Token -> Bool Source #

Compare a token with a text string.