-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A library of simple NLP algorithms. -- @package chatter @version 0.5.2.0 module Data.DefaultMap -- | Defaulting Map; a Map that returns a default value when queried for a -- key that does not exist. data DefaultMap k v DefMap :: v -> Map k v -> DefaultMap k v defDefault :: DefaultMap k v -> v defMap :: DefaultMap k v -> Map k v -- | Create an empty DefaultMap empty :: v -> DefaultMap k v -- | Query the map for a value. Returns the default if the key is not -- found. lookup :: Ord k => k -> DefaultMap k v -> v -- | Create a DefaultMap from a default value and a list. fromList :: Ord k => v -> [(k, v)] -> DefaultMap k v -- | Access the keys as a list. keys :: DefaultMap k a -> [k] -- | Fold over the values in the map. -- -- Note that this *does* not fold over the default value -- this fold -- behaves in the same way as a standard foldl foldl :: (a -> b -> a) -> a -> DefaultMap k b -> a instance (Ord k, Read k, Read v) => Read (DefaultMap k v) instance (Show k, Show v) => Show (DefaultMap k v) instance (Eq k, Eq v) => Eq (DefaultMap k v) instance (Ord k, Ord v) => Ord (DefaultMap k v) instance Generic (DefaultMap k v) instance Datatype D1DefaultMap instance Constructor C1_0DefaultMap instance Selector S1_0_0DefaultMap instance Selector S1_0_1DefaultMap instance (Arbitrary k, Arbitrary v, Ord k) => Arbitrary (DefaultMap k v) instance (NFData k, NFData v, Ord k) => NFData (DefaultMap k v) instance (Ord k, Serialize k, Serialize v) => Serialize (DefaultMap k v) -- | Utilities for reading mailman-style email archives. module NLP.Corpora.Email -- | Path to the directory containing all the PLUG archives. 
plugDataPath :: FilePath plugArchiveText :: IO [Text] plugArchiveTokens :: IO [[Text]] fullPlugArchive :: IO [Message] readF :: FilePath -> IO Text module NLP.Types.General -- | Just a handy alias for Text type Error = Text toEitherErr :: Either String a -> Either Error a -- | Boolean type to indicate case sensitivity for textual comparisons. data CaseSensitive Sensitive :: CaseSensitive Insensitive :: CaseSensitive instance Read CaseSensitive instance Show CaseSensitive instance Generic CaseSensitive instance Datatype D1CaseSensitive instance Constructor C1_0CaseSensitive instance Constructor C1_1CaseSensitive instance Arbitrary CaseSensitive instance Serialize CaseSensitive module NLP.Types.Tags -- | The class of named entity sets. This typeclass can be defined entirely -- in terms of the required class constraints. class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => NERTag a where fromNERTag = pack . show parseNERTag txt = toEitherErr $ readEither $ unpack txt fromNERTag :: NERTag a => a -> Text parseNERTag :: NERTag a => Text -> Either Error a -- | The class of things that can be regarded as chunks; Chunk -- tags are much like POS tags, but should not be confused. Generally, -- chunks distinguish between different phrasal categories (e.g.; Noun -- Phrases, Verb Phrases, Prepositional Phrases, etc.) class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => ChunkTag a fromChunk :: ChunkTag a => a -> Text parseChunk :: ChunkTag a => Text -> Either Error a notChunk :: ChunkTag a => a -- | The class of POS Tags. -- -- We use a typeclass here because POS tags just need a few things in -- excess of equality (they also need to be serializable and human -- readable). Passing around all the constraints everywhere becomes a -- hassle, and it's handy to have a uniform interface to the different -- kinds of tag types. 
-- -- This typeclass also allows for corpus-specific tags to be -- distinguished; They have different semantics, so they should not be -- merged. That said, if you wish to create a unifying POS Tag set, and -- mappings into that set, you can use the type system to ensure that -- that is done correctly. -- -- This may get renamed to POSTag at some later date. class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => Tag a fromTag :: Tag a => a -> Text parseTag :: Tag a => Text -> a tagUNK :: Tag a => a tagTerm :: Tag a => a -> Text startTag :: Tag a => a endTag :: Tag a => a isDt :: Tag a => a -> Bool -- | A fall-back ChunkTag instance, analogous to RawTag newtype RawChunk RawChunk :: Text -> RawChunk -- | A fallback POS tag instance. newtype RawTag RawTag :: Text -> RawTag -- | Tag instance for unknown tagsets. instance Ord RawChunk instance Eq RawChunk instance Read RawChunk instance Show RawChunk instance Generic RawChunk instance Ord RawTag instance Eq RawTag instance Read RawTag instance Show RawTag instance Generic RawTag instance Datatype D1RawChunk instance Constructor C1_0RawChunk instance Datatype D1RawTag instance Constructor C1_0RawTag instance Serialize Text instance Arbitrary RawTag instance Tag RawTag instance Serialize RawTag instance ChunkTag RawChunk instance Serialize RawChunk module NLP.Types.Tree -- | A sentence of tokens without tags. Generated by the tokenizer. -- (tokenizer :: Text -> Sentence) data Sentence Sent :: [Token] -> Sentence -- | Extract the token list from a Sentence tokens :: Sentence -> [Token] -- | Apply a parallel list of Tags to a Sentence. applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t -- | A chunked sentence has POS tags and chunk tags. Generated by a -- chunker. 
-- -- (chunker :: (Chunk chunk, Tag tag) => TaggedSentence tag -> -- ChunkedSentence chunk tag) data ChunkedSentence chunk tag ChunkedSent :: [ChunkOr chunk tag] -> ChunkedSentence chunk tag -- | A data type to represent the portions of a parse tree for Chunks. Note -- that this part of the parse tree could be a POS tag with no chunk. data ChunkOr chunk tag Chunk_CN :: (Chunk chunk tag) -> ChunkOr chunk tag POS_CN :: (POS tag) -> ChunkOr chunk tag -- | A Chunk that strictly contains chunks or POS tags. data Chunk chunk tag Chunk :: chunk -> [ChunkOr chunk tag] -> Chunk chunk tag showChunkedSent :: (ChunkTag c, Tag t) => ChunkedSentence c t -> Text -- | A tagged sentence has POS Tags. Generated by a part-of-speech tagger. -- (tagger :: Tag tag => Sentence -> TaggedSentence tag) data TaggedSentence tag TaggedSent :: [POS tag] -> TaggedSentence tag -- | Generate a Text representation of a TaggedSentence in the common -- tagged format, eg: -- --
--   "the/at dog/nn jumped/vbd ./."
--   
printTS :: Tag t => TaggedSentence t -> Text -- | Remove the tags from a tagged sentence stripTags :: Tag t => TaggedSentence t -> Sentence -- | Extract the tags from a tagged sentence, returning a parallel list of -- tags along with the underlying Sentence. unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t]) unzipChunks :: (ChunkTag c, Tag t) => ChunkedSentence c t -> (TaggedSentence t, [c]) -- | Combine the results of POS taggers, using the second param to fill in -- tagUNK entries, where possible. combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t] -- | Merge TaggedSentence values, preferring the tags in the first -- TaggedSentence. Delegates to pickTag. combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t -- | Returns the first param, unless it is tagged tagUNK. Throws an -- error if the text does not match. pickTag :: Tag t => POS t -> POS t -> POS t -- | Helper to create ChunkOr types. mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag -- | Helper to create ChunkOr types that just hold POS tagged data. mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag -- | A POS-tagged token. data POS tag POS :: tag -> Token -> POS tag posTag :: POS tag -> tag posToken :: POS tag -> Token -- | Show the underlying text token only. showPOStok :: Tag tag => POS tag -> Text showPOStag :: Tag tag => POS tag -> Text -- | Show the text and tag. printPOS :: Tag tag => POS tag -> Text -- | Raw tokenized text. -- -- Token has an IsString instance to simplify use. data Token Token :: Text -> Token -- | Extract the text of a Token showTok :: Token -> Text -- | Extract the last three characters of a Token, if the token is -- long enough, otherwise returns the full token text. 
suffix :: Token -> Text -- | Extract the list of POS tags from a TaggedSentence unTS :: Tag t => TaggedSentence t -> [POS t] -- | Calculate the length of a TaggedSentence (in terms of the -- number of tokens). tsLength :: Tag t => TaggedSentence t -> Int -- | Brutally concatenate a list of TaggedSentences tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t -- | True if the input sentence contains the given text token. Does not do -- partial or approximate matching, and compares details in a fully -- case-sensitive manner. contains :: Tag t => TaggedSentence t -> Text -> Bool -- | True if the input sentence contains the given POS tag. Does not do -- partial matching (such as prefix matching) containsTag :: Tag t => TaggedSentence t -> t -> Bool -- | Compare the tag of the POS-tagged token with a supplied tag. posTagMatches :: Tag t => t -> POS t -> Bool -- | Compare the POS-tagged token with a text string. posTokMatches :: Tag t => Text -> POS t -> Bool -- | Compare a token with a text string. 
tokenMatches :: Text -> Token -> Bool instance Read Token instance Show Token instance Eq Token instance Read tag => Read (POS tag) instance Show tag => Show (POS tag) instance Eq tag => Eq (POS tag) instance Read tag => Read (TaggedSentence tag) instance Show tag => Show (TaggedSentence tag) instance Eq tag => Eq (TaggedSentence tag) instance (Read chunk, Read tag) => Read (ChunkOr chunk tag) instance (Show chunk, Show tag) => Show (ChunkOr chunk tag) instance (Eq chunk, Eq tag) => Eq (ChunkOr chunk tag) instance (Read chunk, Read tag) => Read (Chunk chunk tag) instance (Show chunk, Show tag) => Show (Chunk chunk tag) instance (Eq chunk, Eq tag) => Eq (Chunk chunk tag) instance (Read chunk, Read tag) => Read (ChunkedSentence chunk tag) instance (Show chunk, Show tag) => Show (ChunkedSentence chunk tag) instance (Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag) instance Read Sentence instance Show Sentence instance Eq Sentence instance IsString Token instance Arbitrary Token instance (Arbitrary t, Tag t) => Arbitrary (POS t) instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (Chunk c t) instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkOr c t) instance (Arbitrary t, Tag t) => Arbitrary (TaggedSentence t) instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t) instance Arbitrary Sentence module NLP.Types.IOB -- | Data type to indicate IOB tags for chunking data IOBChunk chunk tag -- | Begin marker. BChunk :: (POS tag) -> chunk -> IOBChunk chunk tag -- | In chunk tag IChunk :: (POS tag) -> chunk -> IOBChunk chunk tag -- | Not in a chunk. OChunk :: (POS tag) -> IOBChunk chunk tag getPOS :: (ChunkTag c, Tag t) => IOBChunk c t -> POS t toTaggedSentence :: (ChunkTag c, Tag t) => [IOBChunk c t] -> TaggedSentence t -- | Parse an IOB-chunk encoded line of text. 
-- -- Assumes that the line has three space-delimited entries, in the -- format: > token POSTag IOBChunk For example: > > parseIOBLine -- "We PRP B-NP" :: IOBChunk B.Chunk B.Tag > BChunk (POS B.PRP (Token -- "We")) B.C_NP parseIOBLine :: (ChunkTag chunk, Tag tag) => Text -> Either Error (IOBChunk chunk tag) iobBuilder :: (ChunkTag c, Tag t) => Text -> (POS t -> Either Error (IOBChunk c t)) -- | Turn an IOB result into a tree. toChunkTree :: (ChunkTag c, Tag t) => [IOBChunk c t] -> ChunkedSentence c t -- | Parse an IOB-encoded corpus. parseIOB :: (ChunkTag chunk, Tag tag) => Text -> Either Error [[IOBChunk chunk tag]] parseSentence :: (ChunkTag chunk, Tag tag) => [Text] -> Either Error [IOBChunk chunk tag] -- | Just split a body of text into lines, and then into "paragraphs". Each -- resulting sub list is separated by empty lines in the original text. -- -- e.g.; > > getSentences "He\njumped\n.\n\nShe\njumped\n." > -- [["He", "jumped", "."], ["She", "jumped", "."]] getSentences :: Text -> [[Text]] instance (Read chunk, Read tag) => Read (IOBChunk chunk tag) instance (Show chunk, Show tag) => Show (IOBChunk chunk tag) instance (Eq chunk, Eq tag) => Eq (IOBChunk chunk tag) instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (IOBChunk c t) module NLP.Tokenize.Chatter runTokenizer :: Tokenizer -> (Text -> Sentence) tokenize :: Text -> Sentence -- | Data types representing the POS tags and Chunk tags derived from the -- Conll2000 training corpus. module NLP.Corpora.Conll -- | Named entity categories defined for the Conll 2003 task. data NERTag PER :: NERTag ORG :: NERTag LOC :: NERTag MISC :: NERTag -- | Phrase chunk tags defined for the Conll task. data Chunk ADJP :: Chunk ADVP :: Chunk CONJP :: Chunk INTJ :: Chunk LST :: Chunk -- | Noun Phrase. NP :: Chunk -- | Prepositional Phrase. PP :: Chunk PRT :: Chunk SBAR :: Chunk UCP :: Chunk -- | Verb Phrase. VP :: Chunk -- | "out"; not a chunk. 
O :: Chunk readTag :: Text -> Either Error Tag -- | Order matters here: The patterns are replaced in reverse order when -- generating tags, and in top-to-bottom when generating tags. tagTxtPatterns :: [(Text, Text)] reversePatterns :: [(Text, Text)] showTag :: Tag -> Text replaceAll :: [(Text, Text)] -> (Text -> Text) -- | These tags may actually be the Penn Treebank tags. But I have not -- (yet?) seen the punctuation tags added to the Penn set. -- -- This particular list was compiled from the union of: -- -- data Tag -- | START tag, used in training. START :: Tag -- | END tag, used in training. END :: Tag -- | # Hash :: Tag -- | $ Dollar :: Tag -- | '' CloseDQuote :: Tag -- | `` OpenDQuote :: Tag -- | ( Op_Paren :: Tag -- | ) Cl_Paren :: Tag -- | , Comma :: Tag -- | . Sentence Terminator Term :: Tag -- | : Colon :: Tag -- | Coordinating conjunction CC :: Tag -- | Cardinal number CD :: Tag -- | Determiner DT :: Tag -- | Existential there EX :: Tag -- | Foreign word FW :: Tag -- | Preposition or subordinating conjunction IN :: Tag -- | Adjective JJ :: Tag -- | Adjective, comparative JJR :: Tag -- | Adjective, superlative JJS :: Tag -- | List item marker LS :: Tag -- | Modal MD :: Tag -- | Noun, singular or mass NN :: Tag -- | Noun, plural NNS :: Tag -- | Proper noun, singular NNP :: Tag -- | Proper noun, plural NNPS :: Tag -- | Predeterminer PDT :: Tag -- | Possessive ending POS :: Tag -- | Personal pronoun PRP :: Tag -- | Possessive pronoun PRPdollar :: Tag -- | Adverb RB :: Tag -- | Adverb, comparative RBR :: Tag -- | Adverb, superlative RBS :: Tag -- | Particle RP :: Tag -- | Symbol SYM :: Tag -- | to TO :: Tag -- | Interjection UH :: Tag -- | Verb, base form VB :: Tag -- | Verb, past tense VBD :: Tag -- | Verb, gerund or present participle VBG :: Tag -- | Verb, past participle VBN :: Tag -- | Verb, non-3rd person singular present VBP :: Tag -- | Verb, 3rd person singular present VBZ :: Tag -- | Wh-determiner WDT :: Tag -- | Wh-pronoun WP :: Tag -- | Possessive 
wh-pronoun WPdollar :: Tag -- | Wh-adverb WRB :: Tag Unk :: Tag instance Read NERTag instance Show NERTag instance Ord NERTag instance Eq NERTag instance Generic NERTag instance Enum NERTag instance Bounded NERTag instance Read Chunk instance Show Chunk instance Ord Chunk instance Eq Chunk instance Generic Chunk instance Enum Chunk instance Bounded Chunk instance Read Tag instance Show Tag instance Ord Tag instance Eq Tag instance Generic Tag instance Enum Tag instance Bounded Tag instance Datatype D1NERTag instance Constructor C1_0NERTag instance Constructor C1_1NERTag instance Constructor C1_2NERTag instance Constructor C1_3NERTag instance Datatype D1Chunk instance Constructor C1_0Chunk instance Constructor C1_1Chunk instance Constructor C1_2Chunk instance Constructor C1_3Chunk instance Constructor C1_4Chunk instance Constructor C1_5Chunk instance Constructor C1_6Chunk instance Constructor C1_7Chunk instance Constructor C1_8Chunk instance Constructor C1_9Chunk instance Constructor C1_10Chunk instance Constructor C1_11Chunk instance Datatype D1Tag instance Constructor C1_0Tag instance Constructor C1_1Tag instance Constructor C1_2Tag instance Constructor C1_3Tag instance Constructor C1_4Tag instance Constructor C1_5Tag instance Constructor C1_6Tag instance Constructor C1_7Tag instance Constructor C1_8Tag instance Constructor C1_9Tag instance Constructor C1_10Tag instance Constructor C1_11Tag instance Constructor C1_12Tag instance Constructor C1_13Tag instance Constructor C1_14Tag instance Constructor C1_15Tag instance Constructor C1_16Tag instance Constructor C1_17Tag instance Constructor C1_18Tag instance Constructor C1_19Tag instance Constructor C1_20Tag instance Constructor C1_21Tag instance Constructor C1_22Tag instance Constructor C1_23Tag instance Constructor C1_24Tag instance Constructor C1_25Tag instance Constructor C1_26Tag instance Constructor C1_27Tag instance Constructor C1_28Tag instance Constructor C1_29Tag instance Constructor C1_30Tag instance 
Constructor C1_31Tag instance Constructor C1_32Tag instance Constructor C1_33Tag instance Constructor C1_34Tag instance Constructor C1_35Tag instance Constructor C1_36Tag instance Constructor C1_37Tag instance Constructor C1_38Tag instance Constructor C1_39Tag instance Constructor C1_40Tag instance Constructor C1_41Tag instance Constructor C1_42Tag instance Constructor C1_43Tag instance Constructor C1_44Tag instance Constructor C1_45Tag instance Constructor C1_46Tag instance Constructor C1_47Tag instance ChunkTag Chunk instance Serialize Tag instance Arbitrary Tag instance Tag Tag instance Serialize Chunk instance Arbitrary Chunk instance NERTag NERTag instance Serialize NERTag instance Arbitrary NERTag -- | The internal implementation of critical types in terms of the Brown -- corpus. module NLP.Corpora.Brown data Tag -- | START tag, used in training. START :: Tag -- | END tag, used in training. END :: Tag -- | ( Op_Paren :: Tag -- | ) Cl_Paren :: Tag -- | Negator :: Tag -- | , Comma :: Tag -- | Dash :: Tag -- | . 
Sentence Terminator Term :: Tag -- | : Colon :: Tag -- | determiner/pronoun, pre-qualifier e.g.; quite such rather ABL :: Tag -- | determiner/pronoun, pre-quantifier e.g.; all half many nary ABN :: Tag -- | determiner/pronoun, double conjunction or pre-quantifier both ABX :: Tag -- | determiner/pronoun, post-determiner many other next more last former -- little several enough most least only very few fewer past same Last -- latter less single plenty 'nough lesser certain various manye -- next-to-last particular final previous present nuf AP :: Tag -- | determiner/pronoun, post-determiner, genitive e.g.; other's APdollar :: Tag -- | determiner/pronoun, post-determiner, hyphenated pair e.g.; many-much AP_pl_AP :: Tag -- | article e.g.; the an no a every th' ever' ye AT :: Tag -- | verb "to be", infinitive or imperative e.g.; be BE :: Tag -- | verb "to be", past tense, 2nd person singular or all persons plural -- e.g.; were BED :: Tag -- | verb "to be", past tense, 2nd person singular or all persons plural, -- negated e.g.; weren't BEDstar :: Tag -- | verb "to be", past tense, 1st and 3rd person singular e.g.; was BEDZ :: Tag -- | verb "to be", past tense, 1st and 3rd person singular, negated e.g.; -- wasn't BEDZstar :: Tag -- | verb "to be", present participle or gerund e.g.; being BEG :: Tag -- | verb "to be", present tense, 1st person singular e.g.; am BEM :: Tag -- | verb "to be", present tense, 1st person singular, negated e.g.; ain't BEMstar :: Tag -- | verb "to be", past participle e.g.; been BEN :: Tag -- | verb "to be", present tense, 2nd person singular or all persons plural -- e.g.; are art BER :: Tag -- | verb "to be", present tense, 2nd person singular or all persons -- plural, negated e.g.; aren't ain't BERstar :: Tag -- | verb "to be", present tense, 3rd person singular e.g.; is BEZ :: Tag -- | verb "to be", present tense, 3rd person singular, negated e.g.; isn't -- ain't BEZstar :: Tag -- | conjunction, coordinating e.g.; and or but plus & either 
neither -- nor yet n and/or minus an' CC :: Tag -- | numeral, cardinal e.g.; two one 1 four 2 1913 71 74 637 1937 8 five -- three million 87-31 29-5 seven 1,119 fifty-three 7.5 billion hundred -- 125,000 1,700 60 100 six ... CD :: Tag -- | numeral, cardinal, genitive e.g.; 1960's 1961's .404's CDdollar :: Tag -- | conjunction, subordinating e.g.; that as after whether before while -- like because if since for than altho until so unless though providing -- once lest sposin till whereas whereupon supposing tho' albeit -- then so's 'fore CS :: Tag -- | verb "to do", uninflected present tense, infinitive or imperative -- e.g.; do dost DO :: Tag -- | verb "to do", uninflected present tense or imperative, negated e.g.; -- don't DOstar :: Tag -- | verb "to do", past or present tense + pronoun, personal, nominative, -- not 3rd person singular e.g.; d'you DO_pl_PPSS :: Tag -- | verb "to do", past tense e.g.; did done DOD :: Tag -- | verb "to do", past tense, negated e.g.; didn't DODstar :: Tag -- | verb "to do", present tense, 3rd person singular e.g.; does DOZ :: Tag -- | verb "to do", present tense, 3rd person singular, negated e.g.; -- doesn't don't DOZstar :: Tag -- | determiner/pronoun, singular e.g.; this each another that 'nother DT :: Tag -- | determiner/pronoun, singular, genitive e.g.; another's DTdollar :: Tag -- | determiner/pronoun + verb "to be", present tense, 3rd person singular -- e.g.; that's DT_pl_BEZ :: Tag -- | determiner/pronoun + modal auxillary e.g.; that'll this'll DT_pl_MD :: Tag -- | determiner/pronoun, singular or plural e.g.; any some DTI :: Tag -- | determiner/pronoun, plural e.g.; these those them DTS :: Tag -- | pronoun, plural + verb "to be", present tense, 3rd person singular -- e.g.; them's DTS_pl_BEZ :: Tag -- | determiner, pronoun or double conjunction e.g.; neither either one DTX :: Tag -- | existential there e.g.; there EX :: Tag -- | existential there + verb "to be", present tense, 3rd person singular -- e.g.; there's EX_pl_BEZ :: Tag 
-- | existential there + verb "to have", past tense e.g.; there'd EX_pl_HVD :: Tag -- | existential there + verb "to have", present tense, 3rd person singular -- e.g.; there's EX_pl_HVZ :: Tag -- | existential there + modal auxillary e.g.; there'll there'd EX_pl_MD :: Tag -- | foreign word: negator e.g.; pas non ne FW_star :: Tag -- | foreign word: article e.g.; la le el un die der ein keine eine das las -- les Il FW_AT :: Tag -- | foreign word: article + noun, singular, common e.g.; l'orchestre -- l'identite l'arcade l'ange l'assistance l'activite L'Universite -- l'independance L'Union L'Unita l'osservatore FW_AT_pl_NN :: Tag -- | foreign word: article + noun, singular, proper e.g.; L'Astree -- L'Imperiale FW_AT_pl_NP :: Tag -- | foreign word: verb "to be", infinitive or imperative e.g.; sit FW_BE :: Tag -- | foreign word: verb "to be", present tense, 2nd person singular or all -- persons plural e.g.; sind sunt etes FW_BER :: Tag -- | foreign word: verb "to be", present tense, 3rd person singular e.g.; -- ist est FW_BEZ :: Tag -- | foreign word: conjunction, coordinating e.g.; et ma mais und aber och -- nec y FW_CC :: Tag -- | foreign word: numeral, cardinal e.g.; une cinq deux sieben unam zwei FW_CD :: Tag -- | foreign word: conjunction, subordinating e.g.; bevor quam ma FW_CS :: Tag -- | foreign word: determiner/pronoun, singular e.g.; hoc FW_DT :: Tag -- | foreign word: determiner + verb "to be", present tense, 3rd person -- singular e.g.; c'est FW_DT_pl_BEZ :: Tag -- | foreign word: determiner/pronoun, plural e.g.; haec FW_DTS :: Tag -- | foreign word: verb "to have", present tense, not 3rd person singular -- e.g.; habe FW_HV :: Tag -- | foreign word: preposition e.g.; ad de en a par con dans ex von auf -- super post sine sur sub avec per inter sans pour pendant in di FW_IN :: Tag -- | foreign word: preposition + article e.g.; della des du aux zur d'un -- del dell' FW_IN_pl_AT :: Tag -- | foreign word: preposition + noun, singular, common e.g.; d'etat -- 
d'hotel d'argent d'identite d'art FW_IN_pl_NN :: Tag -- | foreign word: preposition + noun, singular, proper e.g.; d'Yquem -- d'Eiffel FW_IN_pl_NP :: Tag -- | foreign word: adjective e.g.; avant Espagnol sinfonica Siciliana -- Philharmonique grand publique haute noire bouffe Douce meme humaine -- bel serieuses royaux anticus presto Sovietskaya Bayerische comique -- schwarzen ... FW_JJ :: Tag -- | foreign word: adjective, comparative e.g.; fortiori FW_JJR :: Tag -- | foreign word: adjective, superlative e.g.; optimo FW_JJT :: Tag -- | foreign word: noun, singular, common e.g.; ballet esprit ersatz mano -- chatte goutte sang Fledermaus oud def kolkhoz roi troika canto boite -- blutwurst carne muzyka bonheur monde piece force ... FW_NN :: Tag -- | foreign word: noun, singular, common, genitive e.g.; corporis -- intellectus arte's dei aeternitatis senioritatis curiae patronne's -- chambre's FW_NNdollar :: Tag -- | foreign word: noun, plural, common e.g.; al culpas vopos boites haflis -- kolkhozes augen tyrannis alpha-beta-gammas metis banditos rata phis -- negociants crus Einsatzkommandos kamikaze wohaws sabinas zorrillas -- palazzi engages coureurs corroborees yori Ubermenschen ... 
FW_NNS :: Tag -- | foreign word: noun, singular, proper e.g.; Karshilama Dieu Rundfunk -- Afrique Espanol Afrika Spagna Gott Carthago deus FW_NP :: Tag -- | foreign word: noun, plural, proper e.g.; Svenskarna Atlantes Dieux FW_NPS :: Tag -- | foreign word: noun, singular, adverbial e.g.; heute morgen aujourd'hui -- hoy FW_NR :: Tag -- | foreign word: numeral, ordinal e.g.; 18e 17e quintus FW_OD :: Tag -- | foreign word: pronoun, nominal e.g.; hoc FW_PN :: Tag -- | foreign word: determiner, possessive e.g.; mea mon deras vos FW_PPdollar :: Tag -- | foreign word: pronoun, singular, reflexive e.g.; se FW_PPL :: Tag -- | foreign word: pronoun, singular, reflexive + verb, present tense, 3rd -- person singular e.g.; s'excuse s'accuse FW_PPL_pl_VBZ :: Tag -- | foreign word: pronoun, personal, accusative e.g.; lui me moi mi FW_PPO :: Tag -- | foreign word: pronoun, personal, accusative + preposition e.g.; mecum -- tecum FW_PPO_pl_IN :: Tag -- | foreign word: pronoun, personal, nominative, 3rd person singular e.g.; -- il FW_PPS :: Tag -- | foreign word: pronoun, personal, nominative, not 3rd person singular -- e.g.; ich vous sie je FW_PPSS :: Tag -- | foreign word: pronoun, personal, nominative, not 3rd person singular + -- verb "to have", present tense, not 3rd person singular e.g.; j'ai FW_PPSS_pl_HV :: Tag -- | foreign word: qualifier e.g.; minus FW_QL :: Tag -- | foreign word: adverb e.g.; bas assai deja um wiederum cito velociter -- vielleicht simpliciter non zu domi nuper sic forsan olim oui semper -- tout despues hors FW_RB :: Tag -- | foreign word: adverb + conjunction, coordinating e.g.; forisque FW_RB_pl_CC :: Tag -- | foreign word: infinitival to + verb, infinitive e.g.; d'entretenir FW_TO_pl_VB :: Tag -- | foreign word: interjection e.g.; sayonara bien adieu arigato bonjour -- adios bueno tchalo ciao o FW_UH :: Tag -- | foreign word: verb, present tense, not 3rd person singular, imperative -- or infinitive e.g.; nolo contendere vive fermate faciunt esse vade -- noli tangere 
dites duces meminisse iuvabit gosaimasu voulez habla -- ksuu'peliafo lacheln miuchi say allons strafe portant FW_VB :: Tag -- | foreign word: verb, past tense e.g.; stabat peccavi audivi FW_VBD :: Tag -- | foreign word: verb, present participle or gerund e.g.; nolens volens -- appellant seq. obliterans servanda dicendi delenda FW_VBG :: Tag -- | foreign word: verb, past participle e.g.; vue verstrichen rasa -- verboten engages FW_VBN :: Tag -- | foreign word: verb, present tense, 3rd person singular e.g.; gouverne -- sinkt sigue diapiace FW_VBZ :: Tag -- | foreign word: WH-determiner e.g.; quo qua quod que quok FW_WDT :: Tag -- | foreign word: WH-pronoun, accusative e.g.; quibusdam FW_WPO :: Tag -- | foreign word: WH-pronoun, nominative e.g.; qui FW_WPS :: Tag -- | verb "to have", uninflected present tense, infinitive or imperative -- e.g.; have hast HV :: Tag -- | verb "to have", uninflected present tense or imperative, negated e.g.; -- haven't ain't HVstar :: Tag -- | verb "to have", uninflected present tense + infinitival to e.g.; hafta HV_pl_TO :: Tag -- | verb "to have", past tense e.g.; had HVD :: Tag -- | verb "to have", past tense, negated e.g.; hadn't HVDstar :: Tag -- | verb "to have", present participle or gerund e.g.; having HVG :: Tag -- | verb "to have", past participle e.g.; had HVN :: Tag -- | verb "to have", present tense, 3rd person singular e.g.; has hath HVZ :: Tag -- | verb "to have", present tense, 3rd person singular, negated e.g.; -- hasn't ain't HVZstar :: Tag -- | preposition e.g.; of in for by considering to on among at through with -- under into regarding than since despite according per before toward -- against as after during including between without except upon out over -- ... 
IN :: Tag -- | preposition, hyphenated pair e.g.; f'ovuh IN_pl_IN :: Tag -- | preposition + pronoun, personal, accusative e.g.; t'hi-im IN_pl_PPO :: Tag -- | adjective e.g.; recent over-all possible hard-fought favorable hard -- meager fit such widespread outmoded inadequate ambiguous grand -- clerical effective orderly federal foster general proportionate ... JJ :: Tag -- | adjective, genitive e.g.; Great's JJdollar :: Tag -- | adjective, hyphenated pair e.g.; big-large long-far JJ_pl_JJ :: Tag -- | adjective, comparative e.g.; greater older further earlier later freer -- franker wider better deeper firmer tougher faster higher bigger worse -- younger lighter nicer slower happier frothier Greater newer Elder ... JJR :: Tag -- | adjective + conjunction, coordinating e.g.; lighter'n JJR_pl_CS :: Tag -- | adjective, semantically superlative e.g.; top chief principal -- northernmost master key head main tops utmost innermost foremost -- uppermost paramount topmost JJS :: Tag -- | adjective, superlative e.g.; best largest coolest calmest latest -- greatest earliest simplest strongest newest fiercest unhappiest worst -- youngest worthiest fastest hottest fittest lowest finest smallest -- staunchest ... 
JJT :: Tag -- | modal auxillary e.g.; should may might will would must can could shall -- ought need wilt MD :: Tag -- | modal auxillary, negated e.g.; cannot couldn't wouldn't can't won't -- shouldn't shan't mustn't musn't MDstar :: Tag -- | modal auxillary + verb "to have", uninflected form e.g.; shouldda -- musta coulda must've woulda could've MD_pl_HV :: Tag -- | modal auxillary + pronoun, personal, nominative, not 3rd person -- singular e.g.; willya MD_pl_PPSS :: Tag -- | modal auxillary + infinitival to e.g.; oughta MD_pl_TO :: Tag -- | noun, singular, common e.g.; failure burden court fire appointment -- awarding compensation Mayor interim committee fact effect airport -- management surveillance jail doctor intern extern night weekend duty -- legislation Tax Office ... NN :: Tag -- | noun, singular, common, genitive e.g.; season's world's player's -- night's chapter's golf's football's baseball's club's U.'s coach's -- bride's bridegroom's board's county's firm's company's -- superintendent's mob's Navy's ... NNdollar :: Tag -- | noun, singular, common + verb "to be", present tense, 3rd person -- singular e.g.; water's camera's sky's kid's Pa's heat's throat's -- father's money's undersecretary's granite's level's wife's fat's -- Knife's fire's name's hell's leg's sun's roulette's cane's guy's -- kind's baseball's ... 
NN_pl_BEZ :: Tag -- | noun, singular, common + verb "to have", past tense e.g.; Pa'd NN_pl_HVD :: Tag -- | noun, singular, common + verb "to have", present tense, 3rd person -- singular e.g.; guy's Knife's boat's summer's rain's company's NN_pl_HVZ :: Tag -- | noun, singular, common + preposition e.g.; buncha NN_pl_IN :: Tag -- | noun, singular, common + modal auxillary e.g.; cowhand'd sun'll NN_pl_MD :: Tag -- | noun, singular, common, hyphenated pair e.g.; stomach-belly NN_pl_NN :: Tag -- | noun, plural, common e.g.; irregularities presentments thanks reports -- voters laws legislators years areas adjustments chambers $100 bonds -- courts sales details raises sessions members congressmen votes polls -- calls ... NNS :: Tag -- | noun, plural, common, genitive e.g.; taxpayers' children's members' -- States' women's cutters' motorists' steelmakers' hours' Nations' -- lawyers' prisoners' architects' tourists' Employers' secretaries' -- Rogues' ... NNSdollar :: Tag -- | noun, plural, common + modal auxillary e.g.; duds'd oystchers'll NNS_pl_MD :: Tag -- | noun, singular, proper e.g.; Fulton Atlanta September-October Durwood -- Pye Ivan Allen Jr. Jan. Alpharetta Grady William B. Hartsfield Pearl -- Williams Aug. Berry J. M. Cheshire Griffin Opelika Ala. E. Pelham -- Snodgrass ... NP :: Tag -- | noun, singular, proper, genitive e.g.; Green's Landis' Smith's -- Carreon's Allison's Boston's Spahn's Willie's Mickey's Milwaukee's -- Mays' Howsam's Mantle's Shaw's Wagner's Rickey's Shea's Palmer's -- Arnold's Broglio's ... 
NPdollar :: Tag -- | noun, singular, proper + verb "to be", present tense, 3rd person -- singular e.g.; W.'s Ike's Mack's Jack's Kate's Katharine's Black's -- Arthur's Seaton's Buckhorn's Breed's Penny's Rob's Kitty's Blackwell's -- Myra's Wally's Lucille's Springfield's Arlene's NP_pl_BEZ :: Tag -- | noun, singular, proper + verb "to have", present tense, 3rd person -- singular e.g.; Bill's Guardino's Celie's Skolman's Crosson's Tim's -- Wally's NP_pl_HVZ :: Tag -- | noun, singular, proper + modal auxillary e.g.; Gyp'll John'll NP_pl_MD :: Tag -- | noun, plural, proper e.g.; Chases Aderholds Chapelles Armisteads -- Lockies Carbones French Marskmen Toppers Franciscans Romans Cadillacs -- Masons Blacks Catholics British Dixiecrats Mississippians Congresses -- ... NPS :: Tag -- | noun, plural, proper, genitive e.g.; Republicans' Orioles' Birds' -- Yanks' Redbirds' Bucs' Yankees' Stevenses' Geraghtys' Burkes' Wackers' -- Achaeans' Dresbachs' Russians' Democrats' Gershwins' Adventists' -- Negroes' Catholics' ... NPSdollar :: Tag -- | noun, singular, adverbial e.g.; Friday home Wednesday Tuesday Monday -- Sunday Thursday yesterday tomorrow tonight West East Saturday west -- left east downtown north northeast southeast northwest North South -- right ... NR :: Tag -- | noun, singular, adverbial, genitive e.g.; Saturday's Monday's -- yesterday's tonight's tomorrow's Sunday's Wednesday's Friday's today's -- Tuesday's West's Today's South's NRdollar :: Tag -- | noun, singular, adverbial + modal auxillary e.g.; today'll NR_pl_MD :: Tag -- | noun, plural, adverbial e.g.; Sundays Mondays Saturdays Wednesdays -- Souths Fridays NRS :: Tag -- | numeral, ordinal e.g.; first 13th third nineteenth 2d 61st second -- sixth eighth ninth twenty-first eleventh 50th eighteenth- Thirty-ninth -- 72nd 1/20th twentieth mid-19th thousandth 350th sixteenth 701st ... 
OD :: Tag -- | pronoun, nominal e.g.; none something everything one anyone nothing -- nobody everybody everyone anybody anything someone no-one nothin PN :: Tag -- | pronoun, nominal, genitive e.g.; one's someone's anybody's nobody's -- everybody's anyone's everyone's PNdollar :: Tag -- | pronoun, nominal + verb "to be", present tense, 3rd person singular -- e.g.; nothing's everything's somebody's nobody's someone's PN_pl_BEZ :: Tag -- | pronoun, nominal + verb "to have", past tense e.g.; nobody'd PN_pl_HVD :: Tag -- | pronoun, nominal + verb "to have", present tense, 3rd person singular -- e.g.; nobody's somebody's one's PN_pl_HVZ :: Tag -- | pronoun, nominal + modal auxillary e.g.; someone'll somebody'll -- anybody'd PN_pl_MD :: Tag -- | determiner, possessive e.g.; our its his their my your her out thy -- mine thine PPdollar :: Tag -- | pronoun, possessive e.g.; ours mine his hers theirs yours PPdollardollar :: Tag -- | pronoun, singular, reflexive e.g.; itself himself myself yourself -- herself oneself ownself PPL :: Tag -- | pronoun, plural, reflexive e.g.; themselves ourselves yourselves PPLS :: Tag -- | pronoun, personal, accusative e.g.; them it him me us you 'em her thee -- we'uns PPO :: Tag -- | pronoun, personal, nominative, 3rd person singular e.g.; it he she -- thee PPS :: Tag -- | pronoun, personal, nominative, 3rd person singular + verb "to be", -- present tense, 3rd person singular e.g.; it's he's she's PPS_pl_BEZ :: Tag -- | pronoun, personal, nominative, 3rd person singular + verb "to have", -- past tense e.g.; she'd he'd it'd PPS_pl_HVD :: Tag -- | pronoun, personal, nominative, 3rd person singular + verb "to have", -- present tense, 3rd person singular e.g.; it's he's she's PPS_pl_HVZ :: Tag -- | pronoun, personal, nominative, 3rd person singular + modal auxillary -- e.g.; he'll she'll it'll he'd it'd she'd PPS_pl_MD :: Tag -- | pronoun, personal, nominative, not 3rd person singular e.g.; they we I -- you ye thou you'uns PPSS :: Tag -- | 
pronoun, personal, nominative, not 3rd person singular + verb "to be", -- present tense, 1st person singular e.g.; I'm Ahm PPSS_pl_BEM :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to be", -- present tense, 2nd person singular or all persons plural e.g.; we're -- you're they're PPSS_pl_BER :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to be", -- present tense, 3rd person singular e.g.; you's PPSS_pl_BEZ :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to be", -- present tense, 3rd person singular, negated e.g.; taint PPSS_pl_BEZstar :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to -- have", uninflected present tense e.g.; I've we've they've you've PPSS_pl_HV :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to -- have", past tense e.g.; I'd you'd we'd they'd PPSS_pl_HVD :: Tag -- | pronoun, personal, nominative, not 3rd person singular + modal -- auxillary e.g.; you'll we'll I'll we'd I'd they'll they'd you'd PPSS_pl_MD :: Tag -- | pronoun, personal, nominative, not 3rd person singular + verb "to -- verb", uninflected present tense e.g.; y'know PPSS_pl_VB :: Tag -- | qualifier, pre e.g.; well less very most so real as highly -- fundamentally even how much remarkably somewhat more completely too -- thus ill deeply little overly halfway almost impossibly far severly -- such ... QL :: Tag -- | qualifier, post e.g.; indeed enough still 'nuff QLP :: Tag -- | adverb e.g.; only often generally also nevertheless upon together back -- newly no likely meanwhile near then heavily there apparently yet -- outright fully aside consistently specifically formally ever just ... 
RB :: Tag -- | adverb, genitive e.g.; else's RBdollar :: Tag -- | adverb + verb "to be", present tense, 3rd person singular e.g.; here's -- there's RB_pl_BEZ :: Tag -- | adverb + conjunction, coordinating e.g.; well's soon's RB_pl_CS :: Tag -- | adverb, comparative e.g.; further earlier better later higher tougher -- more harder longer sooner less faster easier louder farther oftener -- nearer cheaper slower tighter lower worse heavier quicker ... RBR :: Tag -- | adverb, comparative + conjunction, coordinating e.g.; more'n RBR_pl_CS :: Tag -- | adverb, superlative e.g.; most best highest uppermost nearest -- brightest hardest fastest deepest farthest loudest ... RBT :: Tag -- | adverb, nominal e.g.; here afar then RN :: Tag -- | adverb, particle e.g.; up out off down over on in about through across -- after RP :: Tag -- | adverb, particle + preposition e.g.; out'n outta RP_pl_IN :: Tag -- | infinitival to e.g.; to t' TO :: Tag -- | infinitival to + verb, infinitive e.g.; t'jawn t'lah TO_pl_VB :: Tag -- | interjection e.g.; Hurrah bang whee hmpf ah goodbye oops -- oh-the-pain-of-it ha crunch say oh why see well hello lo alas -- tarantara rum-tum-tum gosh hell keerist Jesus Keeeerist boy c'mon 'mon -- goddamn bah hoo-pig damn ... UH :: Tag -- | verb, base: uninflected present, imperative or infinitive e.g.; -- investigate find act follow inure achieve reduce take remedy re-set -- distribute realize disable feel receive continue place protect -- eliminate elaborate work permit run enter force ... 
VB :: Tag -- | verb, base: uninflected present or infinitive + article e.g.; wanna VB_pl_AT :: Tag -- | verb, base: uninflected present, imperative or infinitive + -- preposition e.g.; lookit VB_pl_IN :: Tag -- | verb, base: uninflected present, imperative or infinitive + adjective -- e.g.; die-dead VB_pl_JJ :: Tag -- | verb, uninflected present tense + pronoun, personal, accusative e.g.; -- let's lemme gimme VB_pl_PPO :: Tag -- | verb, imperative + adverbial particle e.g.; g'ahn c'mon VB_pl_RP :: Tag -- | verb, base: uninflected present, imperative or infinitive + -- infinitival to e.g.; wanta wanna VB_pl_TO :: Tag -- | verb, base: uninflected present, imperative or infinitive; hyphenated -- pair e.g.; say-speak VB_pl_VB :: Tag -- | verb, past tense e.g.; said produced took recommended commented urged -- found added praised charged listed became announced brought attended -- wanted voted defeated received got stood shot scheduled feared -- promised made ... VBD :: Tag -- | verb, present participle or gerund e.g.; modernizing improving -- purchasing Purchasing lacking enabling pricing keeping getting picking -- entering voting warning making strengthening setting neighboring -- attending participating moving ... VBG :: Tag -- | verb, present participle + infinitival to e.g.; gonna VBG_pl_TO :: Tag -- | verb, past participle e.g.; conducted charged won received studied -- revised operated accepted combined experienced recommended effected -- granted seen protected adopted retarded notarized selected composed -- gotten printed ... VBN :: Tag -- | verb, past participle + infinitival to e.g.; gotta VBN_pl_TO :: Tag -- | verb, present tense, 3rd person singular e.g.; deserves believes -- receives takes goes expires says opposes starts permits expects thinks -- faces votes teaches holds calls fears spends collects backs eliminates -- sets flies gives seeks reads ... 
VBZ :: Tag -- | WH-determiner e.g.; which what whatever whichever whichever-the-hell WDT :: Tag -- | WH-determiner + verb "to be", present tense, 2nd person singular or -- all persons plural e.g.; what're WDT_pl_BER :: Tag -- | WH-determiner + verb "to be", present, 2nd person singular or all -- persons plural + pronoun, personal, nominative, not 3rd person -- singular e.g.; whaddya WDT_pl_BER_pl_PP :: Tag -- | WH-determiner + verb "to be", present tense, 3rd person singular e.g.; -- what's WDT_pl_BEZ :: Tag -- | WH-determiner + verb "to do", uninflected present tense + pronoun, -- personal, nominative, not 3rd person singular e.g.; whaddya WDT_pl_DO_pl_PPS :: Tag -- | WH-determiner + verb "to do", past tense e.g.; what'd WDT_pl_DOD :: Tag -- | WH-determiner + verb "to have", present tense, 3rd person singular -- e.g.; what's WDT_pl_HVZ :: Tag -- | WH-pronoun, genitive e.g.; whose whosever WPdollar :: Tag -- | WH-pronoun, accusative e.g.; whom that who WPO :: Tag -- | WH-pronoun, nominative e.g.; that who whoever whosoever what -- whatsoever WPS :: Tag -- | WH-pronoun, nominative + verb "to be", present, 3rd person singular -- e.g.; that's who's WPS_pl_BEZ :: Tag -- | WH-pronoun, nominative + verb "to have", past tense e.g.; who'd WPS_pl_HVD :: Tag -- | WH-pronoun, nominative + verb "to have", present tense, 3rd person -- singular e.g.; who's that's WPS_pl_HVZ :: Tag -- | WH-pronoun, nominative + modal auxillary e.g.; who'll that'd who'd -- that'll WPS_pl_MD :: Tag -- | WH-qualifier e.g.; however how WQL :: Tag -- | WH-adverb e.g.; however when where why whereby wherever how whenever -- whereon wherein wherewith wheare wherefore whereof howsabout WRB :: Tag -- | WH-adverb + verb "to be", present, 2nd person singular or all persons -- plural e.g.; where're WRB_pl_BER :: Tag -- | WH-adverb + verb "to be", present, 3rd person singular e.g.; how's -- where's WRB_pl_BEZ :: Tag -- | WH-adverb + verb "to do", present, not 3rd person singular e.g.; howda WRB_pl_DO :: Tag 
-- | WH-adverb + verb "to do", past tense e.g.; where'd how'd WRB_pl_DOD :: Tag -- | WH-adverb + verb "to do", past tense, negated e.g.; whyn't WRB_pl_DODstar :: Tag -- | WH-adverb + verb "to do", present tense, 3rd person singular e.g.; -- how's WRB_pl_DOZ :: Tag -- | WH-adverb + preposition e.g.; why'n WRB_pl_IN :: Tag -- | WH-adverb + modal auxillary e.g.; where'd WRB_pl_MD :: Tag -- | Unknown. Unk :: Tag data Chunk -- | Noun Phrase. C_NP :: Chunk -- | Verb Phrase. C_VP :: Chunk -- | Prepositional Phrase. C_PP :: Chunk -- | Clause. C_CL :: Chunk -- | Out not a chunk. C_O :: Chunk instance Read Chunk instance Show Chunk instance Ord Chunk instance Eq Chunk instance Generic Chunk instance Enum Chunk instance Bounded Chunk instance Read Tag instance Show Tag instance Ord Tag instance Eq Tag instance Generic Tag instance Enum Tag instance Bounded Tag instance Datatype D1Chunk instance Constructor C1_0Chunk instance Constructor C1_1Chunk instance Constructor C1_2Chunk instance Constructor C1_3Chunk instance Constructor C1_4Chunk instance Datatype D1Tag instance Constructor C1_0Tag instance Constructor C1_1Tag instance Constructor C1_2Tag instance Constructor C1_3Tag instance Constructor C1_4Tag instance Constructor C1_5Tag instance Constructor C1_6Tag instance Constructor C1_7Tag instance Constructor C1_8Tag instance Constructor C1_9Tag instance Constructor C1_10Tag instance Constructor C1_11Tag instance Constructor C1_12Tag instance Constructor C1_13Tag instance Constructor C1_14Tag instance Constructor C1_15Tag instance Constructor C1_16Tag instance Constructor C1_17Tag instance Constructor C1_18Tag instance Constructor C1_19Tag instance Constructor C1_20Tag instance Constructor C1_21Tag instance Constructor C1_22Tag instance Constructor C1_23Tag instance Constructor C1_24Tag instance Constructor C1_25Tag instance Constructor C1_26Tag instance Constructor C1_27Tag instance Constructor C1_28Tag instance Constructor C1_29Tag instance Constructor C1_30Tag instance 
Constructor C1_31Tag instance Constructor C1_32Tag instance Constructor C1_33Tag instance Constructor C1_34Tag instance Constructor C1_35Tag instance Constructor C1_36Tag instance Constructor C1_37Tag instance Constructor C1_38Tag instance Constructor C1_39Tag instance Constructor C1_40Tag instance Constructor C1_41Tag instance Constructor C1_42Tag instance Constructor C1_43Tag instance Constructor C1_44Tag instance Constructor C1_45Tag instance Constructor C1_46Tag instance Constructor C1_47Tag instance Constructor C1_48Tag instance Constructor C1_49Tag instance Constructor C1_50Tag instance Constructor C1_51Tag instance Constructor C1_52Tag instance Constructor C1_53Tag instance Constructor C1_54Tag instance Constructor C1_55Tag instance Constructor C1_56Tag instance Constructor C1_57Tag instance Constructor C1_58Tag instance Constructor C1_59Tag instance Constructor C1_60Tag instance Constructor C1_61Tag instance Constructor C1_62Tag instance Constructor C1_63Tag instance Constructor C1_64Tag instance Constructor C1_65Tag instance Constructor C1_66Tag instance Constructor C1_67Tag instance Constructor C1_68Tag instance Constructor C1_69Tag instance Constructor C1_70Tag instance Constructor C1_71Tag instance Constructor C1_72Tag instance Constructor C1_73Tag instance Constructor C1_74Tag instance Constructor C1_75Tag instance Constructor C1_76Tag instance Constructor C1_77Tag instance Constructor C1_78Tag instance Constructor C1_79Tag instance Constructor C1_80Tag instance Constructor C1_81Tag instance Constructor C1_82Tag instance Constructor C1_83Tag instance Constructor C1_84Tag instance Constructor C1_85Tag instance Constructor C1_86Tag instance Constructor C1_87Tag instance Constructor C1_88Tag instance Constructor C1_89Tag instance Constructor C1_90Tag instance Constructor C1_91Tag instance Constructor C1_92Tag instance Constructor C1_93Tag instance Constructor C1_94Tag instance Constructor C1_95Tag instance Constructor C1_96Tag instance Constructor 
C1_97Tag instance Constructor C1_98Tag instance Constructor C1_99Tag instance Constructor C1_100Tag instance Constructor C1_101Tag instance Constructor C1_102Tag instance Constructor C1_103Tag instance Constructor C1_104Tag instance Constructor C1_105Tag instance Constructor C1_106Tag instance Constructor C1_107Tag instance Constructor C1_108Tag instance Constructor C1_109Tag instance Constructor C1_110Tag instance Constructor C1_111Tag instance Constructor C1_112Tag instance Constructor C1_113Tag instance Constructor C1_114Tag instance Constructor C1_115Tag instance Constructor C1_116Tag instance Constructor C1_117Tag instance Constructor C1_118Tag instance Constructor C1_119Tag instance Constructor C1_120Tag instance Constructor C1_121Tag instance Constructor C1_122Tag instance Constructor C1_123Tag instance Constructor C1_124Tag instance Constructor C1_125Tag instance Constructor C1_126Tag instance Constructor C1_127Tag instance Constructor C1_128Tag instance Constructor C1_129Tag instance Constructor C1_130Tag instance Constructor C1_131Tag instance Constructor C1_132Tag instance Constructor C1_133Tag instance Constructor C1_134Tag instance Constructor C1_135Tag instance Constructor C1_136Tag instance Constructor C1_137Tag instance Constructor C1_138Tag instance Constructor C1_139Tag instance Constructor C1_140Tag instance Constructor C1_141Tag instance Constructor C1_142Tag instance Constructor C1_143Tag instance Constructor C1_144Tag instance Constructor C1_145Tag instance Constructor C1_146Tag instance Constructor C1_147Tag instance Constructor C1_148Tag instance Constructor C1_149Tag instance Constructor C1_150Tag instance Constructor C1_151Tag instance Constructor C1_152Tag instance Constructor C1_153Tag instance Constructor C1_154Tag instance Constructor C1_155Tag instance Constructor C1_156Tag instance Constructor C1_157Tag instance Constructor C1_158Tag instance Constructor C1_159Tag instance Constructor C1_160Tag instance Constructor C1_161Tag instance 
Constructor C1_162Tag instance Constructor C1_163Tag instance Constructor C1_164Tag instance Constructor C1_165Tag instance Constructor C1_166Tag instance Constructor C1_167Tag instance Constructor C1_168Tag instance Constructor C1_169Tag instance Constructor C1_170Tag instance Constructor C1_171Tag instance Constructor C1_172Tag instance Constructor C1_173Tag instance Constructor C1_174Tag instance Constructor C1_175Tag instance Constructor C1_176Tag instance Constructor C1_177Tag instance Constructor C1_178Tag instance Constructor C1_179Tag instance Constructor C1_180Tag instance Constructor C1_181Tag instance Constructor C1_182Tag instance Constructor C1_183Tag instance Constructor C1_184Tag instance Constructor C1_185Tag instance Constructor C1_186Tag instance Constructor C1_187Tag instance Constructor C1_188Tag instance Constructor C1_189Tag instance Constructor C1_190Tag instance Constructor C1_191Tag instance Constructor C1_192Tag instance Constructor C1_193Tag instance Constructor C1_194Tag instance Constructor C1_195Tag instance Constructor C1_196Tag instance Constructor C1_197Tag instance Constructor C1_198Tag instance Constructor C1_199Tag instance Constructor C1_200Tag instance Constructor C1_201Tag instance Constructor C1_202Tag instance Constructor C1_203Tag instance Constructor C1_204Tag instance Constructor C1_205Tag instance Constructor C1_206Tag instance Constructor C1_207Tag instance Constructor C1_208Tag instance Constructor C1_209Tag instance Constructor C1_210Tag instance Constructor C1_211Tag instance Constructor C1_212Tag instance Constructor C1_213Tag instance Constructor C1_214Tag instance Constructor C1_215Tag instance Constructor C1_216Tag instance Constructor C1_217Tag instance Constructor C1_218Tag instance Constructor C1_219Tag instance Constructor C1_220Tag instance Constructor C1_221Tag instance Constructor C1_222Tag instance Constructor C1_223Tag instance Constructor C1_224Tag instance Constructor C1_225Tag instance Constructor 
C1_226Tag instance Constructor C1_227Tag instance Constructor C1_228Tag instance ChunkTag Chunk instance Arbitrary Tag instance Tag Tag instance Serialize Tag instance Serialize Chunk instance Arbitrary Chunk module NLP.Types -- | Part of Speech tagger, with back-off tagger. -- -- A sequence of pos taggers can be assembled by using backoff taggers. -- When tagging text, the first tagger is run on the input, possibly -- tagging some tokens as unknown ('Tag Unk'). The first backoff -- tagger is then recursively invoked on the text to fill in the unknown -- tags, but that may still leave some tokens marked with 'Tag -- Unk'. This process repeats until no more taggers are found. -- (The current implementation is not very efficient in this respect.). -- -- Back off taggers are particularly useful when there is a set of domain -- specific vernacular that a general purpose statistical tagger does not -- know of. A LiteralTagger can be created to map terms to fixed POS -- tags, and then delegate the bulk of the text to a statistical back off -- tagger, such as an AvgPerceptronTagger. -- -- POSTagger values can be serialized and deserialized by using -- serialize and NLP.POS.deserialize. This is a bit tricky -- because the POSTagger abstracts away the implementation details of the -- particular tagging algorithm, and the model for that tagger (if any). -- To support serialization, each POSTagger value must provide a -- serialize value that can be used to generate a ByteString -- representation of the model, as well as a unique id (also a -- ByteString). Furthermore, that ID must be added to a `Map -- ByteString (ByteString -> Maybe POSTagger -> Either String -- POSTagger)` that is provided to deserialize. The function in -- the map takes the output of posSerialize, and possibly a -- backoff tagger, and reconstitutes the POSTagger that was serialized -- (assigning the proper functions, setting up closures as needed, etc.) 
-- Look at the source for taggerTable and readTagger for -- examples. data POSTagger t POSTagger :: ([Sentence] -> [TaggedSentence t]) -> ([TaggedSentence t] -> IO (POSTagger t)) -> Maybe (POSTagger t) -> (Text -> Sentence) -> (Text -> [Text]) -> ByteString -> ByteString -> POSTagger t -- | The initial part-of-speech tagger. posTagger :: POSTagger t -> [Sentence] -> [TaggedSentence t] -- | Training function to train the immediate POS tagger. posTrainer :: POSTagger t -> [TaggedSentence t] -> IO (POSTagger t) -- | A tagger to invoke on unknown tokens. posBackoff :: POSTagger t -> Maybe (POSTagger t) -- | A tokenizer; (words will work.) posTokenizer :: POSTagger t -> Text -> Sentence -- | A sentence splitter. If your input is formatted as one sentence per -- line, then use lines, otherwise try Erik Kow's fullstop -- library. posSplitter :: POSTagger t -> Text -> [Text] -- | Store this POS tagger to a bytestring. This does not serialize -- the backoff taggers. posSerialize :: POSTagger t -> ByteString -- | A unique id that will identify the algorithm used for this POS Tagger. -- This is used in deserialization posID :: POSTagger t -> ByteString -- | Document corpus. -- -- This is a simple hashed corpus, the document content is not stored. data Corpus Corpus :: Int -> Map Text Int -> Corpus -- | The number of documents in the corpus. corpLength :: Corpus -> Int -- | A count of the number of documents each term occurred in. corpTermCounts :: Corpus -> Map Text Int -- | Get the number of documents that a term occurred in. termCounts :: Corpus -> Text -> Int -- | Add a document to the corpus. -- -- This can be dangerous if the documents are pre-processed differently. -- All corpus-related functions assume that the documents have all been -- tokenized and the tokens normalized, in the same way. addDocument :: Corpus -> [Text] -> Corpus -- | Create a corpus from a list of documents, represented by normalized -- tokens. 
mkCorpus :: [[Text]] -> Corpus addTerms :: Map Text Int -> Set Text -> Map Text Int addTerm :: Map Text Int -> Text -> Map Text Int instance Read Corpus instance Show Corpus instance Eq Corpus instance Ord Corpus instance Generic Corpus instance Datatype D1Corpus instance Constructor C1_0Corpus instance Selector S1_0_0Corpus instance Selector S1_0_1Corpus instance Arbitrary Corpus instance Serialize Corpus instance NFData Corpus module NLP.POS.LiteralTagger tag :: Tag t => Map Text t -> CaseSensitive -> [Sentence] -> [TaggedSentence t] tagSentence :: Tag t => Map Text t -> CaseSensitive -> Sentence -> TaggedSentence t -- | Create a Literal Tagger using the specified back-off tagger as a -- fall-back, if one is specified. -- -- This uses a tokenizer adapted from the tokenize package for a -- tokenizer, and Erik Kow's fullstop sentence segmenter as a sentence -- splitter. mkTagger :: Tag t => Map Text t -> CaseSensitive -> Maybe (POSTagger t) -> POSTagger t taggerID :: ByteString -- | deserialization for Literal Taggers. The serialization logic is in the -- posSerialize record of the POSTagger created in mkTagger. readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t) -- | Boolean type to indicate case sensitivity for textual comparisons. data CaseSensitive Sensitive :: CaseSensitive Insensitive :: CaseSensitive -- | Create a tokenizer that protects the provided terms (to tokenize -- multi-word terms) protectTerms :: [Text] -> CaseSensitive -> Tokenizer -- | This POS tagger deterministically tags tokens. However, if it ever -- sees multiple tags for the same token, it will forget the tag it has -- learned. This is useful for creating taggers that have very high -- precision, but very low recall. 
-- -- Unambiguous taggers are also useful when defined with a -- non-deterministic backoff tagger, such as an -- NLP.POS.AveragedPerceptronTagger, since the high-confidence -- tags will be applied first, followed by the more non-deterministic -- results of the backoff tagger. module NLP.POS.UnambiguousTagger taggerID :: ByteString readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t) -- | Create an unambiguous tagger, using the supplied Map as a -- source of tags. mkTagger :: Tag t => Map Text t -> Maybe (POSTagger t) -> POSTagger t -- | Trainer method for unambiguous taggers. train :: Tag t => Map Text t -> [TaggedSentence t] -> Map Text t -- | Average Perceptron implementation of Part of speech tagging, adapted -- for Haskell from this python implementation, which is described on the -- blog post: -- -- -- -- The Perceptron code can be found on github: -- -- module NLP.ML.AvgPerceptron -- | The perceptron model. data Perceptron Perceptron :: Map Feature (Map Class Weight) -> Map (Feature, Class) Weight -> Map (Feature, Class) Int -> Int -> Perceptron -- | Each feature gets its own weight vector, so weights is a dict-of-dicts weights :: Perceptron -> Map Feature (Map Class Weight) -- | The accumulated values, for the averaging. These will be keyed by -- feature/class tuples totals :: Perceptron -> Map (Feature, Class) Weight -- | The last time the feature was changed, for the averaging. Also keyed -- by feature/class tuples (tstamps is short for timestamps) tstamps :: Perceptron -> Map (Feature, Class) Int -- | Number of instances seen instances :: Perceptron -> Int -- | The classes that the perceptron assigns are represented with a -- newtype-wrapped String. -- -- Eventually, I think this should become a typeclass, so the classes can -- be defined by the users of the Perceptron (such as custom POS tag -- ADTs, or more complex classes). 
newtype Class Class :: String -> Class -- | Typedef for doubles to make the code easier to read, and to make this -- simple to change if necessary. type Weight = Double newtype Feature Feat :: Text -> Feature -- | An empty perceptron, used to start training. emptyPerceptron :: Perceptron -- | Predict a class given a feature vector. -- -- Ported from python: -- --
--   def predict(self, features):
--       '''Dot-product the features and current weights and return the best label.'''
--       scores = defaultdict(float)
--       for feat, value in features.items():
--           if feat not in self.weights or value == 0:
--               continue
--           weights = self.weights[feat]
--           for label, weight in weights.items():
--               scores[label] += value * weight
--       # Do a secondary alphabetic sort, for stability
--       return max(self.classes, key=lambda label: (scores[label], label))
--   
predict :: Perceptron -> Map Feature Int -> Maybe Class train :: Int -> Perceptron -> [(Map Feature Int, Class)] -> IO Perceptron -- | Update the perceptron with a new example. -- --
--   update(self, truth, guess, features)
--      ...
--           self.i += 1
--           if truth == guess:
--               return None
--           for f in features:
--               weights = self.weights.setdefault(f, {}) -- setdefault is Map.findWithDefault, and destructive.
--               upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
--               upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
--           return None
--   
update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron -- | Average the weights -- -- Ported from Python: -- --
--   def average_weights(self):
--       for feat, weights in self.weights.items():
--           new_feat_weights = {}
--           for clas, weight in weights.items():
--               param = (feat, clas)
--               total = self._totals[param]
--               total += (self.i - self._tstamps[param]) * weight
--               averaged = round(total / float(self.i), 3)
--               if averaged:
--                   new_feat_weights[clas] = averaged
--           self.weights[feat] = new_feat_weights
--       return None
--   
averageWeights :: Perceptron -> Perceptron instance Read Feature instance Show Feature instance Eq Feature instance Ord Feature instance Generic Feature instance Read Class instance Show Class instance Eq Class instance Ord Class instance Generic Class instance Read Perceptron instance Show Perceptron instance Eq Perceptron instance Generic Perceptron instance Datatype D1Feature instance Constructor C1_0Feature instance Datatype D1Class instance Constructor C1_0Class instance Datatype D1Perceptron instance Constructor C1_0Perceptron instance Selector S1_0_0Perceptron instance Selector S1_0_1Perceptron instance Selector S1_0_2Perceptron instance Selector S1_0_3Perceptron instance NFData Perceptron instance Serialize Perceptron instance Serialize Class instance Serialize Feature -- | Averaged Perceptron Chunker module NLP.Chunk.AvgPerceptronChunker -- | Create a chunker from a Perceptron. mkChunker :: (ChunkTag c, Tag t) => Perceptron -> Chunker c t trainInt :: (ChunkTag c, Tag t) => Int -> Perceptron -> [ChunkedSentence c t] -> IO Perceptron -- | Chunk a list of POS-tagged sentences, generating a parse tree. chunk :: (ChunkTag c, Tag t) => Perceptron -> [TaggedSentence t] -> [ChunkedSentence c t] -- | Chunk a single POS-tagged sentence. chunkSentence :: (ChunkTag c, Tag t) => Perceptron -> TaggedSentence t -> ChunkedSentence c t -- | The type of Chunkers; incorporates chunking, training, serialization -- and unique IDs for deserialization. data Chunker c t Chunker :: ([TaggedSentence t] -> [ChunkedSentence c t]) -> ([ChunkedSentence c t] -> IO (Chunker c t)) -> ByteString -> ByteString -> Chunker c t chChunker :: Chunker c t -> [TaggedSentence t] -> [ChunkedSentence c t] chTrainer :: Chunker c t -> [ChunkedSentence c t] -> IO (Chunker c t) chSerialize :: Chunker c t -> ByteString chId :: Chunker c t -> ByteString -- | The unique ID for this implementation of a Chunker chunkerID :: ByteString -- | Deserialize an AvgPerceptronChunker from a ByteString. 
readChunker :: (ChunkTag c, Tag t) => ByteString -> Either String (Chunker c t) module NLP.Similarity.VectorSim -- | An efficient (ish) representation for documents in the "bag of words" -- sense. type TermVector = DefaultMap Text Double -- | Generate a TermVector from a tokenized document. mkVector :: Corpus -> [Text] -> TermVector -- | Invokes similarity on full strings, using words for -- tokenization, and no stemming. -- -- There *must* be at least one document in the corpus. sim :: Corpus -> Text -> Text -> Double -- | Determine how similar two documents are. -- -- This function assumes that each document has been tokenized and (if -- desired) stemmed/case-normalized. -- -- This is a wrapper around tvSim, which is a *much* more -- efficient implementation. If you need to run similarity against any -- single document more than once, then you should create -- TermVectors for each of your documents and use tvSim -- instead of similarity. -- -- There *must* be at least one document in the corpus. similarity :: Corpus -> [Text] -> [Text] -> Double -- | Determine how similar two documents are. -- -- Calculates the similarity between two documents, represented as -- TermVectors tvSim :: TermVector -> TermVector -> Double -- | Return the raw frequency of a term in a body of text. -- -- The first argument is the term to find, the second is a tokenized -- document. This function does not do any stemming or additional text -- modification. tf :: Eq a => a -> [a] -> Int -- | Calculate the inverse document frequency. -- -- The IDF is, roughly speaking, a measure of how popular a term is. idf :: Text -> Corpus -> Double -- | Calculate the tf*idf measure for a term given a document and a corpus. tf_idf :: Text -> [Text] -> Corpus -> Double cosVec :: TermVector -> TermVector -> Double -- | Calculate the magnitude of a vector. magnitude :: TermVector -> Double -- | find the dot product of two vectors. 
dotProd :: TermVector -> TermVector -> Double -- | This is a very simple wrapper around Parsec for writing Information -- Extraction patterns. -- -- Because the particular tags/tokens to parse depends on the training -- corpus (for POS tagging) and the domain, this module only provides -- basic extractors. You can, for example, create an extractor to find -- noun phrases by combining the components provided here: -- --
--   nounPhrase :: Extractor (Text, Tag)
--   nounPhrase = do
--     nlist <- many1 (try (posTok $ Tag "NN")
--                 <|> try (posTok $ Tag "DT")
--                     <|> (posTok $ Tag "JJ"))
--     let term = T.intercalate " " (map fst nlist)
--     return (term, Tag "n-phr")
--   
module NLP.Extraction.Parsec -- | A Parsec parser. -- -- Example usage: -- --
--   > set -XOverloadedStrings
--   > import Text.Parsec.Prim
--   > parse myExtractor "interactive repl" someTaggedSentence
--   
type Extractor t = Parsec (TaggedSentence t) () -- | Consume a token with the given POS Tag posTok :: Tag t => t -> Extractor t (POS t) -- | Consume a token with the specified POS prefix. -- --
--   > parse (posPrefix "n") "ghci" [(Bob, Tag "np")]
--   Right [(Bob, Tag "np")]
--   
posPrefix :: Tag t => Text -> Extractor t (POS t) -- | Text equality matching with optional case sensitivity. matches :: CaseSensitive -> Token -> Token -> Bool -- | Consume a token with the given lexical representation. txtTok :: Tag t => CaseSensitive -> Token -> Extractor t (POS t) -- | Consume any one non-empty token. anyToken :: Tag t => Extractor t (POS t) oneOf :: Tag t => CaseSensitive -> [Token] -> Extractor t (POS t) -- | Skips any number of fill tokens, ending with the end parser, and -- returning the last parsed result. -- -- This is useful when you know what you're looking for and (for -- instance) don't care what comes first. followedBy :: Tag t => Extractor t b -> Extractor t a -> Extractor t a instance (Monad m, ChunkTag c, Tag t) => Stream (ChunkedSentence c t) m (ChunkOr c t) instance (Monad m, Tag t) => Stream (TaggedSentence t) m (POS t) -- | Example parsing with Parsec. -- -- This example shows how the following grammar, from NLTK, can be -- implemented in Chatter, using Parsec-based Information Extraction -- patterns: -- --
--   grammar = r"""
--    NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
--    PP: {<IN><NP>}               # Chunk prepositions followed by NP
--    VP: {<VB.*><NP|PP|CLAUSE>+$} # Chunk verbs and their arguments
--    CLAUSE: {<NP><VP>}           # Chunk NP, VP
--    """
--   
-- --
--   > import NLP.Extraction.Examples.ParsecExamples
--   > import Text.Parsec.Prim
--   > tgr <- defaultTagger
--   > map (parse findClause "interactive") $ tag tgr "Mary saw the cat sit on the mat."
--   [Right (Chunk_CN (Chunk C_CL [Chunk_CN (Chunk C_NP [POS_CN (POS AT (Token "the")),POS_CN (POS NN (Token "cat"))]),Chunk_CN (Chunk C_VP [POS_CN (POS VB (Token "sit")),Chunk_CN (Chunk C_PP [POS_CN (POS IN (Token "on")),Chunk_CN (Chunk C_NP [POS_CN (POS AT (Token "the")),POS_CN (POS NN (Token "mat"))])])])]))]
--   
module NLP.Extraction.Examples.ParsecExamples -- | Find a clause in a larger collection of text. -- -- A clause is defined by the clause extractor, and is a Noun -- Phrase followed (immediately) by a Verb Phrase -- -- findClause skips over leading tokens, if needed, to locate a clause. findClause :: Extractor Tag (ChunkOr Chunk Tag) -- | Find a Noun Phrase followed by a Verb Phrase clause :: Extractor Tag (ChunkOr Chunk Tag) prepPhrase :: Extractor Tag (ChunkOr Chunk Tag) nounPhrase :: Extractor Tag (ChunkOr Chunk Tag) verbPhrase :: Extractor Tag (ChunkOr Chunk Tag) module NLP.Corpora.Parsing -- | Read a POS-tagged corpus out of a Text string of the form: "token/tag -- token/tag..." -- --
--   >>> readPOS "Dear/jj Sirs/nns :/: Let/vb"
--   [("Dear",JJ),("Sirs",NNS),(":",Other ":"),("Let",VB)]
--   
readPOS :: Tag t => Text -> TaggedSentence t readPOSWith :: Tag t => (Text -> t) -> Text -> TaggedSentence t -- | Returns all but the last element of a string, unless the string is -- empty, in which case it returns that string. safeInit :: Text -> Text -- | Averaged Perceptron Tagger -- -- Adapted from the Python implementation found here: -- -- module NLP.POS.AvgPerceptronTagger -- | Create an Averaged Perceptron Tagger using the specified back-off -- tagger as a fall-back, if one is specified. -- -- This uses a tokenizer adapted from the tokenize package for a -- tokenizer, and Eric Kow's fullstop sentence segmenter -- (http://hackage.haskell.org/package/fullstop) as a sentence -- splitter. mkTagger :: Tag t => Perceptron -> Maybe (POSTagger t) -> POSTagger t -- | Train a new Perceptron. -- -- The training corpus should be a collection of sentences, one sentence -- on each line, and with each token tagged with a part of speech. -- -- For example, the input: -- --
--   "The/DT dog/NN jumped/VB ./.\nThe/DT cat/NN slept/VB ./."
--   
-- -- defines two training sentences. -- --
--   >>> tagger <- trainNew "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--   
--   >>> tag tagger $ map T.words $ T.lines "Dear sir"
--   "Dear/jj Sirs/nns :/: Let/vb"
--   
trainNew :: Tag t => (Text -> t) -> Text -> IO Perceptron -- | Train a new Perceptron on a corpus of files. trainOnFiles :: Tag t => (Text -> t) -> [FilePath] -> IO Perceptron -- | Add training examples to a perceptron. -- --
--   >>> tagger <- train emptyPerceptron "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--   
--   >>> tag tagger $ map T.words $ T.lines "Dear sir"
--   "Dear/jj Sirs/nns :/: Let/vb"
--   
-- -- If you're using multiple input files, this can be useful to improve -- performance (by folding over the files). For example, see -- trainOnFiles train :: Tag t => (Text -> t) -> Perceptron -> Text -> IO Perceptron -- | Train a model from sentences. -- -- Ported from Python: -- --
--   def train(self, sentences, save_loc=None, nr_iter=5):
--       self._make_tagdict(sentences)
--       self.model.classes = self.classes
--       prev, prev2 = START
--       for iter_ in range(nr_iter):
--           c = 0
--           n = 0
--           for words, tags in sentences:
--               context = START + [self._normalize(w) for w in words] + END
--               for i, word in enumerate(words):
--                   guess = self.tagdict.get(word)
--                   if not guess:
--                       feats = self._get_features(i, word, context, prev, prev2)
--                       guess = self.model.predict(feats)
--                       self.model.update(tags[i], guess, feats)
--                   prev2 = prev; prev = guess
--                   c += guess == tags[i]
--                   n += 1
--           random.shuffle(sentences)
--           logging.info("Iter {0}: {1}/{2}={3}".format(iter_, c, n, _pc(c, n)))
--       self.model.average_weights()
--       # Pickle as a binary file
--       if save_loc is not None:
--           pickle.dump((self.model.weights, self.tagdict, self.classes),
--                        open(save_loc, 'wb'), -1)
--       return None
--   
trainInt :: Tag t => Int -> Perceptron -> [TaggedSentence t] -> IO Perceptron -- | Tag a document (represented as a list of Sentences) with a -- trained Perceptron -- -- Ported from Python: -- --
--   def tag(self, corpus, tokenize=True):
--       '''Tags a string `corpus`.'''
--       # Assume untokenized corpus has \n between sentences and ' ' between words
--       s_split = nltk.sent_tokenize if tokenize else lambda t: t.split('\n')
--       w_split = nltk.word_tokenize if tokenize else lambda s: s.split()
--       def split_sents(corpus):
--           for s in s_split(corpus):
--               yield w_split(s)
--        prev, prev2 = self.START
--       tokens = []
--       for words in split_sents(corpus):
--           context = self.START + [self._normalize(w) for w in words] + self.END
--           for i, word in enumerate(words):
--               tag = self.tagdict.get(word)
--               if not tag:
--                   features = self._get_features(i, word, context, prev, prev2)
--                   tag = self.model.predict(features)
--               tokens.append((word, tag))
--               prev2 = prev
--               prev = tag
--       return tokens
--   
tag :: Tag t => Perceptron -> [Sentence] -> [TaggedSentence t] -- | Tag a single sentence. tagSentence :: Tag t => Perceptron -> Sentence -> TaggedSentence t -- | An empty perceptron, used to start training. emptyPerceptron :: Perceptron taggerID :: ByteString readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t) -- | This module aims to make tagging text with parts of speech trivially -- easy. -- -- If you're new to chatter and POS-tagging, then I suggest you -- simply try: -- --
--   >>> tagger <- defaultTagger
--   
--   >>> tagStr tagger "This is a sample sentence."
--   "This/dt is/bez a/at sample/nn sentence/nn ./."
--   
-- -- Note that we used tagStr, instead of tag, or -- tagText. Many people don't (yet!) use Data.Text by -- default, so there is a wrapper around tag that packs and -- unpacks the String. This is inefficient, but it's just to get -- you started, and tagStr can be very handy when you're debugging -- a tagger in ghci (or cabal repl). -- -- tag exposes more details of the tokenization and tagging, since -- it returns a list of TaggedSentences, but it doesn't print -- results as nicely. module NLP.POS -- | Tag a chunk of input text with part-of-speech tags, using the sentence -- splitter, tokenizer, and tagger contained in the POSTagger. tag :: Tag t => POSTagger t -> Text -> [TaggedSentence t] -- | Tag the tokens in a string. -- -- Returns a space-separated string of tokens, each token suffixed with -- the part of speech. For example: -- --
--   >>> tag tagger "the dog jumped ."
--   "the/at dog/nn jumped/vbd ./."
--   
tagStr :: Tag t => POSTagger t -> String -> String -- | Text version of tagStr tagText :: Tag t => POSTagger t -> Text -> Text -- | Train a POSTagger on a corpus of sentences. -- -- This will recurse through the POSTagger stack, training all the -- backoff taggers as well. In order to do that, this function has to be -- generic to the kind of taggers used, so it is not possible to train up -- a new POSTagger from nothing: train wouldn't know what tagger -- to create. -- -- To get around that restriction, you can use the various -- mkTagger implementations, such as mkTagger or -- NLP.POS.AvgPerceptronTagger.mkTagger'. For example: -- --
--   import NLP.POS.AvgPerceptronTagger as APT
--   
--   let newTagger = APT.mkTagger APT.emptyPerceptron Nothing
--   posTgr <- train newTagger trainingExamples
--   
train :: Tag t => POSTagger t -> [TaggedSentence t] -> IO (POSTagger t) -- | Train a tagger on string input in the standard form for POS tagged -- corpora: -- --
--   trainStr tagger "the/at dog/nn jumped/vbd ./."
--   
trainStr :: Tag t => POSTagger t -> String -> IO (POSTagger t) -- | The Text version of trainStr trainText :: Tag t => POSTagger t -> Text -> IO (POSTagger t) tagTokens :: Tag t => POSTagger t -> [Sentence] -> [TaggedSentence t] -- | Evaluate a POSTagger. -- -- Measures accuracy over all tags in the test corpus. -- -- Accuracy is calculated as: -- --
--   |tokens tagged correctly| / |all tokens|
--   
eval :: Tag t => POSTagger t -> [TaggedSentence t] -> Double serialize :: Tag t => POSTagger t -> ByteString deserialize :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)) -> ByteString -> Either String (POSTagger t) -- | The default table of tagger IDs to readTagger functions. Each tagger -- packaged with Chatter should have an entry here. By convention, the -- IDs used are the fully qualified module name of the tagger package. taggerTable :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)) -- | Store a POSTagger to a file. saveTagger :: Tag t => POSTagger t -> FilePath -> IO () -- | Load a tagger, using the internal taggerTable. If you need to -- specify your own mappings for new composite taggers, you should use -- deserialize. -- -- This function checks the filename to determine if the content should -- be decompressed. If the file ends with ".gz", then we assume it is a -- gzipped model. loadTagger :: Tag t => FilePath -> IO (POSTagger t) -- | A basic POS tagger. defaultTagger :: IO (POSTagger Tag) -- | A POS tagger that has been trained on the Conll 2000 POS tags. conllTagger :: IO (POSTagger Tag) -- | A POS tagger trained on a subset of the Brown corpus. brownTagger :: IO (POSTagger Tag) -- | NLP.Chunk aims to make phrasal chunking trivially easy -- it is the -- corollary to NLP.POS. -- -- The simplest way to try out chunking with Chatter is to open a repl -- after installing chatter and try this: -- --
--   > import NLP.POS
--   > import NLP.Chunk
--   > tgr <- defaultTagger
--   > chk <- defaultChunker
--   > chunkText tgr chk "Monads are monoids in the category of endofunctors."
--    "[NP Monads/NNS are/VBP monoids/NNS] [PP in/IN] [NP the/DT category/NN] [PP of/IN] [NP endofunctors/NNS] ./."
--   
-- -- Note that it isn't perfect--phrase chunking is tricky, and the -- defaultTagger and defaultChunker aren't trained on the -- largest training set (they use Conll 2000). You can easily train more -- taggers and chunkers using the APIs exposed here if you have the -- training data to do so. module NLP.Chunk -- | A basic Phrasal chunker. defaultChunker :: IO (Chunker Chunk Tag) -- | Convenient function to load the Conll2000 Chunker. conllChunker :: IO (Chunker Chunk Tag) -- | Train a chunker on a set of additional examples. train :: (ChunkTag c, Tag t) => Chunker c t -> [ChunkedSentence c t] -> IO (Chunker c t) -- | Chunk a TaggedSentence that has been produced by a Chatter -- tagger, producing a rich representation of the Chunks and the Tags -- detected. -- -- If you just want to see chunked output from standard text, you -- probably want chunkText or chunkStr. chunk :: (ChunkTag c, Tag t) => Chunker c t -> [TaggedSentence t] -> [ChunkedSentence c t] -- | Convenience function to Tokenize, POS-tag, then Chunk the provided -- text, and format the result in an easy-to-read format. -- --
--   > tgr <- defaultTagger
--   > chk <- defaultChunker
--   > chunkText tgr chk "The brown dog jumped over the lazy cat."
--   "[NP The/DT brown/NN dog/NN] [VP jumped/VBD] [NP over/IN the/DT lazy/JJ cat/NN] ./."
--   
chunkText :: (ChunkTag c, Tag t) => POSTagger t -> Chunker c t -> Text -> Text -- | A wrapper around chunkText that packs strings. chunkStr :: (ChunkTag c, Tag t) => POSTagger t -> Chunker c t -> String -> String -- | The default table of tagger IDs to readTagger functions. Each tagger -- packaged with Chatter should have an entry here. By convention, the -- IDs used are the fully qualified module name of the tagger package. chunkerTable :: (ChunkTag c, Tag t) => Map ByteString (ByteString -> Either String (Chunker c t)) -- | Store a Chunker to disk. saveChunker :: (ChunkTag c, Tag t) => Chunker c t -> FilePath -> IO () -- | Load a Chunker from disk, optionally gunzipping if needed. -- (based on file extension) loadChunker :: (ChunkTag c, Tag t) => FilePath -> IO (Chunker c t) serialize :: (ChunkTag c, Tag t) => Chunker c t -> ByteString deserialize :: (ChunkTag c, Tag t) => Map ByteString (ByteString -> Either String (Chunker c t)) -> ByteString -> Either String (Chunker c t) -- | A parser for the Wiki NER work presented in: -- -- @Article{nothman2012:artint:wikiner, author = {Joel Nothman and Nicky -- Ringland and Will Radford and Tara Murphy and James R. Curran}, title -- = {Learning multilingual named entity recognition from {Wikipedia}}, -- journal = {Artificial Intelligence}, publisher = {Elsevier}, volume = -- {194}, pages = {151--175}, year = {2012}, doi = -- {10.1016/j.artint.2012.03.006}, url = -- {http://dx.doi.org/10.1016/j.artint.2012.03.006} } -- -- And provided here: -- http://schwa.org/projects/resources/wiki/Wikiner -- -- The format does not appear to be documented, but it looks like: -- -- -- -- For example, the sentence: The Oxford Companion to Philosophy says, -- "there is no single defining position that all anarchists hold, and -- those considered anarchists at best share a certain family -- resemblance."
-- -- Is rendered as: The|DT|I-MISC Oxford|NNP|I-MISC Companion|NNP|I-MISC -- to|TO|I-MISC Philosophy|NNP|I-MISC says|VBZ|O ,|,|O "|LQU|O there|EX|O -- is|VBZ|O no|DT|O single|JJ|O defining|VBG|O position|NN|O that|IN|O -- all|DT|O anarchists|NNS|O hold|VBP|O ,|,|O and|CC|O those|DT|O -- considered|VBN|O anarchists|NNS|O at|IN|O best|JJS|O share|NN|O a|DT|O -- certain|JJ|O family|NN|O resemblance|NN|O .|.|O "|RQU|O -- -- This module also provides a trained model for NER via the averaged -- perceptron chunker. This actually kind of works, which is a bit -- amazing. For example: -- --
--   import NLP.Corpora.WikiNer
--   import NLP.POS
--   import NLP.Chunk
--   tgr <- defaultTagger
--   chk <- wikiNerChunker
--   chunkText tgr chk "Real World Haskell is a book created by Don Stewart, Bryan O'Sullivan, and Jon Goerzen."
--   "[ORG Real/NNP] [MISC World/NNP] [PER Haskell/NNP] is/VBZ a/DT book/NN created/VBN by/IN [PER Don/NNP Stewart/NNP] ,/, [PER Bryan/NNP O'Sullivan/NNP] ,/, and/CC [PER Jon/NNP Goerzen/NNP] ./."
--   
module NLP.Corpora.WikiNer parseWikiNer :: Text -> Either Error [[IOBChunk Chunk Tag]] -- | Train a chunker on a provided corpus. trainChunker :: [FilePath] -> IO (Chunker Chunk Tag) wikiNerChunker :: IO (Chunker Chunk Tag) -- | Different classes of Named Entity used in the WikiNER data set. data Chunk LOC :: Chunk MISC :: Chunk ORG :: Chunk PER :: Chunk -- | "out" not a chunk. C_O :: Chunk instance Read Chunk instance Show Chunk instance Ord Chunk instance Eq Chunk instance Generic Chunk instance Enum Chunk instance Bounded Chunk instance Datatype D1Chunk instance Constructor C1_0Chunk instance Constructor C1_1Chunk instance Constructor C1_2Chunk instance Constructor C1_3Chunk instance Constructor C1_4Chunk instance ChunkTag Chunk instance Serialize Chunk instance Arbitrary Chunk