-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | A library of simple NLP algorithms.
--   
@package chatter
@version 0.5.2.0

module Data.DefaultMap

-- | Defaulting Map; a Map that returns a default value when queried for a
--   key that does not exist.
data DefaultMap k v
DefMap :: v -> Map k v -> DefaultMap k v
defDefault :: DefaultMap k v -> v
defMap :: DefaultMap k v -> Map k v

-- | Create an empty <a>DefaultMap</a>
empty :: v -> DefaultMap k v

-- | Query the map for a value. Returns the default if the key is not
--   found.
lookup :: Ord k => k -> DefaultMap k v -> v

-- | Create a <a>DefaultMap</a> from a default value and a list.
fromList :: Ord k => v -> [(k, v)] -> DefaultMap k v

-- | Access the keys as a list.
keys :: DefaultMap k a -> [k]

-- | Fold over the values in the map.
--   
--   Note that this *does* not fold over the default value -- this fold
--   behaves in the same way as a standard <a>foldl</a>
foldl :: (a -> b -> a) -> a -> DefaultMap k b -> a
instance (Ord k, Read k, Read v) => Read (DefaultMap k v)
instance (Show k, Show v) => Show (DefaultMap k v)
instance (Eq k, Eq v) => Eq (DefaultMap k v)
instance (Ord k, Ord v) => Ord (DefaultMap k v)
instance Generic (DefaultMap k v)
instance Datatype D1DefaultMap
instance Constructor C1_0DefaultMap
instance Selector S1_0_0DefaultMap
instance Selector S1_0_1DefaultMap
instance (Arbitrary k, Arbitrary v, Ord k) => Arbitrary (DefaultMap k v)
instance (NFData k, NFData v, Ord k) => NFData (DefaultMap k v)
instance (Ord k, Serialize k, Serialize v) => Serialize (DefaultMap k v)


-- | Utilities for reading mailman-style email archives.
module NLP.Corpora.Email

-- | Path to the directory containing all the PLUG archives.
plugDataPath :: FilePath
plugArchiveText :: IO [Text]
plugArchiveTokens :: IO [[Text]]
fullPlugArchive :: IO [Message]
readF :: FilePath -> IO Text

module NLP.Types.General

-- | Just a handy alias for Text
type Error = Text
toEitherErr :: Either String a -> Either Error a

-- | Boolean type to indicate case sensitivity for textual comparisons.
data CaseSensitive
Sensitive :: CaseSensitive
Insensitive :: CaseSensitive
instance Read CaseSensitive
instance Show CaseSensitive
instance Generic CaseSensitive
instance Datatype D1CaseSensitive
instance Constructor C1_0CaseSensitive
instance Constructor C1_1CaseSensitive
instance Arbitrary CaseSensitive
instance Serialize CaseSensitive

module NLP.Types.Tags

-- | The class of named entity sets. This typeclass can be defined entirely
--   in terms of the required class constraints.
class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => NERTag a where fromNERTag = pack . show parseNERTag txt = toEitherErr $ readEither $ unpack txt
fromNERTag :: NERTag a => a -> Text
parseNERTag :: NERTag a => Text -> Either Error a

-- | The class of things that can be regarded as <tt>chunks</tt>; Chunk
--   tags are much like POS tags, but should not be confused. Generally,
--   chunks distinguish between different phrasal categories (e.g.; Noun
--   Phrases, Verb Phrases, Prepositional Phrases, etc.)
class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => ChunkTag a
fromChunk :: ChunkTag a => a -> Text
parseChunk :: ChunkTag a => Text -> Either Error a
notChunk :: ChunkTag a => a

-- | The class of POS Tags.
--   
--   We use a typeclass here because POS tags just need a few things in
--   excess of equality (they also need to be serializable and human
--   readable). Passing around all the constraints everywhere becomes a
--   hassle, and it's handy to have a uniform interface to the different
--   kinds of tag types.
--   
--   This typeclass also allows for corpus-specific tags to be
--   distinguished; They have different semantics, so they should not be
--   merged. That said, if you wish to create a unifying POS Tag set, and
--   mappings into that set, you can use the type system to ensure that
--   that is done correctly.
--   
--   This <i>may</i> get renamed to POSTag at some later date.
class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => Tag a
fromTag :: Tag a => a -> Text
parseTag :: Tag a => Text -> a
tagUNK :: Tag a => a
tagTerm :: Tag a => a -> Text
startTag :: Tag a => a
endTag :: Tag a => a
isDt :: Tag a => a -> Bool

-- | A fall-back <a>ChunkTag</a> instance, analogous to <a>RawTag</a>
newtype RawChunk
RawChunk :: Text -> RawChunk

-- | A fallback POS tag instance.
newtype RawTag
RawTag :: Text -> RawTag

-- | Tag instance for unknown tagsets.
instance Ord RawChunk
instance Eq RawChunk
instance Read RawChunk
instance Show RawChunk
instance Generic RawChunk
instance Ord RawTag
instance Eq RawTag
instance Read RawTag
instance Show RawTag
instance Generic RawTag
instance Datatype D1RawChunk
instance Constructor C1_0RawChunk
instance Datatype D1RawTag
instance Constructor C1_0RawTag
instance Serialize Text
instance Arbitrary RawTag
instance Tag RawTag
instance Serialize RawTag
instance ChunkTag RawChunk
instance Serialize RawChunk

module NLP.Types.Tree

-- | A sentence of tokens without tags. Generated by the tokenizer.
--   (tokenizer :: Text -&gt; Sentence)
data Sentence
Sent :: [Token] -> Sentence

-- | Extract the token list from a <a>Sentence</a>
tokens :: Sentence -> [Token]

-- | Apply a parallel list of <a>Tag</a>s to a <a>Sentence</a>.
applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t

-- | A chunked sentence has POS tags and chunk tags. Generated by a
--   chunker.
--   
--   (chunker :: (ChunkTag chunk, Tag tag) =&gt; TaggedSentence tag -&gt;
--   ChunkedSentence chunk tag)
data ChunkedSentence chunk tag
ChunkedSent :: [ChunkOr chunk tag] -> ChunkedSentence chunk tag

-- | A data type to represent the portions of a parse tree for Chunks. Note
--   that this part of the parse tree could be a POS tag with no chunk.
data ChunkOr chunk tag
Chunk_CN :: (Chunk chunk tag) -> ChunkOr chunk tag
POS_CN :: (POS tag) -> ChunkOr chunk tag

-- | A Chunk that strictly contains chunks or POS tags.
data Chunk chunk tag
Chunk :: chunk -> [ChunkOr chunk tag] -> Chunk chunk tag
showChunkedSent :: (ChunkTag c, Tag t) => ChunkedSentence c t -> Text

-- | A tagged sentence has POS Tags. Generated by a part-of-speech tagger.
--   (tagger :: Tag tag =&gt; Sentence -&gt; TaggedSentence tag)
data TaggedSentence tag
TaggedSent :: [POS tag] -> TaggedSentence tag

-- | Generate a Text representation of a TaggedSentence in the common
--   tagged format, eg:
--   
--   <pre>
--   "the/at dog/nn jumped/vbd ./."
--   </pre>
printTS :: Tag t => TaggedSentence t -> Text

-- | Remove the tags from a tagged sentence
stripTags :: Tag t => TaggedSentence t -> Sentence

-- | Extract the tags from a tagged sentence, returning a parallel list of
--   tags along with the underlying Sentence.
unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t])
unzipChunks :: (ChunkTag c, Tag t) => ChunkedSentence c t -> (TaggedSentence t, [c])

-- | Combine the results of POS taggers, using the second param to fill in
--   <a>tagUNK</a> entries, where possible.
combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t]

-- | Merge <a>TaggedSentence</a> values, preferring the tags in the first
--   <a>TaggedSentence</a>. Delegates to <a>pickTag</a>.
combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t

-- | Returns the first param, unless it is tagged <a>tagUNK</a>. Throws an
--   error if the text does not match.
pickTag :: Tag t => POS t -> POS t -> POS t

-- | Helper to create <a>ChunkOr</a> types.
mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag

-- | Helper to create <a>ChunkOr</a> types that just hold POS tagged data.
mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag

-- | A POS-tagged token.
data POS tag
POS :: tag -> Token -> POS tag
posTag :: POS tag -> tag
posToken :: POS tag -> Token

-- | Show the underlying text token only.
showPOStok :: Tag tag => POS tag -> Text
showPOStag :: Tag tag => POS tag -> Text

-- | Show the text and tag.
printPOS :: Tag tag => POS tag -> Text

-- | Raw tokenized text.
--   
--   <a>Token</a> has an <a>IsString</a> instance to simplify use.
data Token
Token :: Text -> Token

-- | Extract the text of a <a>Token</a>
showTok :: Token -> Text

-- | Extract the last three characters of a <a>Token</a>, if the token is
--   long enough, otherwise returns the full token text.
suffix :: Token -> Text

-- | Extract the list of <a>POS</a> tags from a <a>TaggedSentence</a>
unTS :: Tag t => TaggedSentence t -> [POS t]

-- | Calculate the length of a <a>TaggedSentence</a> (in terms of the
--   number of tokens).
tsLength :: Tag t => TaggedSentence t -> Int

-- | Brutally concatenate a list of <a>TaggedSentence</a>s into one.
tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t

-- | True if the input sentence contains the given text token. Does not do
--   partial or approximate matching, and compares details in a fully
--   case-sensitive manner.
contains :: Tag t => TaggedSentence t -> Text -> Bool

-- | True if the input sentence contains the given POS tag. Does not do
--   partial matching (such as prefix matching)
containsTag :: Tag t => TaggedSentence t -> t -> Bool

-- | Compare the tag of a POS-tagged token with a supplied tag.
posTagMatches :: Tag t => t -> POS t -> Bool

-- | Compare the POS-tagged token with a text string.
posTokMatches :: Tag t => Text -> POS t -> Bool

-- | Compare a token with a text string.
tokenMatches :: Text -> Token -> Bool
instance Read Token
instance Show Token
instance Eq Token
instance Read tag => Read (POS tag)
instance Show tag => Show (POS tag)
instance Eq tag => Eq (POS tag)
instance Read tag => Read (TaggedSentence tag)
instance Show tag => Show (TaggedSentence tag)
instance Eq tag => Eq (TaggedSentence tag)
instance (Read chunk, Read tag) => Read (ChunkOr chunk tag)
instance (Show chunk, Show tag) => Show (ChunkOr chunk tag)
instance (Eq chunk, Eq tag) => Eq (ChunkOr chunk tag)
instance (Read chunk, Read tag) => Read (Chunk chunk tag)
instance (Show chunk, Show tag) => Show (Chunk chunk tag)
instance (Eq chunk, Eq tag) => Eq (Chunk chunk tag)
instance (Read chunk, Read tag) => Read (ChunkedSentence chunk tag)
instance (Show chunk, Show tag) => Show (ChunkedSentence chunk tag)
instance (Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag)
instance Read Sentence
instance Show Sentence
instance Eq Sentence
instance IsString Token
instance Arbitrary Token
instance (Arbitrary t, Tag t) => Arbitrary (POS t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (Chunk c t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkOr c t)
instance (Arbitrary t, Tag t) => Arbitrary (TaggedSentence t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t)
instance Arbitrary Sentence

module NLP.Types.IOB

-- | Data type to indicate IOB tags for chunking
data IOBChunk chunk tag

-- | Begin marker.
BChunk :: (POS tag) -> chunk -> IOBChunk chunk tag

-- | In chunk tag
IChunk :: (POS tag) -> chunk -> IOBChunk chunk tag

-- | Not in a chunk.
OChunk :: (POS tag) -> IOBChunk chunk tag
getPOS :: (ChunkTag c, Tag t) => IOBChunk c t -> POS t
toTaggedSentence :: (ChunkTag c, Tag t) => [IOBChunk c t] -> TaggedSentence t

-- | Parse an IOB-chunk encoded line of text.
--   
--   Assumes that the line has three space-delimited entries, in the
--   format:
--   
--   <pre>
--   token POSTag IOBChunk
--   </pre>
--   
--   For example:
--   
--   <pre>
--   &gt; parseIOBLine "We PRP B-NP" :: IOBChunk B.Chunk B.Tag
--   BChunk (POS B.PRP (Token "We")) B.C_NP
--   </pre>
parseIOBLine :: (ChunkTag chunk, Tag tag) => Text -> Either Error (IOBChunk chunk tag)
iobBuilder :: (ChunkTag c, Tag t) => Text -> (POS t -> Either Error (IOBChunk c t))

-- | Turn an IOB result into a tree.
toChunkTree :: (ChunkTag c, Tag t) => [IOBChunk c t] -> ChunkedSentence c t

-- | Parse an IOB-encoded corpus.
parseIOB :: (ChunkTag chunk, Tag tag) => Text -> Either Error [[IOBChunk chunk tag]]
parseSentence :: (ChunkTag chunk, Tag tag) => [Text] -> Either Error [IOBChunk chunk tag]

-- | Just split a body of text into lines, and then into "paragraphs". Each
--   resulting sub list is separated by empty lines in the original text.
--   
--   e.g.;
--   
--   <pre>
--   &gt; getSentences "He\njumped\n.\n\nShe\njumped\n."
--   [["He", "jumped", "."], ["She", "jumped", "."]]
--   </pre>
getSentences :: Text -> [[Text]]
instance (Read chunk, Read tag) => Read (IOBChunk chunk tag)
instance (Show chunk, Show tag) => Show (IOBChunk chunk tag)
instance (Eq chunk, Eq tag) => Eq (IOBChunk chunk tag)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (IOBChunk c t)

module NLP.Tokenize.Chatter
runTokenizer :: Tokenizer -> (Text -> Sentence)
tokenize :: Text -> Sentence


-- | Data types representing the POS tags and Chunk tags derived from the
--   Conll2000 training corpus.
module NLP.Corpora.Conll

-- | Named entity categories defined for the Conll 2003 task.
data NERTag
PER :: NERTag
ORG :: NERTag
LOC :: NERTag
MISC :: NERTag

-- | Phrase chunk tags defined for the Conll task.
data Chunk
ADJP :: Chunk
ADVP :: Chunk
CONJP :: Chunk
INTJ :: Chunk
LST :: Chunk

-- | Noun Phrase.
NP :: Chunk

-- | Prepositional Phrase.
PP :: Chunk
PRT :: Chunk
SBAR :: Chunk
UCP :: Chunk

-- | Verb Phrase.
VP :: Chunk

-- | "out"; not a chunk.
O :: Chunk
readTag :: Text -> Either Error Tag

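-- | Order matters here: the patterns are applied top-to-bottom in one
--   direction of the tag/text conversion and in reverse order in the
--   other (presumably via <a>reversePatterns</a>). The original wording
--   said "when generating tags" for both directions -- TODO confirm
--   which direction uses which order.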
tagTxtPatterns :: [(Text, Text)]
reversePatterns :: [(Text, Text)]
showTag :: Tag -> Text
replaceAll :: [(Text, Text)] -> (Text -> Text)

-- | These tags may actually be the Penn Treebank tags. But I have not
--   (yet?) seen the punctuation tags added to the Penn set.
--   
--   This particular list was compiled from the union of:
--   
--   <ul>
--   <li>All tags used on the Conll2000 training corpus. (contributing the
--   punctuation tags)</li>
--   <li>The PennTreebank tags, listed here:
--   <a>https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html</a>
--   (which contributed LS over the items in the corpus).</li>
--   <li>The tags: START, END, and Unk, which are used by Chatter.</li>
--   </ul>
data Tag

-- | START tag, used in training.
START :: Tag

-- | END tag, used in training.
END :: Tag

-- | #
Hash :: Tag

-- | $
Dollar :: Tag

-- | ''
CloseDQuote :: Tag

-- | ``
OpenDQuote :: Tag

-- | (
Op_Paren :: Tag

-- | )
Cl_Paren :: Tag

-- | ,
Comma :: Tag

-- | . Sentence Terminator
Term :: Tag

-- | :
Colon :: Tag

-- | Coordinating conjunction
CC :: Tag

-- | Cardinal number
CD :: Tag

-- | Determiner
DT :: Tag

-- | Existential there
EX :: Tag

-- | Foreign word
FW :: Tag

-- | Preposition or subordinating conjunction
IN :: Tag

-- | Adjective
JJ :: Tag

-- | Adjective, comparative
JJR :: Tag

-- | Adjective, superlative
JJS :: Tag

-- | List item marker
LS :: Tag

-- | Modal
MD :: Tag

-- | Noun, singular or mass
NN :: Tag

-- | Noun, plural
NNS :: Tag

-- | Proper noun, singular
NNP :: Tag

-- | Proper noun, plural
NNPS :: Tag

-- | Predeterminer
PDT :: Tag

-- | Possessive ending
POS :: Tag

-- | Personal pronoun
PRP :: Tag

-- | Possessive pronoun
PRPdollar :: Tag

-- | Adverb
RB :: Tag

-- | Adverb, comparative
RBR :: Tag

-- | Adverb, superlative
RBS :: Tag

-- | Particle
RP :: Tag

-- | Symbol
SYM :: Tag

-- | to
TO :: Tag

-- | Interjection
UH :: Tag

-- | Verb, base form
VB :: Tag

-- | Verb, past tense
VBD :: Tag

-- | Verb, gerund or present participle
VBG :: Tag

-- | Verb, past participle
VBN :: Tag

-- | Verb, non-3rd person singular present
VBP :: Tag

-- | Verb, 3rd person singular present
VBZ :: Tag

-- | Wh-determiner
WDT :: Tag

-- | Wh-pronoun
WP :: Tag

-- | Possessive wh-pronoun
WPdollar :: Tag

-- | Wh-adverb
WRB :: Tag
Unk :: Tag
instance Read NERTag
instance Show NERTag
instance Ord NERTag
instance Eq NERTag
instance Generic NERTag
instance Enum NERTag
instance Bounded NERTag
instance Read Chunk
instance Show Chunk
instance Ord Chunk
instance Eq Chunk
instance Generic Chunk
instance Enum Chunk
instance Bounded Chunk
instance Read Tag
instance Show Tag
instance Ord Tag
instance Eq Tag
instance Generic Tag
instance Enum Tag
instance Bounded Tag
instance Datatype D1NERTag
instance Constructor C1_0NERTag
instance Constructor C1_1NERTag
instance Constructor C1_2NERTag
instance Constructor C1_3NERTag
instance Datatype D1Chunk
instance Constructor C1_0Chunk
instance Constructor C1_1Chunk
instance Constructor C1_2Chunk
instance Constructor C1_3Chunk
instance Constructor C1_4Chunk
instance Constructor C1_5Chunk
instance Constructor C1_6Chunk
instance Constructor C1_7Chunk
instance Constructor C1_8Chunk
instance Constructor C1_9Chunk
instance Constructor C1_10Chunk
instance Constructor C1_11Chunk
instance Datatype D1Tag
instance Constructor C1_0Tag
instance Constructor C1_1Tag
instance Constructor C1_2Tag
instance Constructor C1_3Tag
instance Constructor C1_4Tag
instance Constructor C1_5Tag
instance Constructor C1_6Tag
instance Constructor C1_7Tag
instance Constructor C1_8Tag
instance Constructor C1_9Tag
instance Constructor C1_10Tag
instance Constructor C1_11Tag
instance Constructor C1_12Tag
instance Constructor C1_13Tag
instance Constructor C1_14Tag
instance Constructor C1_15Tag
instance Constructor C1_16Tag
instance Constructor C1_17Tag
instance Constructor C1_18Tag
instance Constructor C1_19Tag
instance Constructor C1_20Tag
instance Constructor C1_21Tag
instance Constructor C1_22Tag
instance Constructor C1_23Tag
instance Constructor C1_24Tag
instance Constructor C1_25Tag
instance Constructor C1_26Tag
instance Constructor C1_27Tag
instance Constructor C1_28Tag
instance Constructor C1_29Tag
instance Constructor C1_30Tag
instance Constructor C1_31Tag
instance Constructor C1_32Tag
instance Constructor C1_33Tag
instance Constructor C1_34Tag
instance Constructor C1_35Tag
instance Constructor C1_36Tag
instance Constructor C1_37Tag
instance Constructor C1_38Tag
instance Constructor C1_39Tag
instance Constructor C1_40Tag
instance Constructor C1_41Tag
instance Constructor C1_42Tag
instance Constructor C1_43Tag
instance Constructor C1_44Tag
instance Constructor C1_45Tag
instance Constructor C1_46Tag
instance Constructor C1_47Tag
instance ChunkTag Chunk
instance Serialize Tag
instance Arbitrary Tag
instance Tag Tag
instance Serialize Chunk
instance Arbitrary Chunk
instance NERTag NERTag
instance Serialize NERTag
instance Arbitrary NERTag


-- | The internal implementation of critical types in terms of the Brown
--   corpus.
module NLP.Corpora.Brown
data Tag

-- | START tag, used in training.
START :: Tag

-- | END tag, used in training.
END :: Tag

-- | (
Op_Paren :: Tag

-- | )
Cl_Paren :: Tag

-- | <tt>*</tt> (negator) e.g.; not n't
Negator :: Tag

-- | ,
Comma :: Tag

-- | <tt>-</tt> (dash)
Dash :: Tag

-- | . Sentence Terminator
Term :: Tag

-- | :
Colon :: Tag

-- | determiner/pronoun, pre-qualifier e.g.; quite such rather
ABL :: Tag

-- | determiner/pronoun, pre-quantifier e.g.; all half many nary
ABN :: Tag

-- | determiner/pronoun, double conjunction or pre-quantifier e.g.; both
ABX :: Tag

-- | determiner/pronoun, post-determiner e.g.; many other next more last former
--   little several enough most least only very few fewer past same Last
--   latter less single plenty 'nough lesser certain various manye
--   next-to-last particular final previous present nuf
AP :: Tag

-- | determiner/pronoun, post-determiner, genitive e.g.; other's
APdollar :: Tag

-- | determiner/pronoun, post-determiner, hyphenated pair e.g.; many-much
AP_pl_AP :: Tag

-- | article e.g.; the an no a every th' ever' ye
AT :: Tag

-- | verb "to be", infinitive or imperative e.g.; be
BE :: Tag

-- | verb "to be", past tense, 2nd person singular or all persons plural
--   e.g.; were
BED :: Tag

-- | verb "to be", past tense, 2nd person singular or all persons plural,
--   negated e.g.; weren't
BEDstar :: Tag

-- | verb "to be", past tense, 1st and 3rd person singular e.g.; was
BEDZ :: Tag

-- | verb "to be", past tense, 1st and 3rd person singular, negated e.g.;
--   wasn't
BEDZstar :: Tag

-- | verb "to be", present participle or gerund e.g.; being
BEG :: Tag

-- | verb "to be", present tense, 1st person singular e.g.; am
BEM :: Tag

-- | verb "to be", present tense, 1st person singular, negated e.g.; ain't
BEMstar :: Tag

-- | verb "to be", past participle e.g.; been
BEN :: Tag

-- | verb "to be", present tense, 2nd person singular or all persons plural
--   e.g.; are art
BER :: Tag

-- | verb "to be", present tense, 2nd person singular or all persons
--   plural, negated e.g.; aren't ain't
BERstar :: Tag

-- | verb "to be", present tense, 3rd person singular e.g.; is
BEZ :: Tag

-- | verb "to be", present tense, 3rd person singular, negated e.g.; isn't
--   ain't
BEZstar :: Tag

-- | conjunction, coordinating e.g.; and or but plus &amp; either neither
--   nor yet 'n' and/or minus an'
CC :: Tag

-- | numeral, cardinal e.g.; two one 1 four 2 1913 71 74 637 1937 8 five
--   three million 87-31 29-5 seven 1,119 fifty-three 7.5 billion hundred
--   125,000 1,700 60 100 six ...
CD :: Tag

-- | numeral, cardinal, genitive e.g.; 1960's 1961's .404's
CDdollar :: Tag

-- | conjunction, subordinating e.g.; that as after whether before while
--   like because if since for than altho until so unless though providing
--   once lest s'posin' till whereas whereupon supposing tho' albeit
--   then so's 'fore
CS :: Tag

-- | verb "to do", uninflected present tense, infinitive or imperative
--   e.g.; do dost
DO :: Tag

-- | verb "to do", uninflected present tense or imperative, negated e.g.;
--   don't
DOstar :: Tag

-- | verb "to do", past or present tense + pronoun, personal, nominative,
--   not 3rd person singular e.g.; d'you
DO_pl_PPSS :: Tag

-- | verb "to do", past tense e.g.; did done
DOD :: Tag

-- | verb "to do", past tense, negated e.g.; didn't
DODstar :: Tag

-- | verb "to do", present tense, 3rd person singular e.g.; does
DOZ :: Tag

-- | verb "to do", present tense, 3rd person singular, negated e.g.;
--   doesn't don't
DOZstar :: Tag

-- | determiner/pronoun, singular e.g.; this each another that 'nother
DT :: Tag

-- | determiner/pronoun, singular, genitive e.g.; another's
DTdollar :: Tag

-- | determiner/pronoun + verb "to be", present tense, 3rd person singular
--   e.g.; that's
DT_pl_BEZ :: Tag

-- | determiner/pronoun + modal auxiliary e.g.; that'll this'll
DT_pl_MD :: Tag

-- | determiner/pronoun, singular or plural e.g.; any some
DTI :: Tag

-- | determiner/pronoun, plural e.g.; these those them
DTS :: Tag

-- | pronoun, plural + verb "to be", present tense, 3rd person singular
--   e.g.; them's
DTS_pl_BEZ :: Tag

-- | determiner, pronoun or double conjunction e.g.; neither either one
DTX :: Tag

-- | existential there e.g.; there
EX :: Tag

-- | existential there + verb "to be", present tense, 3rd person singular
--   e.g.; there's
EX_pl_BEZ :: Tag

-- | existential there + verb "to have", past tense e.g.; there'd
EX_pl_HVD :: Tag

-- | existential there + verb "to have", present tense, 3rd person singular
--   e.g.; there's
EX_pl_HVZ :: Tag

-- | existential there + modal auxiliary e.g.; there'll there'd
EX_pl_MD :: Tag

-- | foreign word: negator e.g.; pas non ne
FW_star :: Tag

-- | foreign word: article e.g.; la le el un die der ein keine eine das las
--   les Il
FW_AT :: Tag

-- | foreign word: article + noun, singular, common e.g.; l'orchestre
--   l'identite l'arcade l'ange l'assistance l'activite L'Universite
--   l'independance L'Union L'Unita l'osservatore
FW_AT_pl_NN :: Tag

-- | foreign word: article + noun, singular, proper e.g.; L'Astree
--   L'Imperiale
FW_AT_pl_NP :: Tag

-- | foreign word: verb "to be", infinitive or imperative e.g.; sit
FW_BE :: Tag

-- | foreign word: verb "to be", present tense, 2nd person singular or all
--   persons plural e.g.; sind sunt etes
FW_BER :: Tag

-- | foreign word: verb "to be", present tense, 3rd person singular e.g.;
--   ist est
FW_BEZ :: Tag

-- | foreign word: conjunction, coordinating e.g.; et ma mais und aber och
--   nec y
FW_CC :: Tag

-- | foreign word: numeral, cardinal e.g.; une cinq deux sieben unam zwei
FW_CD :: Tag

-- | foreign word: conjunction, subordinating e.g.; bevor quam ma
FW_CS :: Tag

-- | foreign word: determiner/pronoun, singular e.g.; hoc
FW_DT :: Tag

-- | foreign word: determiner + verb "to be", present tense, 3rd person
--   singular e.g.; c'est
FW_DT_pl_BEZ :: Tag

-- | foreign word: determiner/pronoun, plural e.g.; haec
FW_DTS :: Tag

-- | foreign word: verb "to have", present tense, not 3rd person singular
--   e.g.; habe
FW_HV :: Tag

-- | foreign word: preposition e.g.; ad de en a par con dans ex von auf
--   super post sine sur sub avec per inter sans pour pendant in di
FW_IN :: Tag

-- | foreign word: preposition + article e.g.; della des du aux zur d'un
--   del dell'
FW_IN_pl_AT :: Tag

-- | foreign word: preposition + noun, singular, common e.g.; d'etat
--   d'hotel d'argent d'identite d'art
FW_IN_pl_NN :: Tag

-- | foreign word: preposition + noun, singular, proper e.g.; d'Yquem
--   d'Eiffel
FW_IN_pl_NP :: Tag

-- | foreign word: adjective e.g.; avant Espagnol sinfonica Siciliana
--   Philharmonique grand publique haute noire bouffe Douce meme humaine
--   bel serieuses royaux anticus presto Sovietskaya Bayerische comique
--   schwarzen ...
FW_JJ :: Tag

-- | foreign word: adjective, comparative e.g.; fortiori
FW_JJR :: Tag

-- | foreign word: adjective, superlative e.g.; optimo
FW_JJT :: Tag

-- | foreign word: noun, singular, common e.g.; ballet esprit ersatz mano
--   chatte goutte sang Fledermaus oud def kolkhoz roi troika canto boite
--   blutwurst carne muzyka bonheur monde piece force ...
FW_NN :: Tag

-- | foreign word: noun, singular, common, genitive e.g.; corporis
--   intellectus arte's dei aeternitatis senioritatis curiae patronne's
--   chambre's
FW_NNdollar :: Tag

-- | foreign word: noun, plural, common e.g.; al culpas vopos boites haflis
--   kolkhozes augen tyrannis alpha-beta-gammas metis banditos rata phis
--   negociants crus Einsatzkommandos kamikaze wohaws sabinas zorrillas
--   palazzi engages coureurs corroborees yori Ubermenschen ...
FW_NNS :: Tag

-- | foreign word: noun, singular, proper e.g.; Karshilama Dieu Rundfunk
--   Afrique Espanol Afrika Spagna Gott Carthago deus
FW_NP :: Tag

-- | foreign word: noun, plural, proper e.g.; Svenskarna Atlantes Dieux
FW_NPS :: Tag

-- | foreign word: noun, singular, adverbial e.g.; heute morgen aujourd'hui
--   hoy
FW_NR :: Tag

-- | foreign word: numeral, ordinal e.g.; 18e 17e quintus
FW_OD :: Tag

-- | foreign word: pronoun, nominal e.g.; hoc
FW_PN :: Tag

-- | foreign word: determiner, possessive e.g.; mea mon deras vos
FW_PPdollar :: Tag

-- | foreign word: pronoun, singular, reflexive e.g.; se
FW_PPL :: Tag

-- | foreign word: pronoun, singular, reflexive + verb, present tense, 3rd
--   person singular e.g.; s'excuse s'accuse
FW_PPL_pl_VBZ :: Tag

-- | foreign word: pronoun, personal, accusative e.g.; lui me moi mi
FW_PPO :: Tag

-- | foreign word: pronoun, personal, accusative + preposition e.g.; mecum
--   tecum
FW_PPO_pl_IN :: Tag

-- | foreign word: pronoun, personal, nominative, 3rd person singular e.g.;
--   il
FW_PPS :: Tag

-- | foreign word: pronoun, personal, nominative, not 3rd person singular
--   e.g.; ich vous sie je
FW_PPSS :: Tag

-- | foreign word: pronoun, personal, nominative, not 3rd person singular +
--   verb "to have", present tense, not 3rd person singular e.g.; j'ai
FW_PPSS_pl_HV :: Tag

-- | foreign word: qualifier e.g.; minus
FW_QL :: Tag

-- | foreign word: adverb e.g.; bas assai deja um wiederum cito velociter
--   vielleicht simpliciter non zu domi nuper sic forsan olim oui semper
--   tout despues hors
FW_RB :: Tag

-- | foreign word: adverb + conjunction, coordinating e.g.; forisque
FW_RB_pl_CC :: Tag

-- | foreign word: infinitival to + verb, infinitive e.g.; d'entretenir
FW_TO_pl_VB :: Tag

-- | foreign word: interjection e.g.; sayonara bien adieu arigato bonjour
--   adios bueno tchalo ciao o
FW_UH :: Tag

-- | foreign word: verb, present tense, not 3rd person singular, imperative
--   or infinitive e.g.; nolo contendere vive fermate faciunt esse vade
--   noli tangere dites duces meminisse iuvabit gosaimasu voulez habla
--   ksu'u'peli'afo lacheln miuchi say allons strafe portant
FW_VB :: Tag

-- | foreign word: verb, past tense e.g.; stabat peccavi audivi
FW_VBD :: Tag

-- | foreign word: verb, present participle or gerund e.g.; nolens volens
--   appellant seq. obliterans servanda dicendi delenda
FW_VBG :: Tag

-- | foreign word: verb, past participle e.g.; vue verstrichen rasa
--   verboten engages
FW_VBN :: Tag

-- | foreign word: verb, present tense, 3rd person singular e.g.; gouverne
--   sinkt sigue diapiace
FW_VBZ :: Tag

-- | foreign word: WH-determiner e.g.; quo qua quod que quok
FW_WDT :: Tag

-- | foreign word: WH-pronoun, accusative e.g.; quibusdam
FW_WPO :: Tag

-- | foreign word: WH-pronoun, nominative e.g.; qui
FW_WPS :: Tag

-- | verb "to have", uninflected present tense, infinitive or imperative
--   e.g.; have hast
HV :: Tag

-- | verb "to have", uninflected present tense or imperative, negated e.g.;
--   haven't ain't
HVstar :: Tag

-- | verb "to have", uninflected present tense + infinitival to e.g.; hafta
HV_pl_TO :: Tag

-- | verb "to have", past tense e.g.; had
HVD :: Tag

-- | verb "to have", past tense, negated e.g.; hadn't
HVDstar :: Tag

-- | verb "to have", present participle or gerund e.g.; having
HVG :: Tag

-- | verb "to have", past participle e.g.; had
HVN :: Tag

-- | verb "to have", present tense, 3rd person singular e.g.; has hath
HVZ :: Tag

-- | verb "to have", present tense, 3rd person singular, negated e.g.;
--   hasn't ain't
HVZstar :: Tag

-- | preposition e.g.; of in for by considering to on among at through with
--   under into regarding than since despite according per before toward
--   against as after during including between without except upon out over
--   ...
IN :: Tag

-- | preposition, hyphenated pair e.g.; f'ovuh
IN_pl_IN :: Tag

-- | preposition + pronoun, personal, accusative e.g.; t'hi-im
IN_pl_PPO :: Tag

-- | adjective e.g.; recent over-all possible hard-fought favorable hard
--   meager fit such widespread outmoded inadequate ambiguous grand
--   clerical effective orderly federal foster general proportionate ...
JJ :: Tag

-- | adjective, genitive e.g.; Great's
JJdollar :: Tag

-- | adjective, hyphenated pair e.g.; big-large long-far
JJ_pl_JJ :: Tag

-- | adjective, comparative e.g.; greater older further earlier later freer
--   franker wider better deeper firmer tougher faster higher bigger worse
--   younger lighter nicer slower happier frothier Greater newer Elder ...
JJR :: Tag

-- | adjective, comparative + conjunction, subordinating e.g.; lighter'n
JJR_pl_CS :: Tag

-- | adjective, semantically superlative e.g.; top chief principal
--   northernmost master key head main tops utmost innermost foremost
--   uppermost paramount topmost
JJS :: Tag

-- | adjective, superlative e.g.; best largest coolest calmest latest
--   greatest earliest simplest strongest newest fiercest unhappiest worst
--   youngest worthiest fastest hottest fittest lowest finest smallest
--   staunchest ...
JJT :: Tag

-- | modal auxiliary e.g.; should may might will would must can could shall
--   ought need wilt
MD :: Tag

-- | modal auxiliary, negated e.g.; cannot couldn't wouldn't can't won't
--   shouldn't shan't mustn't musn't
MDstar :: Tag

-- | modal auxiliary + verb "to have", uninflected form e.g.; shouldda
--   musta coulda must've woulda could've
MD_pl_HV :: Tag

-- | modal auxiliary + pronoun, personal, nominative, not 3rd person
--   singular e.g.; willya
MD_pl_PPSS :: Tag

-- | modal auxiliary + infinitival to e.g.; oughta
MD_pl_TO :: Tag

-- | noun, singular, common e.g.; failure burden court fire appointment
--   awarding compensation Mayor interim committee fact effect airport
--   management surveillance jail doctor intern extern night weekend duty
--   legislation Tax Office ...
NN :: Tag

-- | noun, singular, common, genitive e.g.; season's world's player's
--   night's chapter's golf's football's baseball's club's U.'s coach's
--   bride's bridegroom's board's county's firm's company's
--   superintendent's mob's Navy's ...
NNdollar :: Tag

-- | noun, singular, common + verb "to be", present tense, 3rd person
--   singular e.g.; water's camera's sky's kid's Pa's heat's throat's
--   father's money's undersecretary's granite's level's wife's fat's
--   Knife's fire's name's hell's leg's sun's roulette's cane's guy's
--   kind's baseball's ...
NN_pl_BEZ :: Tag

-- | noun, singular, common + verb "to have", past tense e.g.; Pa'd
NN_pl_HVD :: Tag

-- | noun, singular, common + verb "to have", present tense, 3rd person
--   singular e.g.; guy's Knife's boat's summer's rain's company's
NN_pl_HVZ :: Tag

-- | noun, singular, common + preposition e.g.; buncha
NN_pl_IN :: Tag

-- | noun, singular, common + modal auxiliary e.g.; cowhand'd sun'll
NN_pl_MD :: Tag

-- | noun, singular, common, hyphenated pair e.g.; stomach-belly
NN_pl_NN :: Tag

-- | noun, plural, common e.g.; irregularities presentments thanks reports
--   voters laws legislators years areas adjustments chambers $100 bonds
--   courts sales details raises sessions members congressmen votes polls
--   calls ...
NNS :: Tag

-- | noun, plural, common, genitive e.g.; taxpayers' children's members'
--   States' women's cutters' motorists' steelmakers' hours' Nations'
--   lawyers' prisoners' architects' tourists' Employers' secretaries'
--   Rogues' ...
NNSdollar :: Tag

-- | noun, plural, common + modal auxiliary e.g.; duds'd oystchers'll
NNS_pl_MD :: Tag

-- | noun, singular, proper e.g.; Fulton Atlanta September-October Durwood
--   Pye Ivan Allen Jr. Jan. Alpharetta Grady William B. Hartsfield Pearl
--   Williams Aug. Berry J. M. Cheshire Griffin Opelika Ala. E. Pelham
--   Snodgrass ...
NP :: Tag

-- | noun, singular, proper, genitive e.g.; Green's Landis' Smith's
--   Carreon's Allison's Boston's Spahn's Willie's Mickey's Milwaukee's
--   Mays' Howsam's Mantle's Shaw's Wagner's Rickey's Shea's Palmer's
--   Arnold's Broglio's ...
NPdollar :: Tag

-- | noun, singular, proper + verb "to be", present tense, 3rd person
--   singular e.g.; W.'s Ike's Mack's Jack's Kate's Katharine's Black's
--   Arthur's Seaton's Buckhorn's Breed's Penny's Rob's Kitty's Blackwell's
--   Myra's Wally's Lucille's Springfield's Arlene's
NP_pl_BEZ :: Tag

-- | noun, singular, proper + verb "to have", present tense, 3rd person
--   singular e.g.; Bill's Guardino's Celie's Skolman's Crosson's Tim's
--   Wally's
NP_pl_HVZ :: Tag

-- | noun, singular, proper + modal auxiliary e.g.; Gyp'll John'll
NP_pl_MD :: Tag

-- | noun, plural, proper e.g.; Chases Aderholds Chapelles Armisteads
--   Lockies Carbones French Marskmen Toppers Franciscans Romans Cadillacs
--   Masons Blacks Catholics British Dixiecrats Mississippians Congresses
--   ...
NPS :: Tag

-- | noun, plural, proper, genitive e.g.; Republicans' Orioles' Birds'
--   Yanks' Redbirds' Bucs' Yankees' Stevenses' Geraghtys' Burkes' Wackers'
--   Achaeans' Dresbachs' Russians' Democrats' Gershwins' Adventists'
--   Negroes' Catholics' ...
NPSdollar :: Tag

-- | noun, singular, adverbial e.g.; Friday home Wednesday Tuesday Monday
--   Sunday Thursday yesterday tomorrow tonight West East Saturday west
--   left east downtown north northeast southeast northwest North South
--   right ...
NR :: Tag

-- | noun, singular, adverbial, genitive e.g.; Saturday's Monday's
--   yesterday's tonight's tomorrow's Sunday's Wednesday's Friday's today's
--   Tuesday's West's Today's South's
NRdollar :: Tag

-- | noun, singular, adverbial + modal auxiliary e.g.; today'll
NR_pl_MD :: Tag

-- | noun, plural, adverbial e.g.; Sundays Mondays Saturdays Wednesdays
--   Souths Fridays
NRS :: Tag

-- | numeral, ordinal e.g.; first 13th third nineteenth 2d 61st second
--   sixth eighth ninth twenty-first eleventh 50th eighteenth- Thirty-ninth
--   72nd 1/20th twentieth mid-19th thousandth 350th sixteenth 701st ...
OD :: Tag

-- | pronoun, nominal e.g.; none something everything one anyone nothing
--   nobody everybody everyone anybody anything someone no-one nothin
PN :: Tag

-- | pronoun, nominal, genitive e.g.; one's someone's anybody's nobody's
--   everybody's anyone's everyone's
PNdollar :: Tag

-- | pronoun, nominal + verb "to be", present tense, 3rd person singular
--   e.g.; nothing's everything's somebody's nobody's someone's
PN_pl_BEZ :: Tag

-- | pronoun, nominal + verb "to have", past tense e.g.; nobody'd
PN_pl_HVD :: Tag

-- | pronoun, nominal + verb "to have", present tense, 3rd person singular
--   e.g.; nobody's somebody's one's
PN_pl_HVZ :: Tag

-- | pronoun, nominal + modal auxiliary e.g.; someone'll somebody'll
--   anybody'd
PN_pl_MD :: Tag

-- | determiner, possessive e.g.; our its his their my your her out thy
--   mine thine
PPdollar :: Tag

-- | pronoun, possessive e.g.; ours mine his hers theirs yours
PPdollardollar :: Tag

-- | pronoun, singular, reflexive e.g.; itself himself myself yourself
--   herself oneself ownself
PPL :: Tag

-- | pronoun, plural, reflexive e.g.; themselves ourselves yourselves
PPLS :: Tag

-- | pronoun, personal, accusative e.g.; them it him me us you 'em her thee
--   we'uns
PPO :: Tag

-- | pronoun, personal, nominative, 3rd person singular e.g.; it he she
--   thee
PPS :: Tag

-- | pronoun, personal, nominative, 3rd person singular + verb "to be",
--   present tense, 3rd person singular e.g.; it's he's she's
PPS_pl_BEZ :: Tag

-- | pronoun, personal, nominative, 3rd person singular + verb "to have",
--   past tense e.g.; she'd he'd it'd
PPS_pl_HVD :: Tag

-- | pronoun, personal, nominative, 3rd person singular + verb "to have",
--   present tense, 3rd person singular e.g.; it's he's she's
PPS_pl_HVZ :: Tag

-- | pronoun, personal, nominative, 3rd person singular + modal auxiliary
--   e.g.; he'll she'll it'll he'd it'd she'd
PPS_pl_MD :: Tag

-- | pronoun, personal, nominative, not 3rd person singular e.g.; they we I
--   you ye thou you'uns
PPSS :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
--   present tense, 1st person singular e.g.; I'm Ahm
PPSS_pl_BEM :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
--   present tense, 2nd person singular or all persons plural e.g.; we're
--   you're they're
PPSS_pl_BER :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
--   present tense, 3rd person singular e.g.; you's
PPSS_pl_BEZ :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
--   present tense, 3rd person singular, negated e.g.; 'tain't
PPSS_pl_BEZstar :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to
--   have", uninflected present tense e.g.; I've we've they've you've
PPSS_pl_HV :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb "to
--   have", past tense e.g.; I'd you'd we'd they'd
PPSS_pl_HVD :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + modal
--   auxiliary e.g.; you'll we'll I'll we'd I'd they'll they'd you'd
PPSS_pl_MD :: Tag

-- | pronoun, personal, nominative, not 3rd person singular + verb,
--   uninflected present tense e.g.; y'know
PPSS_pl_VB :: Tag

-- | qualifier, pre e.g.; well less very most so real as highly
--   fundamentally even how much remarkably somewhat more completely too
--   thus ill deeply little overly halfway almost impossibly far severly
--   such ...
QL :: Tag

-- | qualifier, post e.g.; indeed enough still 'nuff
QLP :: Tag

-- | adverb e.g.; only often generally also nevertheless upon together back
--   newly no likely meanwhile near then heavily there apparently yet
--   outright fully aside consistently specifically formally ever just ...
RB :: Tag

-- | adverb, genitive e.g.; else's
RBdollar :: Tag

-- | adverb + verb "to be", present tense, 3rd person singular e.g.; here's
--   there's
RB_pl_BEZ :: Tag

-- | adverb + conjunction, coordinating e.g.; well's soon's
RB_pl_CS :: Tag

-- | adverb, comparative e.g.; further earlier better later higher tougher
--   more harder longer sooner less faster easier louder farther oftener
--   nearer cheaper slower tighter lower worse heavier quicker ...
RBR :: Tag

-- | adverb, comparative + conjunction, coordinating e.g.; more'n
RBR_pl_CS :: Tag

-- | adverb, superlative e.g.; most best highest uppermost nearest
--   brightest hardest fastest deepest farthest loudest ...
RBT :: Tag

-- | adverb, nominal e.g.; here afar then
RN :: Tag

-- | adverb, particle e.g.; up out off down over on in about through across
--   after
RP :: Tag

-- | adverb, particle + preposition e.g.; out'n outta
RP_pl_IN :: Tag

-- | infinitival to e.g.; to t'
TO :: Tag

-- | infinitival to + verb, infinitive e.g.; t'jawn t'lah
TO_pl_VB :: Tag

-- | interjection e.g.; Hurrah bang whee hmpf ah goodbye oops
--   oh-the-pain-of-it ha crunch say oh why see well hello lo alas
--   tarantara rum-tum-tum gosh hell keerist Jesus Keeeerist boy c'mon 'mon
--   goddamn bah hoo-pig damn ...
UH :: Tag

-- | verb, base: uninflected present, imperative or infinitive e.g.;
--   investigate find act follow inure achieve reduce take remedy re-set
--   distribute realize disable feel receive continue place protect
--   eliminate elaborate work permit run enter force ...
VB :: Tag

-- | verb, base: uninflected present or infinitive + article e.g.; wanna
VB_pl_AT :: Tag

-- | verb, base: uninflected present, imperative or infinitive +
--   preposition e.g.; lookit
VB_pl_IN :: Tag

-- | verb, base: uninflected present, imperative or infinitive + adjective
--   e.g.; die-dead
VB_pl_JJ :: Tag

-- | verb, uninflected present tense + pronoun, personal, accusative e.g.;
--   let's lemme gimme
VB_pl_PPO :: Tag

-- | verb, imperative + adverbial particle e.g.; g'ahn c'mon
VB_pl_RP :: Tag

-- | verb, base: uninflected present, imperative or infinitive +
--   infinitival to e.g.; wanta wanna
VB_pl_TO :: Tag

-- | verb, base: uninflected present, imperative or infinitive; hyphenated
--   pair e.g.; say-speak
VB_pl_VB :: Tag

-- | verb, past tense e.g.; said produced took recommended commented urged
--   found added praised charged listed became announced brought attended
--   wanted voted defeated received got stood shot scheduled feared
--   promised made ...
VBD :: Tag

-- | verb, present participle or gerund e.g.; modernizing improving
--   purchasing Purchasing lacking enabling pricing keeping getting picking
--   entering voting warning making strengthening setting neighboring
--   attending participating moving ...
VBG :: Tag

-- | verb, present participle + infinitival to e.g.; gonna
VBG_pl_TO :: Tag

-- | verb, past participle e.g.; conducted charged won received studied
--   revised operated accepted combined experienced recommended effected
--   granted seen protected adopted retarded notarized selected composed
--   gotten printed ...
VBN :: Tag

-- | verb, past participle + infinitival to e.g.; gotta
VBN_pl_TO :: Tag

-- | verb, present tense, 3rd person singular e.g.; deserves believes
--   receives takes goes expires says opposes starts permits expects thinks
--   faces votes teaches holds calls fears spends collects backs eliminates
--   sets flies gives seeks reads ...
VBZ :: Tag

-- | WH-determiner e.g.; which what whatever whichever whichever-the-hell
WDT :: Tag

-- | WH-determiner + verb "to be", present tense, 2nd person singular or
--   all persons plural e.g.; what're
WDT_pl_BER :: Tag

-- | WH-determiner + verb "to be", present, 2nd person singular or all
--   persons plural + pronoun, personal, nominative, not 3rd person
--   singular e.g.; whaddya
WDT_pl_BER_pl_PP :: Tag

-- | WH-determiner + verb "to be", present tense, 3rd person singular e.g.;
--   what's
WDT_pl_BEZ :: Tag

-- | WH-determiner + verb "to do", uninflected present tense + pronoun,
--   personal, nominative, not 3rd person singular e.g.; whaddya
WDT_pl_DO_pl_PPS :: Tag

-- | WH-determiner + verb "to do", past tense e.g.; what'd
WDT_pl_DOD :: Tag

-- | WH-determiner + verb "to have", present tense, 3rd person singular
--   e.g.; what's
WDT_pl_HVZ :: Tag

-- | WH-pronoun, genitive e.g.; whose whosever
WPdollar :: Tag

-- | WH-pronoun, accusative e.g.; whom that who
WPO :: Tag

-- | WH-pronoun, nominative e.g.; that who whoever whosoever what
--   whatsoever
WPS :: Tag

-- | WH-pronoun, nominative + verb "to be", present, 3rd person singular
--   e.g.; that's who's
WPS_pl_BEZ :: Tag

-- | WH-pronoun, nominative + verb "to have", past tense e.g.; who'd
WPS_pl_HVD :: Tag

-- | WH-pronoun, nominative + verb "to have", present tense, 3rd person
--   singular e.g.; who's that's
WPS_pl_HVZ :: Tag

-- | WH-pronoun, nominative + modal auxiliary e.g.; who'll that'd who'd
--   that'll
WPS_pl_MD :: Tag

-- | WH-qualifier e.g.; however how
WQL :: Tag

-- | WH-adverb e.g.; however when where why whereby wherever how whenever
--   whereon wherein wherewith wheare wherefore whereof howsabout
WRB :: Tag

-- | WH-adverb + verb "to be", present, 2nd person singular or all persons
--   plural e.g.; where're
WRB_pl_BER :: Tag

-- | WH-adverb + verb "to be", present, 3rd person singular e.g.; how's
--   where's
WRB_pl_BEZ :: Tag

-- | WH-adverb + verb "to do", present, not 3rd person singular e.g.; howda
WRB_pl_DO :: Tag

-- | WH-adverb + verb "to do", past tense e.g.; where'd how'd
WRB_pl_DOD :: Tag

-- | WH-adverb + verb "to do", past tense, negated e.g.; whyn't
WRB_pl_DODstar :: Tag

-- | WH-adverb + verb "to do", present tense, 3rd person singular e.g.;
--   how's
WRB_pl_DOZ :: Tag

-- | WH-adverb + preposition e.g.; why'n
WRB_pl_IN :: Tag

-- | WH-adverb + modal auxiliary e.g.; where'd
WRB_pl_MD :: Tag

-- | Unknown.
Unk :: Tag
data Chunk

-- | Noun Phrase.
C_NP :: Chunk

-- | Verb Phrase.
C_VP :: Chunk

-- | Prepositional Phrase.
C_PP :: Chunk

-- | Clause.
C_CL :: Chunk

-- | "Out": not a chunk.
C_O :: Chunk
instance Read Chunk
instance Show Chunk
instance Ord Chunk
instance Eq Chunk
instance Generic Chunk
instance Enum Chunk
instance Bounded Chunk
instance Read Tag
instance Show Tag
instance Ord Tag
instance Eq Tag
instance Generic Tag
instance Enum Tag
instance Bounded Tag
instance Datatype D1Chunk
instance Constructor C1_0Chunk
instance Constructor C1_1Chunk
instance Constructor C1_2Chunk
instance Constructor C1_3Chunk
instance Constructor C1_4Chunk
instance Datatype D1Tag
instance Constructor C1_0Tag
instance Constructor C1_1Tag
instance Constructor C1_2Tag
instance Constructor C1_3Tag
instance Constructor C1_4Tag
instance Constructor C1_5Tag
instance Constructor C1_6Tag
instance Constructor C1_7Tag
instance Constructor C1_8Tag
instance Constructor C1_9Tag
instance Constructor C1_10Tag
instance Constructor C1_11Tag
instance Constructor C1_12Tag
instance Constructor C1_13Tag
instance Constructor C1_14Tag
instance Constructor C1_15Tag
instance Constructor C1_16Tag
instance Constructor C1_17Tag
instance Constructor C1_18Tag
instance Constructor C1_19Tag
instance Constructor C1_20Tag
instance Constructor C1_21Tag
instance Constructor C1_22Tag
instance Constructor C1_23Tag
instance Constructor C1_24Tag
instance Constructor C1_25Tag
instance Constructor C1_26Tag
instance Constructor C1_27Tag
instance Constructor C1_28Tag
instance Constructor C1_29Tag
instance Constructor C1_30Tag
instance Constructor C1_31Tag
instance Constructor C1_32Tag
instance Constructor C1_33Tag
instance Constructor C1_34Tag
instance Constructor C1_35Tag
instance Constructor C1_36Tag
instance Constructor C1_37Tag
instance Constructor C1_38Tag
instance Constructor C1_39Tag
instance Constructor C1_40Tag
instance Constructor C1_41Tag
instance Constructor C1_42Tag
instance Constructor C1_43Tag
instance Constructor C1_44Tag
instance Constructor C1_45Tag
instance Constructor C1_46Tag
instance Constructor C1_47Tag
instance Constructor C1_48Tag
instance Constructor C1_49Tag
instance Constructor C1_50Tag
instance Constructor C1_51Tag
instance Constructor C1_52Tag
instance Constructor C1_53Tag
instance Constructor C1_54Tag
instance Constructor C1_55Tag
instance Constructor C1_56Tag
instance Constructor C1_57Tag
instance Constructor C1_58Tag
instance Constructor C1_59Tag
instance Constructor C1_60Tag
instance Constructor C1_61Tag
instance Constructor C1_62Tag
instance Constructor C1_63Tag
instance Constructor C1_64Tag
instance Constructor C1_65Tag
instance Constructor C1_66Tag
instance Constructor C1_67Tag
instance Constructor C1_68Tag
instance Constructor C1_69Tag
instance Constructor C1_70Tag
instance Constructor C1_71Tag
instance Constructor C1_72Tag
instance Constructor C1_73Tag
instance Constructor C1_74Tag
instance Constructor C1_75Tag
instance Constructor C1_76Tag
instance Constructor C1_77Tag
instance Constructor C1_78Tag
instance Constructor C1_79Tag
instance Constructor C1_80Tag
instance Constructor C1_81Tag
instance Constructor C1_82Tag
instance Constructor C1_83Tag
instance Constructor C1_84Tag
instance Constructor C1_85Tag
instance Constructor C1_86Tag
instance Constructor C1_87Tag
instance Constructor C1_88Tag
instance Constructor C1_89Tag
instance Constructor C1_90Tag
instance Constructor C1_91Tag
instance Constructor C1_92Tag
instance Constructor C1_93Tag
instance Constructor C1_94Tag
instance Constructor C1_95Tag
instance Constructor C1_96Tag
instance Constructor C1_97Tag
instance Constructor C1_98Tag
instance Constructor C1_99Tag
instance Constructor C1_100Tag
instance Constructor C1_101Tag
instance Constructor C1_102Tag
instance Constructor C1_103Tag
instance Constructor C1_104Tag
instance Constructor C1_105Tag
instance Constructor C1_106Tag
instance Constructor C1_107Tag
instance Constructor C1_108Tag
instance Constructor C1_109Tag
instance Constructor C1_110Tag
instance Constructor C1_111Tag
instance Constructor C1_112Tag
instance Constructor C1_113Tag
instance Constructor C1_114Tag
instance Constructor C1_115Tag
instance Constructor C1_116Tag
instance Constructor C1_117Tag
instance Constructor C1_118Tag
instance Constructor C1_119Tag
instance Constructor C1_120Tag
instance Constructor C1_121Tag
instance Constructor C1_122Tag
instance Constructor C1_123Tag
instance Constructor C1_124Tag
instance Constructor C1_125Tag
instance Constructor C1_126Tag
instance Constructor C1_127Tag
instance Constructor C1_128Tag
instance Constructor C1_129Tag
instance Constructor C1_130Tag
instance Constructor C1_131Tag
instance Constructor C1_132Tag
instance Constructor C1_133Tag
instance Constructor C1_134Tag
instance Constructor C1_135Tag
instance Constructor C1_136Tag
instance Constructor C1_137Tag
instance Constructor C1_138Tag
instance Constructor C1_139Tag
instance Constructor C1_140Tag
instance Constructor C1_141Tag
instance Constructor C1_142Tag
instance Constructor C1_143Tag
instance Constructor C1_144Tag
instance Constructor C1_145Tag
instance Constructor C1_146Tag
instance Constructor C1_147Tag
instance Constructor C1_148Tag
instance Constructor C1_149Tag
instance Constructor C1_150Tag
instance Constructor C1_151Tag
instance Constructor C1_152Tag
instance Constructor C1_153Tag
instance Constructor C1_154Tag
instance Constructor C1_155Tag
instance Constructor C1_156Tag
instance Constructor C1_157Tag
instance Constructor C1_158Tag
instance Constructor C1_159Tag
instance Constructor C1_160Tag
instance Constructor C1_161Tag
instance Constructor C1_162Tag
instance Constructor C1_163Tag
instance Constructor C1_164Tag
instance Constructor C1_165Tag
instance Constructor C1_166Tag
instance Constructor C1_167Tag
instance Constructor C1_168Tag
instance Constructor C1_169Tag
instance Constructor C1_170Tag
instance Constructor C1_171Tag
instance Constructor C1_172Tag
instance Constructor C1_173Tag
instance Constructor C1_174Tag
instance Constructor C1_175Tag
instance Constructor C1_176Tag
instance Constructor C1_177Tag
instance Constructor C1_178Tag
instance Constructor C1_179Tag
instance Constructor C1_180Tag
instance Constructor C1_181Tag
instance Constructor C1_182Tag
instance Constructor C1_183Tag
instance Constructor C1_184Tag
instance Constructor C1_185Tag
instance Constructor C1_186Tag
instance Constructor C1_187Tag
instance Constructor C1_188Tag
instance Constructor C1_189Tag
instance Constructor C1_190Tag
instance Constructor C1_191Tag
instance Constructor C1_192Tag
instance Constructor C1_193Tag
instance Constructor C1_194Tag
instance Constructor C1_195Tag
instance Constructor C1_196Tag
instance Constructor C1_197Tag
instance Constructor C1_198Tag
instance Constructor C1_199Tag
instance Constructor C1_200Tag
instance Constructor C1_201Tag
instance Constructor C1_202Tag
instance Constructor C1_203Tag
instance Constructor C1_204Tag
instance Constructor C1_205Tag
instance Constructor C1_206Tag
instance Constructor C1_207Tag
instance Constructor C1_208Tag
instance Constructor C1_209Tag
instance Constructor C1_210Tag
instance Constructor C1_211Tag
instance Constructor C1_212Tag
instance Constructor C1_213Tag
instance Constructor C1_214Tag
instance Constructor C1_215Tag
instance Constructor C1_216Tag
instance Constructor C1_217Tag
instance Constructor C1_218Tag
instance Constructor C1_219Tag
instance Constructor C1_220Tag
instance Constructor C1_221Tag
instance Constructor C1_222Tag
instance Constructor C1_223Tag
instance Constructor C1_224Tag
instance Constructor C1_225Tag
instance Constructor C1_226Tag
instance Constructor C1_227Tag
instance Constructor C1_228Tag
instance ChunkTag Chunk
instance Arbitrary Tag
instance Tag Tag
instance Serialize Tag
instance Serialize Chunk
instance Arbitrary Chunk

module NLP.Types

-- | Part of Speech tagger, with back-off tagger.
--   
--   A sequence of pos taggers can be assembled by using backoff taggers.
--   When tagging text, the first tagger is run on the input, possibly
--   tagging some tokens as unknown ('Tag <a>Unk</a>'). The first backoff
--   tagger is then recursively invoked on the text to fill in the unknown
--   tags, but that may still leave some tokens marked with 'Tag
--   <a>Unk</a>'. This process repeats until no more taggers are found.
--   (The current implementation is not very efficient in this respect.)
--   
--   Back off taggers are particularly useful when there is a set of domain
--   specific vernacular that a general purpose statistical tagger does not
--   know of. A LiteralTagger can be created to map terms to fixed POS
--   tags, and then delegate the bulk of the text to a statistical back off
--   tagger, such as an AvgPerceptronTagger.
--   
--   <a>POSTagger</a> values can be serialized and deserialized by using
--   <a>serialize</a> and <tt>NLP.POS.deserialize</tt>. This is a bit tricky
--   because the POSTagger abstracts away the implementation details of the
--   particular tagging algorithm, and the model for that tagger (if any).
--   To support serialization, each POSTagger value must provide a
--   serialize value that can be used to generate a <a>ByteString</a>
--   representation of the model, as well as a unique id (also a
--   <a>ByteString</a>). Furthermore, that ID must be added to a `Map
--   ByteString (ByteString -&gt; Maybe POSTagger -&gt; Either String
--   POSTagger)` that is provided to <tt>deserialize</tt>. The function in
--   the map takes the output of <a>posSerialize</a>, and possibly a
--   backoff tagger, and reconstitutes the POSTagger that was serialized
--   (assigning the proper functions, setting up closures as needed, etc.)
--   Look at the source for <a>taggerTable</a> and <a>readTagger</a> for
--   examples.
data POSTagger t
POSTagger :: ([Sentence] -> [TaggedSentence t]) -> ([TaggedSentence t] -> IO (POSTagger t)) -> Maybe (POSTagger t) -> (Text -> Sentence) -> (Text -> [Text]) -> ByteString -> ByteString -> POSTagger t

-- | The initial part-of-speech tagger.
posTagger :: POSTagger t -> [Sentence] -> [TaggedSentence t]

-- | Training function to train the immediate POS tagger.
posTrainer :: POSTagger t -> [TaggedSentence t] -> IO (POSTagger t)

-- | A tagger to invoke on unknown tokens.
posBackoff :: POSTagger t -> Maybe (POSTagger t)

-- | A tokenizer; (<a>words</a> will work.)
posTokenizer :: POSTagger t -> Text -> Sentence

-- | A sentence splitter. If your input is formatted as one sentence per
--   line, then use <a>lines</a>; otherwise try Eric Kow's fullstop
--   library.
posSplitter :: POSTagger t -> Text -> [Text]

-- | Store this POS tagger to a bytestring. This does <i>not</i> serialize
--   the backoff taggers.
posSerialize :: POSTagger t -> ByteString

-- | A unique id that will identify the algorithm used for this POS Tagger.
--   This is used in deserialization.
posID :: POSTagger t -> ByteString

-- | Document corpus.
--   
--   This is a simple hashed corpus, the document content is not stored.
data Corpus
Corpus :: Int -> Map Text Int -> Corpus

-- | The number of documents in the corpus.
corpLength :: Corpus -> Int

-- | A count of the number of documents each term occurred in.
corpTermCounts :: Corpus -> Map Text Int

-- | Get the number of documents that a term occurred in.
termCounts :: Corpus -> Text -> Int

-- | Add a document to the corpus.
--   
--   This can be dangerous if the documents are pre-processed differently.
--   All corpus-related functions assume that the documents have all been
--   tokenized and the tokens normalized, in the same way.
addDocument :: Corpus -> [Text] -> Corpus

-- | Create a corpus from a list of documents, represented by normalized
--   tokens.
mkCorpus :: [[Text]] -> Corpus
addTerms :: Map Text Int -> Set Text -> Map Text Int
addTerm :: Map Text Int -> Text -> Map Text Int
instance Read Corpus
instance Show Corpus
instance Eq Corpus
instance Ord Corpus
instance Generic Corpus
instance Datatype D1Corpus
instance Constructor C1_0Corpus
instance Selector S1_0_0Corpus
instance Selector S1_0_1Corpus
instance Arbitrary Corpus
instance Serialize Corpus
instance NFData Corpus

module NLP.POS.LiteralTagger
tag :: Tag t => Map Text t -> CaseSensitive -> [Sentence] -> [TaggedSentence t]
tagSentence :: Tag t => Map Text t -> CaseSensitive -> Sentence -> TaggedSentence t

-- | Create a Literal Tagger using the specified back-off tagger as a
--   fall-back, if one is specified.
--   
--   This uses a tokenizer adapted from the <tt>tokenize</tt> package for a
--   tokenizer, and Eric Kow's fullstop sentence segmenter as a sentence
--   splitter.
mkTagger :: Tag t => Map Text t -> CaseSensitive -> Maybe (POSTagger t) -> POSTagger t
taggerID :: ByteString

-- | deserialization for Literal Taggers. The serialization logic is in the
--   posSerialize record of the POSTagger created in mkTagger.
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)

-- | Boolean type to indicate case sensitivity for textual comparisons.
data CaseSensitive
Sensitive :: CaseSensitive
Insensitive :: CaseSensitive

-- | Create a tokenizer that protects the provided terms (to tokenize
--   multi-word terms)
protectTerms :: [Text] -> CaseSensitive -> Tokenizer


-- | This POS tagger deterministically tags tokens. However, if it ever
--   sees multiple tags for the same token, it will forget the tag it has
--   learned. This is useful for creating taggers that have very high
--   precision, but very low recall.
--   
--   Unambiguous taggers are also useful when defined with a
--   non-deterministic backoff tagger, such as an
--   <a>NLP.POS.AveragedPerceptronTagger</a>, since the high-confidence
--   tags will be applied first, followed by the more non-deterministic
--   results of the backoff tagger.
module NLP.POS.UnambiguousTagger
taggerID :: ByteString
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)

-- | Create an unambiguous tagger, using the supplied <a>Map</a> as a
--   source of tags.
mkTagger :: Tag t => Map Text t -> Maybe (POSTagger t) -> POSTagger t

-- | Trainer method for unambiguous taggers.
train :: Tag t => Map Text t -> [TaggedSentence t] -> Map Text t


-- | Average Perceptron implementation of Part of speech tagging, adapted
--   for Haskell from this python implementation, which is described on the
--   blog post:
--   
--   <ul>
--   
--   <li><a>http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/</a></li>
--   </ul>
--   
--   The Perceptron code can be found on github:
--   
--   <ul>
--   
--   <li><a>https://github.com/sloria/TextBlob/blob/dev/text/_perceptron.py</a></li>
--   </ul>
module NLP.ML.AvgPerceptron

-- | The perceptron model.
data Perceptron
Perceptron :: Map Feature (Map Class Weight) -> Map (Feature, Class) Weight -> Map (Feature, Class) Int -> Int -> Perceptron

-- | Each feature gets its own weight vector, so weights is a dict-of-dicts
weights :: Perceptron -> Map Feature (Map Class Weight)

-- | The accumulated values, for the averaging. These will be keyed by
--   feature/class tuples
totals :: Perceptron -> Map (Feature, Class) Weight

-- | The last time the feature was changed, for the averaging. Also keyed
--   by feature/class tuples (tstamps is short for timestamps)
tstamps :: Perceptron -> Map (Feature, Class) Int

-- | Number of instances seen
instances :: Perceptron -> Int

-- | The classes that the perceptron assigns are represented with a
--   newtype-wrapped String.
--   
--   Eventually, I think this should become a typeclass, so the classes can
--   be defined by the users of the Perceptron (such as custom POS tag
--   ADTs, or more complex classes).
newtype Class
Class :: String -> Class

-- | Typedef for doubles to make the code easier to read, and to make this
--   simple to change if necessary.
type Weight = Double
newtype Feature
Feat :: Text -> Feature

-- | An empty perceptron, used to start training.
emptyPerceptron :: Perceptron

-- | Predict a class given a feature vector.
--   
--   Ported from python:
--   
--   <pre>
--   def predict(self, features):
--       '''Dot-product the features and current weights and return the best label.'''
--       scores = defaultdict(float)
--       for feat, value in features.items():
--           if feat not in self.weights or value == 0:
--               continue
--           weights = self.weights[feat]
--           for label, weight in weights.items():
--               scores[label] += value * weight
--       # Do a secondary alphabetic sort, for stability
--       return max(self.classes, key=lambda label: (scores[label], label))
--   </pre>
predict :: Perceptron -> Map Feature Int -> Maybe Class
train :: Int -> Perceptron -> [(Map Feature Int, Class)] -> IO Perceptron

-- | Update the perceptron with a new example.
--   
--   <pre>
--   update(self, truth, guess, features)
--      ...
--           self.i += 1
--           if truth == guess:
--               return None
--           for f in features:
--               weights = self.weights.setdefault(f, {}) -- setdefault is Map.findWithDefault, and destructive.
--               upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
--               upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
--           return None
--   </pre>
update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron

-- | Average the weights
--   
--   Ported from Python:
--   
--   <pre>
--   def average_weights(self):
--       for feat, weights in self.weights.items():
--           new_feat_weights = {}
--           for clas, weight in weights.items():
--               param = (feat, clas)
--               total = self._totals[param]
--               total += (self.i - self._tstamps[param]) * weight
--               averaged = round(total / float(self.i), 3)
--               if averaged:
--                   new_feat_weights[clas] = averaged
--           self.weights[feat] = new_feat_weights
--       return None
--   </pre>
averageWeights :: Perceptron -> Perceptron
instance Read Feature
instance Show Feature
instance Eq Feature
instance Ord Feature
instance Generic Feature
instance Read Class
instance Show Class
instance Eq Class
instance Ord Class
instance Generic Class
instance Read Perceptron
instance Show Perceptron
instance Eq Perceptron
instance Generic Perceptron
instance Datatype D1Feature
instance Constructor C1_0Feature
instance Datatype D1Class
instance Constructor C1_0Class
instance Datatype D1Perceptron
instance Constructor C1_0Perceptron
instance Selector S1_0_0Perceptron
instance Selector S1_0_1Perceptron
instance Selector S1_0_2Perceptron
instance Selector S1_0_3Perceptron
instance NFData Perceptron
instance Serialize Perceptron
instance Serialize Class
instance Serialize Feature


-- | Averaged Perceptron Chunker
module NLP.Chunk.AvgPerceptronChunker

-- | Create a chunker from a <a>Perceptron</a>.
mkChunker :: (ChunkTag c, Tag t) => Perceptron -> Chunker c t
trainInt :: (ChunkTag c, Tag t) => Int -> Perceptron -> [ChunkedSentence c t] -> IO Perceptron

-- | Chunk a list of POS-tagged sentences, generating a parse tree.
chunk :: (ChunkTag c, Tag t) => Perceptron -> [TaggedSentence t] -> [ChunkedSentence c t]

-- | Chunk a single POS-tagged sentence.
chunkSentence :: (ChunkTag c, Tag t) => Perceptron -> TaggedSentence t -> ChunkedSentence c t

-- | The type of Chunkers, incorporates chunking, training, serialization
--   and unique IDs for deserialization.
data Chunker c t
Chunker :: ([TaggedSentence t] -> [ChunkedSentence c t]) -> ([ChunkedSentence c t] -> IO (Chunker c t)) -> ByteString -> ByteString -> Chunker c t
chChunker :: Chunker c t -> [TaggedSentence t] -> [ChunkedSentence c t]
chTrainer :: Chunker c t -> [ChunkedSentence c t] -> IO (Chunker c t)
chSerialize :: Chunker c t -> ByteString
chId :: Chunker c t -> ByteString

-- | The unique ID for this implementation of a <a>Chunker</a>
chunkerID :: ByteString

-- | deserialize an <tt>AvgPerceptronChunker</tt> from a <a>ByteString</a>.
readChunker :: (ChunkTag c, Tag t) => ByteString -> Either String (Chunker c t)

module NLP.Similarity.VectorSim

-- | An efficient (ish) representation for documents in the "bag of words"
--   sense.
type TermVector = DefaultMap Text Double

-- | Generate a <a>TermVector</a> from a tokenized document.
mkVector :: Corpus -> [Text] -> TermVector

-- | Invokes similarity on full strings, using <a>words</a> for
--   tokenization, and no stemming.
--   
--   There *must* be at least one document in the corpus.
sim :: Corpus -> Text -> Text -> Double

-- | Determine how similar two documents are.
--   
--   This function assumes that each document has been tokenized and (if
--   desired) stemmed/case-normalized.
--   
--   This is a wrapper around <a>tvSim</a>, which is a *much* more
--   efficient implementation. If you need to run similarity against any
--   single document more than once, then you should create
--   <a>TermVector</a>s for each of your documents and use <a>tvSim</a>
--   instead of <a>similarity</a>.
--   
--   There *must* be at least one document in the corpus.
similarity :: Corpus -> [Text] -> [Text] -> Double

-- | Determine how similar two documents are.
--   
--   Calculates the similarity between two documents, represented as
--   <tt>TermVectors</tt>
tvSim :: TermVector -> TermVector -> Double

-- | Return the raw frequency of a term in a body of text.
--   
--   The first argument is the term to find, the second is a tokenized
--   document. This function does not do any stemming or additional text
--   modification.
tf :: Eq a => a -> [a] -> Int

-- | Calculate the inverse document frequency.
--   
--   The IDF is, roughly speaking, a measure of how popular a term is.
idf :: Text -> Corpus -> Double

-- | Calculate the tf*idf measure for a term given a document and a corpus.
tf_idf :: Text -> [Text] -> Corpus -> Double
cosVec :: TermVector -> TermVector -> Double

-- | Calculate the magnitude of a vector.
magnitude :: TermVector -> Double

-- | find the dot product of two vectors.
dotProd :: TermVector -> TermVector -> Double


-- | This is a very simple wrapper around Parsec for writing Information
--   Extraction patterns.
--   
--   Because the particular tags/tokens to parse depends on the training
--   corpus (for POS tagging) and the domain, this module only provides
--   basic extractors. You can, for example, create an extractor to find
--   noun phrases by combining the components provided here:
--   
--   <pre>
--   nounPhrase :: Extractor (Text, Tag)
--   nounPhrase = do
--     nlist &lt;- many1 (try (posTok $ Tag "NN")
--                 &lt;|&gt; try (posTok $ Tag "DT")
--                     &lt;|&gt; (posTok $ Tag "JJ"))
--     let term = T.intercalate " " (map fst nlist)
--     return (term, Tag "n-phr")
--   </pre>
module NLP.Extraction.Parsec

-- | A Parsec parser.
--   
--   Example usage:
--   
--   <pre>
--   &gt; :set -XOverloadedStrings
--   &gt; import Text.Parsec.Prim
--   &gt; parse myExtractor "interactive repl" someTaggedSentence
--   </pre>
type Extractor t = Parsec (TaggedSentence t) ()

-- | Consume a token with the given POS Tag
posTok :: Tag t => t -> Extractor t (POS t)

-- | Consume a token with the specified POS prefix.
--   
--   <pre>
--   &gt; parse (posPrefix "n") "ghci" [("Bob", Tag "np")]
--   Right [("Bob", Tag "np")]
--   </pre>
posPrefix :: Tag t => Text -> Extractor t (POS t)

-- | Text equality matching with optional case sensitivity.
matches :: CaseSensitive -> Token -> Token -> Bool

-- | Consume a token with the given lexical representation.
txtTok :: Tag t => CaseSensitive -> Token -> Extractor t (POS t)

-- | Consume any one non-empty token.
anyToken :: Tag t => Extractor t (POS t)
oneOf :: Tag t => CaseSensitive -> [Token] -> Extractor t (POS t)

-- | Skips any number of fill tokens, ending with the end parser, and
--   returning the last parsed result.
--   
--   This is useful when you know what you're looking for and (for
--   instance) don't care what comes first.
followedBy :: Tag t => Extractor t b -> Extractor t a -> Extractor t a
instance (Monad m, ChunkTag c, Tag t) => Stream (ChunkedSentence c t) m (ChunkOr c t)
instance (Monad m, Tag t) => Stream (TaggedSentence t) m (POS t)


-- | Example parsing with Parsec.
--   
--   This example shows how the following grammar, from NLTK, can be
--   implemented in Chatter, using Parsec-based Information Extraction
--   patterns:
--   
--   <pre>
--   grammar = r"""
--    NP: {&lt;DT|JJ|NN.*&gt;+}          # Chunk sequences of DT, JJ, NN
--    PP: {&lt;IN&gt;&lt;NP&gt;}               # Chunk prepositions followed by NP
--    VP: {&lt;VB.*&gt;&lt;NP|PP|CLAUSE&gt;+$} # Chunk verbs and their arguments
--    CLAUSE: {&lt;NP&gt;&lt;VP&gt;}           # Chunk NP, VP
--    """
--   </pre>
--   
--   <pre>
--   &gt; import NLP.Extraction.Examples.ParsecExamples
--   &gt; import Text.Parsec.Prim
--   &gt; tgr &lt;- defaultTagger
--   &gt; map (parse findClause "interactive") $ tag tgr "Mary saw the cat sit on the mat."
--   [Right (Chunk_CN (Chunk C_CL [Chunk_CN (Chunk C_NP [POS_CN (POS AT (Token "the")),POS_CN (POS NN (Token "cat"))]),Chunk_CN (Chunk C_VP [POS_CN (POS VB (Token "sit")),Chunk_CN (Chunk C_PP [POS_CN (POS IN (Token "on")),Chunk_CN (Chunk C_NP [POS_CN (POS AT (Token "the")),POS_CN (POS NN (Token "mat"))])])])]))]
--   </pre>
module NLP.Extraction.Examples.ParsecExamples

-- | Find a clause in a larger collection of text.
--   
--   A clause is defined by the <a>clause</a> extractor, and is a Noun
--   Phrase followed (immediately) by a Verb Phrase
--   
--   findClause skips over leading tokens, if needed, to locate a clause.
findClause :: Extractor Tag (ChunkOr Chunk Tag)

-- | Find a Noun Phrase followed by a Verb Phrase
clause :: Extractor Tag (ChunkOr Chunk Tag)
prepPhrase :: Extractor Tag (ChunkOr Chunk Tag)
nounPhrase :: Extractor Tag (ChunkOr Chunk Tag)
verbPhrase :: Extractor Tag (ChunkOr Chunk Tag)

module NLP.Corpora.Parsing

-- | Read a POS-tagged corpus out of a Text string of the form: "token/tag
--   token/tag..."
--   
--   <pre>
--   &gt;&gt;&gt; readPOS "Dear/jj Sirs/nns :/: Let/vb"
--   [("Dear",JJ),("Sirs",NNS),(":",Other ":"),("Let",VB)]
--   </pre>
readPOS :: Tag t => Text -> TaggedSentence t
readPOSWith :: Tag t => (Text -> t) -> Text -> TaggedSentence t

-- | Returns all but the last element of a string, unless the string is
--   empty, in which case it returns that string.
safeInit :: Text -> Text


-- | Averaged Perceptron Tagger
--   
--   Adapted from the python implementation found here:
--   
--   <ul>
--   
--   <li><a>https://github.com/sloria/textblob-aptagger/blob/master/textblob_aptagger/taggers.py</a></li>
--   </ul>
module NLP.POS.AvgPerceptronTagger

-- | Create an Averaged Perceptron Tagger using the specified back-off
--   tagger as a fall-back, if one is specified.
--   
--   This uses a tokenizer adapted from the <a>tokenize</a> package for a
--   tokenizer, and Eric Kow's fullstop sentence segmenter
--   (<a>http://hackage.haskell.org/package/fullstop</a>) as a sentence
--   splitter.
mkTagger :: Tag t => Perceptron -> Maybe (POSTagger t) -> POSTagger t

-- | Train a new <a>Perceptron</a>.
--   
--   The training corpus should be a collection of sentences, one sentence
--   on each line, and with each token tagged with a part of speech.
--   
--   For example, the input:
--   
--   <pre>
--   "The/DT dog/NN jumped/VB ./.\nThe/DT cat/NN slept/VB ./."
--   </pre>
--   
--   defines two training sentences.
--   
--   <pre>
--   &gt;&gt;&gt; tagger &lt;- trainNew "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--   
--   &gt;&gt;&gt; tag tagger $ map T.words $ T.lines "Dear sir"
--   "Dear/jj Sirs/nns :/: Let/vb"
--   </pre>
trainNew :: Tag t => (Text -> t) -> Text -> IO Perceptron

-- | Train a new <a>Perceptron</a> on a corpus of files.
trainOnFiles :: Tag t => (Text -> t) -> [FilePath] -> IO Perceptron

-- | Add training examples to a perceptron.
--   
--   <pre>
--   &gt;&gt;&gt; tagger &lt;- train emptyPerceptron "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--   
--   &gt;&gt;&gt; tag tagger $ map T.words $ T.lines "Dear sir"
--   "Dear/jj Sirs/nns :/: Let/vb"
--   </pre>
--   
--   If you're using multiple input files, this can be useful to improve
--   performance (by folding over the files). For example, see
--   <a>trainOnFiles</a>
train :: Tag t => (Text -> t) -> Perceptron -> Text -> IO Perceptron

-- | Train a model from sentences.
--   
--   Ported from Python:
--   
--   <pre>
--   def train(self, sentences, save_loc=None, nr_iter=5):
--       self._make_tagdict(sentences)
--       self.model.classes = self.classes
--       prev, prev2 = START
--       for iter_ in range(nr_iter):
--           c = 0
--           n = 0
--           for words, tags in sentences:
--               context = START + [self._normalize(w) for w in words] + END
--               for i, word in enumerate(words):
--                   guess = self.tagdict.get(word)
--                   if not guess:
--                       feats = self._get_features(i, word, context, prev, prev2)
--                       guess = self.model.predict(feats)
--                       self.model.update(tags[i], guess, feats)
--                   prev2 = prev; prev = guess
--                   c += guess == tags[i]
--                   n += 1
--           random.shuffle(sentences)
--           logging.info("Iter {0}: {1}/{2}={3}".format(iter_, c, n, _pc(c, n)))
--       self.model.average_weights()
--       # Pickle as a binary file
--       if save_loc is not None:
--           pickle.dump((self.model.weights, self.tagdict, self.classes),
--                        open(save_loc, 'wb'), -1)
--       return None
--   </pre>
trainInt :: Tag t => Int -> Perceptron -> [TaggedSentence t] -> IO Perceptron

-- | Tag a document (represented as a list of <a>Sentence</a>s) with a
--   trained <a>Perceptron</a>
--   
--   Ported from Python:
--   
--   <pre>
--   def tag(self, corpus, tokenize=True):
--       '''Tags a string `corpus`.'''
--       # Assume untokenized corpus has \n between sentences and ' ' between words
--       s_split = nltk.sent_tokenize if tokenize else lambda t: t.split('\n')
--       w_split = nltk.word_tokenize if tokenize else lambda s: s.split()
--       def split_sents(corpus):
--           for s in s_split(corpus):
--               yield w_split(s)
--       prev, prev2 = self.START
--       tokens = []
--       for words in split_sents(corpus):
--           context = self.START + [self._normalize(w) for w in words] + self.END
--           for i, word in enumerate(words):
--               tag = self.tagdict.get(word)
--               if not tag:
--                   features = self._get_features(i, word, context, prev, prev2)
--                   tag = self.model.predict(features)
--               tokens.append((word, tag))
--               prev2 = prev
--               prev = tag
--       return tokens
--   </pre>
tag :: Tag t => Perceptron -> [Sentence] -> [TaggedSentence t]

-- | Tag a single sentence.
tagSentence :: Tag t => Perceptron -> Sentence -> TaggedSentence t

-- | An empty perceptron, used to start training.
emptyPerceptron :: Perceptron
taggerID :: ByteString
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)


-- | This module aims to make tagging text with parts of speech trivially
--   easy.
--   
--   If you're new to <tt>chatter</tt> and POS-tagging, then I suggest you
--   simply try:
--   
--   <pre>
--   &gt;&gt;&gt; tagger &lt;- defaultTagger
--   
--   &gt;&gt;&gt; tagStr tagger "This is a sample sentence."
--   "This/dt is/bez a/at sample/nn sentence/nn ./."
--   </pre>
--   
--   Note that we used <a>tagStr</a>, instead of <a>tag</a>, or
--   <a>tagText</a>. Many people don't (yet!) use <a>Data.Text</a> by
--   default, so there is a wrapper around <a>tag</a> that packs and
--   unpacks the <a>String</a>. This is inefficient, but it's just to get
--   you started, and <a>tagStr</a> can be very handy when you're debugging
--   a tagger in ghci (or cabal repl).
--   
--   <a>tag</a> exposes more details of the tokenization and tagging, since
--   it returns a list of <a>TaggedSentence</a>s, but it doesn't print
--   results as nicely.
module NLP.POS

-- | Tag a chunk of input text with part-of-speech tags, using the sentence
--   splitter, tokenizer, and tagger contained in the <tt>POSTagger</tt>.
tag :: Tag t => POSTagger t -> Text -> [TaggedSentence t]

-- | Tag the tokens in a string.
--   
--   Returns a space-separated string of tokens, each token suffixed with
--   the part of speech. For example:
--   
--   <pre>
--   &gt;&gt;&gt; tag tagger "the dog jumped ."
--   "the/at dog/nn jumped/vbd ./."
--   </pre>
tagStr :: Tag t => POSTagger t -> String -> String

-- | Text version of tagStr
tagText :: Tag t => POSTagger t -> Text -> Text

-- | Train a <a>POSTagger</a> on a corpus of sentences.
--   
--   This will recurse through the <a>POSTagger</a> stack, training all the
--   backoff taggers as well. In order to do that, this function has to be
--   generic to the kind of taggers used, so it is not possible to train up
--   a new POSTagger from nothing: <a>train</a> wouldn't know what tagger
--   to create.
--   
--   To get around that restriction, you can use the various
--   <tt>mkTagger</tt> implementations, such as <a>mkTagger</a> or
--   NLP.POS.AvgPerceptronTagger.mkTagger. For example:
--   
--   <pre>
--   import NLP.POS.AvgPerceptronTagger as APT
--   
--   let newTagger = APT.mkTagger APT.emptyPerceptron Nothing
--   posTgr &lt;- train newTagger trainingExamples
--   </pre>
train :: Tag t => POSTagger t -> [TaggedSentence t] -> IO (POSTagger t)

-- | Train a tagger on string input in the standard form for POS tagged
--   corpora:
--   
--   <pre>
--   trainStr tagger "the/at dog/nn jumped/vbd ./."
--   </pre>
trainStr :: Tag t => POSTagger t -> String -> IO (POSTagger t)

-- | The <a>Text</a> version of <a>trainStr</a>
trainText :: Tag t => POSTagger t -> Text -> IO (POSTagger t)
tagTokens :: Tag t => POSTagger t -> [Sentence] -> [TaggedSentence t]

-- | Evaluate a <tt>POSTagger</tt>.
--   
--   Measures accuracy over all tags in the test corpus.
--   
--   Accuracy is calculated as:
--   
--   <pre>
--   |tokens tagged correctly| / |all tokens|
--   </pre>
eval :: Tag t => POSTagger t -> [TaggedSentence t] -> Double
serialize :: Tag t => POSTagger t -> ByteString
deserialize :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)) -> ByteString -> Either String (POSTagger t)

-- | The default table of tagger IDs to readTagger functions. Each tagger
--   packaged with Chatter should have an entry here. By convention, the
--   IDs used are the fully qualified module name of the tagger package.
taggerTable :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t))

-- | Store a <tt>POSTagger</tt> to a file.
saveTagger :: Tag t => POSTagger t -> FilePath -> IO ()

-- | Load a tagger, using the internal <a>taggerTable</a>. If you need to
--   specify your own mappings for new composite taggers, you should use
--   <a>deserialize</a>.
--   
--   This function checks the filename to determine if the content should
--   be decompressed. If the file ends with ".gz", then we assume it is a
--   gzipped model.
loadTagger :: Tag t => FilePath -> IO (POSTagger t)

-- | A basic POS tagger.
defaultTagger :: IO (POSTagger Tag)

-- | A POS tagger that has been trained on the Conll 2000 POS tags.
conllTagger :: IO (POSTagger Tag)

-- | A POS tagger trained on a subset of the Brown corpus.
brownTagger :: IO (POSTagger Tag)


-- | NLP.Chunk aims to make phrasal chunking trivially easy -- it is the
--   corollary to NLP.POS.
--   
--   The simplest way to try out chunking with Chatter is to open a repl
--   after installing chatter and try this:
--   
--   <pre>
--   &gt; import NLP.POS
--   &gt; import NLP.Chunk
--   &gt; tgr &lt;- defaultTagger
--   &gt; chk &lt;- defaultChunker
--   &gt; chunkText tgr chk "Monads are monoids in the category of endofunctors."
--    "[NP Monads/NNS are/VBP monoids/NNS] [PP in/IN] [NP the/DT category/NN] [PP of/IN] [NP endofunctors/NNS] ./."
--   </pre>
--   
--   Note that it isn't perfect--phrase chunking is tricky, and the
--   <tt>defaultTagger</tt> and <a>defaultChunker</a> aren't trained on the
--   largest training set (they use Conll 2000). You can easily train more
--   taggers and chunkers using the APIs exposed here if you have the
--   training data to do so.
module NLP.Chunk

-- | A basic Phrasal chunker.
defaultChunker :: IO (Chunker Chunk Tag)

-- | Convenient function to load the Conll2000 Chunker.
conllChunker :: IO (Chunker Chunk Tag)

-- | Train a chunker on a set of additional examples.
train :: (ChunkTag c, Tag t) => Chunker c t -> [ChunkedSentence c t] -> IO (Chunker c t)

-- | Chunk a <a>TaggedSentence</a> that has been produced by a Chatter
--   tagger, producing a rich representation of the Chunks and the Tags
--   detected.
--   
--   If you just want to see chunked output from standard text, you
--   probably want <a>chunkText</a> or <a>chunkStr</a>.
chunk :: (ChunkTag c, Tag t) => Chunker c t -> [TaggedSentence t] -> [ChunkedSentence c t]

-- | Convenience function to Tokenize, POS-tag, then Chunk the provided
--   text, and format the result in an easy-to-read format.
--   
--   <pre>
--   &gt; tgr &lt;- defaultTagger
--   &gt; chk &lt;- defaultChunker
--   &gt; chunkText tgr chk "The brown dog jumped over the lazy cat."
--   "[NP The/DT brown/NN dog/NN] [VP jumped/VBD] [NP over/IN the/DT lazy/JJ cat/NN] ./."
--   </pre>
chunkText :: (ChunkTag c, Tag t) => POSTagger t -> Chunker c t -> Text -> Text

-- | A wrapper around <a>chunkText</a> that packs strings.
chunkStr :: (ChunkTag c, Tag t) => POSTagger t -> Chunker c t -> String -> String

-- | The default table of chunker IDs to readChunker functions. Each
--   chunker packaged with Chatter should have an entry here. By
--   convention, the IDs used are the fully qualified module name of the
--   chunker package.
chunkerTable :: (ChunkTag c, Tag t) => Map ByteString (ByteString -> Either String (Chunker c t))

-- | Store a <a>Chunker</a> to disk.
saveChunker :: (ChunkTag c, Tag t) => Chunker c t -> FilePath -> IO ()

-- | Load a <a>Chunker</a> from disk, optionally gunzipping if needed.
--   (based on file extension)
loadChunker :: (ChunkTag c, Tag t) => FilePath -> IO (Chunker c t)
serialize :: (ChunkTag c, Tag t) => Chunker c t -> ByteString
deserialize :: (ChunkTag c, Tag t) => Map ByteString (ByteString -> Either String (Chunker c t)) -> ByteString -> Either String (Chunker c t)


-- | A parser for the Wiki NER work presented in:
--   
--   @Article{nothman2012:artint:wikiner, author = {Joel Nothman and Nicky
--   Ringland and Will Radford and Tara Murphy and James R. Curran}, title
--   = {Learning multilingual named entity recognition from {Wikipedia}},
--   journal = {Artificial Intelligence}, publisher = {Elsevier}, volume =
--   {194}, pages = {151--175}, year = {2012}, doi =
--   {10.1016/j.artint.2012.03.006}, url =
--   {http://dx.doi.org/10.1016/j.artint.2012.03.006} }
--   
--   And provided here:
--   <a>http://schwa.org/projects/resources/wiki/Wikiner</a>
--   
--   The format does not appear to be documented, but it looks like:
--   
--   <ul>
--   <li>One sentence per line.</li>
--   <li>Tagged tokens are separated by spaces</li>
--   <li>Items in a tagged token are separated by vertical bars ('|')</li>
--   <li>Each line of <tt>n</tt> text tokens contains 3*n items, starting
--   with a text token, a POS tag, then a IOB tag with one of the NER
--   classes</li>
--   </ul>
--   
--   For example, the sentence: The Oxford Companion to Philosophy says,
--   "there is no single defining position that all anarchists hold, and
--   those considered anarchists at best share a certain family
--   resemblance."
--   
--   Is rendered as: The|DT|I-MISC Oxford|NNP|I-MISC Companion|NNP|I-MISC
--   to|TO|I-MISC Philosophy|NNP|I-MISC says|VBZ|O ,|,|O "|LQU|O there|EX|O
--   is|VBZ|O no|DT|O single|JJ|O defining|VBG|O position|NN|O that|IN|O
--   all|DT|O anarchists|NNS|O hold|VBP|O ,|,|O and|CC|O those|DT|O
--   considered|VBN|O anarchists|NNS|O at|IN|O best|JJS|O share|NN|O a|DT|O
--   certain|JJ|O family|NN|O resemblance|NN|O .|.|O "|RQU|O
--   
--   This module also provides a trained model for NER via the averaged
--   perceptron chunker. This actually kind of works, which is a bit
--   amazing. For example:
--   
--   <pre>
--   import NLP.Corpora.WikiNer
--   import NLP.POS
--   import NLP.Chunk
--   tgr &lt;- defaultTagger
--   chk &lt;- wikiNerChunker
--   chunkText tgr chk "Real World Haskell is a book created by Don Stewart, Bryan O'Sullivan, and Jon Goerzen."
--   "[ORG Real/NNP] [MISC World/NNP] [PER Haskell/NNP] is/VBZ a/DT book/NN created/VBN by/IN [PER Don/NNP Stewart/NNP] ,/, [PER Bryan/NNP O'Sullivan/NNP] ,/, and/CC [PER Jon/NNP Goerzen/NNP] ./."
--   </pre>
module NLP.Corpora.WikiNer
parseWikiNer :: Text -> Either Error [[IOBChunk Chunk Tag]]

-- | Train a chunker on a provided corpus.
trainChunker :: [FilePath] -> IO (Chunker Chunk Tag)
wikiNerChunker :: IO (Chunker Chunk Tag)

-- | Different classes of Named Entity used in the WikiNER data set.
data Chunk
LOC :: Chunk
MISC :: Chunk
ORG :: Chunk
PER :: Chunk

-- | "out" not a chunk.
C_O :: Chunk
instance Read Chunk
instance Show Chunk
instance Ord Chunk
instance Eq Chunk
instance Generic Chunk
instance Enum Chunk
instance Bounded Chunk
instance Datatype D1Chunk
instance Constructor C1_0Chunk
instance Constructor C1_1Chunk
instance Constructor C1_2Chunk
instance Constructor C1_3Chunk
instance Constructor C1_4Chunk
instance ChunkTag Chunk
instance Serialize Chunk
instance Arbitrary Chunk