-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | A library of simple NLP algorithms.
--
-- chatter is a collection of simple Natural Language Processing
-- algorithms.
--
-- Chatter supports:
--
--
@package chatter
@version 0.3.0.0
module Data.DefaultMap
-- | Defaulting Map; a Map that returns a default value when queried for a
-- key that does not exist.
data DefaultMap k v
DefMap :: v -> Map k v -> DefaultMap k v
defDefault :: DefaultMap k v -> v
defMap :: DefaultMap k v -> Map k v
-- | Create an empty DefaultMap
empty :: v -> DefaultMap k v
-- | Query the map for a value. Returns the default if the key is not
-- found.
lookup :: Ord k => k -> DefaultMap k v -> v
-- | Create a DefaultMap from a default value and a list.
fromList :: Ord k => v -> [(k, v)] -> DefaultMap k v
-- | Access the keys as a list.
keys :: DefaultMap k a -> [k]
-- | Fold over the values in the map.
--
-- Note that this does *not* fold over the default value -- this fold
-- behaves in the same way as a standard foldl
foldl :: (a -> b -> a) -> a -> DefaultMap k b -> a
instance (Ord k, Read k, Read v) => Read (DefaultMap k v)
instance (Show k, Show v) => Show (DefaultMap k v)
instance (Eq k, Eq v) => Eq (DefaultMap k v)
instance (Ord k, Ord v) => Ord (DefaultMap k v)
instance Generic (DefaultMap k v)
instance Datatype D1DefaultMap
instance Constructor C1_0DefaultMap
instance Selector S1_0_0DefaultMap
instance Selector S1_0_1DefaultMap
instance (NFData k, NFData v, Ord k) => NFData (DefaultMap k v)
instance (Ord k, Serialize k, Serialize v) => Serialize (DefaultMap k v)
-- | Utilities for reading mailman-style email archives.
module NLP.Corpora.Email
-- | Path to the directory containing all the PLUG archives.
plugDataPath :: FilePath
plugArchiveText :: IO [Text]
plugArchiveTokens :: IO [[Text]]
fullPlugArchive :: IO [Message]
readF :: FilePath -> IO Text
module NLP.Types.General
-- | Just a handy alias for Text
type Error = Text
-- | Boolean type to indicate case sensitivity for textual comparisons.
data CaseSensitive
Sensitive :: CaseSensitive
Insensitive :: CaseSensitive
instance Read CaseSensitive
instance Show CaseSensitive
instance Generic CaseSensitive
instance Datatype D1CaseSensitive
instance Constructor C1_0CaseSensitive
instance Constructor C1_1CaseSensitive
instance Serialize CaseSensitive
module NLP.Types.Tags
class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => ChunkTag a
fromChunk :: ChunkTag a => a -> Text
class (Ord a, Eq a, Read a, Show a, Generic a, Serialize a) => Tag a
fromTag :: Tag a => a -> Text
parseTag :: Tag a => Text -> a
tagUNK :: Tag a => a
tagTerm :: Tag a => a -> Text
newtype RawChunk
RawChunk :: Text -> RawChunk
newtype RawTag
RawTag :: Text -> RawTag
-- | Tag instance for unknown tagsets.
instance Ord RawChunk
instance Eq RawChunk
instance Read RawChunk
instance Show RawChunk
instance Generic RawChunk
instance Ord RawTag
instance Eq RawTag
instance Read RawTag
instance Show RawTag
instance Generic RawTag
instance Datatype D1RawChunk
instance Constructor C1_0RawChunk
instance Datatype D1RawTag
instance Constructor C1_0RawTag
instance Serialize Text
instance Arbitrary RawTag
instance Tag RawTag
instance Serialize RawTag
instance ChunkTag RawChunk
instance Serialize RawChunk
-- | The internal implementation of critical types in terms of the Brown
-- corpus.
module NLP.Corpora.Brown
data Tag
-- | (
Op_Paren :: Tag
-- | )
Cl_Paren :: Tag
-- | negator
Negator :: Tag
-- | ,
Comma :: Tag
-- | dash
Dash :: Tag
-- | . Sentence Terminator
Term :: Tag
-- | :
Colon :: Tag
-- | determiner/pronoun, pre-qualifier e.g.; quite such rather
ABL :: Tag
-- | determiner/pronoun, pre-quantifier e.g.; all half many nary
ABN :: Tag
-- | determiner/pronoun, double conjunction or pre-quantifier e.g.; both
ABX :: Tag
-- | determiner/pronoun, post-determiner e.g.; many other next more last former
-- little several enough most least only very few fewer past same Last
-- latter less single plenty 'nough lesser certain various manye
-- next-to-last particular final previous present nuf
AP :: Tag
-- | determiner/pronoun, post-determiner, genitive e.g.; other's
APdollar :: Tag
-- | determiner/pronoun, post-determiner, hyphenated pair e.g.; many-much
AP_pl_AP :: Tag
-- | article e.g.; the an no a every th' ever' ye
AT :: Tag
-- | verb "to be", infinitive or imperative e.g.; be
BE :: Tag
-- | verb "to be", past tense, 2nd person singular or all persons plural
-- e.g.; were
BED :: Tag
-- | verb "to be", past tense, 2nd person singular or all persons plural,
-- negated e.g.; weren't
BEDstar :: Tag
-- | verb "to be", past tense, 1st and 3rd person singular e.g.; was
BEDZ :: Tag
-- | verb "to be", past tense, 1st and 3rd person singular, negated e.g.;
-- wasn't
BEDZstar :: Tag
-- | verb "to be", present participle or gerund e.g.; being
BEG :: Tag
-- | verb "to be", present tense, 1st person singular e.g.; am
BEM :: Tag
-- | verb "to be", present tense, 1st person singular, negated e.g.; ain't
BEMstar :: Tag
-- | verb "to be", past participle e.g.; been
BEN :: Tag
-- | verb "to be", present tense, 2nd person singular or all persons plural
-- e.g.; are art
BER :: Tag
-- | verb "to be", present tense, 2nd person singular or all persons
-- plural, negated e.g.; aren't ain't
BERstar :: Tag
-- | verb "to be", present tense, 3rd person singular e.g.; is
BEZ :: Tag
-- | verb "to be", present tense, 3rd person singular, negated e.g.; isn't
-- ain't
BEZstar :: Tag
-- | conjunction, coordinating e.g.; and or but plus & either neither
-- nor yet n and/or minus an'
CC :: Tag
-- | numeral, cardinal e.g.; two one 1 four 2 1913 71 74 637 1937 8 five
-- three million 87-31 29-5 seven 1,119 fifty-three 7.5 billion hundred
-- 125,000 1,700 60 100 six ...
CD :: Tag
-- | numeral, cardinal, genitive e.g.; 1960's 1961's .404's
CDdollar :: Tag
-- | conjunction, subordinating e.g.; that as after whether before while
-- like because if since for than altho until so unless though providing
-- once lest sposin till whereas whereupon supposing tho' albeit
-- then so's 'fore
CS :: Tag
-- | verb "to do", uninflected present tense, infinitive or imperative
-- e.g.; do dost
DO :: Tag
-- | verb "to do", uninflected present tense or imperative, negated e.g.;
-- don't
DOstar :: Tag
-- | verb "to do", past or present tense + pronoun, personal, nominative,
-- not 3rd person singular e.g.; d'you
DO_pl_PPSS :: Tag
-- | verb "to do", past tense e.g.; did done
DOD :: Tag
-- | verb "to do", past tense, negated e.g.; didn't
DODstar :: Tag
-- | verb "to do", present tense, 3rd person singular e.g.; does
DOZ :: Tag
-- | verb "to do", present tense, 3rd person singular, negated e.g.;
-- doesn't don't
DOZstar :: Tag
-- | determiner/pronoun, singular e.g.; this each another that 'nother
DT :: Tag
-- | determiner/pronoun, singular, genitive e.g.; another's
DTdollar :: Tag
-- | determiner/pronoun + verb "to be", present tense, 3rd person singular
-- e.g.; that's
DT_pl_BEZ :: Tag
-- | determiner/pronoun + modal auxiliary e.g.; that'll this'll
DT_pl_MD :: Tag
-- | determiner/pronoun, singular or plural e.g.; any some
DTI :: Tag
-- | determiner/pronoun, plural e.g.; these those them
DTS :: Tag
-- | pronoun, plural + verb "to be", present tense, 3rd person singular
-- e.g.; them's
DTS_pl_BEZ :: Tag
-- | determiner, pronoun or double conjunction e.g.; neither either one
DTX :: Tag
-- | existential there e.g.; there
EX :: Tag
-- | existential there + verb "to be", present tense, 3rd person singular
-- e.g.; there's
EX_pl_BEZ :: Tag
-- | existential there + verb "to have", past tense e.g.; there'd
EX_pl_HVD :: Tag
-- | existential there + verb "to have", present tense, 3rd person singular
-- e.g.; there's
EX_pl_HVZ :: Tag
-- | existential there + modal auxiliary e.g.; there'll there'd
EX_pl_MD :: Tag
-- | foreign word: negator e.g.; pas non ne
FW_star :: Tag
-- | foreign word: article e.g.; la le el un die der ein keine eine das las
-- les Il
FW_AT :: Tag
-- | foreign word: article + noun, singular, common e.g.; l'orchestre
-- l'identite l'arcade l'ange l'assistance l'activite L'Universite
-- l'independance L'Union L'Unita l'osservatore
FW_AT_pl_NN :: Tag
-- | foreign word: article + noun, singular, proper e.g.; L'Astree
-- L'Imperiale
FW_AT_pl_NP :: Tag
-- | foreign word: verb "to be", infinitive or imperative e.g.; sit
FW_BE :: Tag
-- | foreign word: verb "to be", present tense, 2nd person singular or all
-- persons plural e.g.; sind sunt etes
FW_BER :: Tag
-- | foreign word: verb "to be", present tense, 3rd person singular e.g.;
-- ist est
FW_BEZ :: Tag
-- | foreign word: conjunction, coordinating e.g.; et ma mais und aber och
-- nec y
FW_CC :: Tag
-- | foreign word: numeral, cardinal e.g.; une cinq deux sieben unam zwei
FW_CD :: Tag
-- | foreign word: conjunction, subordinating e.g.; bevor quam ma
FW_CS :: Tag
-- | foreign word: determiner/pronoun, singular e.g.; hoc
FW_DT :: Tag
-- | foreign word: determiner + verb "to be", present tense, 3rd person
-- singular e.g.; c'est
FW_DT_pl_BEZ :: Tag
-- | foreign word: determiner/pronoun, plural e.g.; haec
FW_DTS :: Tag
-- | foreign word: verb "to have", present tense, not 3rd person singular
-- e.g.; habe
FW_HV :: Tag
-- | foreign word: preposition e.g.; ad de en a par con dans ex von auf
-- super post sine sur sub avec per inter sans pour pendant in di
FW_IN :: Tag
-- | foreign word: preposition + article e.g.; della des du aux zur d'un
-- del dell'
FW_IN_pl_AT :: Tag
-- | foreign word: preposition + noun, singular, common e.g.; d'etat
-- d'hotel d'argent d'identite d'art
FW_IN_pl_NN :: Tag
-- | foreign word: preposition + noun, singular, proper e.g.; d'Yquem
-- d'Eiffel
FW_IN_pl_NP :: Tag
-- | foreign word: adjective e.g.; avant Espagnol sinfonica Siciliana
-- Philharmonique grand publique haute noire bouffe Douce meme humaine
-- bel serieuses royaux anticus presto Sovietskaya Bayerische comique
-- schwarzen ...
FW_JJ :: Tag
-- | foreign word: adjective, comparative e.g.; fortiori
FW_JJR :: Tag
-- | foreign word: adjective, superlative e.g.; optimo
FW_JJT :: Tag
-- | foreign word: noun, singular, common e.g.; ballet esprit ersatz mano
-- chatte goutte sang Fledermaus oud def kolkhoz roi troika canto boite
-- blutwurst carne muzyka bonheur monde piece force ...
FW_NN :: Tag
-- | foreign word: noun, singular, common, genitive e.g.; corporis
-- intellectus arte's dei aeternitatis senioritatis curiae patronne's
-- chambre's
FW_NNdollar :: Tag
-- | foreign word: noun, plural, common e.g.; al culpas vopos boites haflis
-- kolkhozes augen tyrannis alpha-beta-gammas metis banditos rata phis
-- negociants crus Einsatzkommandos kamikaze wohaws sabinas zorrillas
-- palazzi engages coureurs corroborees yori Ubermenschen ...
FW_NNS :: Tag
-- | foreign word: noun, singular, proper e.g.; Karshilama Dieu Rundfunk
-- Afrique Espanol Afrika Spagna Gott Carthago deus
FW_NP :: Tag
-- | foreign word: noun, plural, proper e.g.; Svenskarna Atlantes Dieux
FW_NPS :: Tag
-- | foreign word: noun, singular, adverbial e.g.; heute morgen aujourd'hui
-- hoy
FW_NR :: Tag
-- | foreign word: numeral, ordinal e.g.; 18e 17e quintus
FW_OD :: Tag
-- | foreign word: pronoun, nominal e.g.; hoc
FW_PN :: Tag
-- | foreign word: determiner, possessive e.g.; mea mon deras vos
FW_PPdollar :: Tag
-- | foreign word: pronoun, singular, reflexive e.g.; se
FW_PPL :: Tag
-- | foreign word: pronoun, singular, reflexive + verb, present tense, 3rd
-- person singular e.g.; s'excuse s'accuse
FW_PPL_pl_VBZ :: Tag
-- | foreign word: pronoun, personal, accusative e.g.; lui me moi mi
FW_PPO :: Tag
-- | foreign word: pronoun, personal, accusative + preposition e.g.; mecum
-- tecum
FW_PPO_pl_IN :: Tag
-- | foreign word: pronoun, personal, nominative, 3rd person singular e.g.;
-- il
FW_PPS :: Tag
-- | foreign word: pronoun, personal, nominative, not 3rd person singular
-- e.g.; ich vous sie je
FW_PPSS :: Tag
-- | foreign word: pronoun, personal, nominative, not 3rd person singular +
-- verb "to have", present tense, not 3rd person singular e.g.; j'ai
FW_PPSS_pl_HV :: Tag
-- | foreign word: qualifier e.g.; minus
FW_QL :: Tag
-- | foreign word: adverb e.g.; bas assai deja um wiederum cito velociter
-- vielleicht simpliciter non zu domi nuper sic forsan olim oui semper
-- tout despues hors
FW_RB :: Tag
-- | foreign word: adverb + conjunction, coordinating e.g.; forisque
FW_RB_pl_CC :: Tag
-- | foreign word: infinitival to + verb, infinitive e.g.; d'entretenir
FW_TO_pl_VB :: Tag
-- | foreign word: interjection e.g.; sayonara bien adieu arigato bonjour
-- adios bueno tchalo ciao o
FW_UH :: Tag
-- | foreign word: verb, present tense, not 3rd person singular, imperative
-- or infinitive e.g.; nolo contendere vive fermate faciunt esse vade
-- noli tangere dites duces meminisse iuvabit gosaimasu voulez habla
-- ksuu'peliafo lacheln miuchi say allons strafe portant
FW_VB :: Tag
-- | foreign word: verb, past tense e.g.; stabat peccavi audivi
FW_VBD :: Tag
-- | foreign word: verb, present participle or gerund e.g.; nolens volens
-- appellant seq. obliterans servanda dicendi delenda
FW_VBG :: Tag
-- | foreign word: verb, past participle e.g.; vue verstrichen rasa
-- verboten engages
FW_VBN :: Tag
-- | foreign word: verb, present tense, 3rd person singular e.g.; gouverne
-- sinkt sigue diapiace
FW_VBZ :: Tag
-- | foreign word: WH-determiner e.g.; quo qua quod que quok
FW_WDT :: Tag
-- | foreign word: WH-pronoun, accusative e.g.; quibusdam
FW_WPO :: Tag
-- | foreign word: WH-pronoun, nominative e.g.; qui
FW_WPS :: Tag
-- | verb "to have", uninflected present tense, infinitive or imperative
-- e.g.; have hast
HV :: Tag
-- | verb "to have", uninflected present tense or imperative, negated e.g.;
-- haven't ain't
HVstar :: Tag
-- | verb "to have", uninflected present tense + infinitival to e.g.; hafta
HV_pl_TO :: Tag
-- | verb "to have", past tense e.g.; had
HVD :: Tag
-- | verb "to have", past tense, negated e.g.; hadn't
HVDstar :: Tag
-- | verb "to have", present participle or gerund e.g.; having
HVG :: Tag
-- | verb "to have", past participle e.g.; had
HVN :: Tag
-- | verb "to have", present tense, 3rd person singular e.g.; has hath
HVZ :: Tag
-- | verb "to have", present tense, 3rd person singular, negated e.g.;
-- hasn't ain't
HVZstar :: Tag
-- | preposition e.g.; of in for by considering to on among at through with
-- under into regarding than since despite according per before toward
-- against as after during including between without except upon out over
-- ...
IN :: Tag
-- | preposition, hyphenated pair e.g.; f'ovuh
IN_pl_IN :: Tag
-- | preposition + pronoun, personal, accusative e.g.; t'hi-im
IN_pl_PPO :: Tag
-- | adjective e.g.; recent over-all possible hard-fought favorable hard
-- meager fit such widespread outmoded inadequate ambiguous grand
-- clerical effective orderly federal foster general proportionate ...
JJ :: Tag
-- | adjective, genitive e.g.; Great's
JJdollar :: Tag
-- | adjective, hyphenated pair e.g.; big-large long-far
JJ_pl_JJ :: Tag
-- | adjective, comparative e.g.; greater older further earlier later freer
-- franker wider better deeper firmer tougher faster higher bigger worse
-- younger lighter nicer slower happier frothier Greater newer Elder ...
JJR :: Tag
-- | adjective, comparative + conjunction, subordinating e.g.; lighter'n
JJR_pl_CS :: Tag
-- | adjective, semantically superlative e.g.; top chief principal
-- northernmost master key head main tops utmost innermost foremost
-- uppermost paramount topmost
JJS :: Tag
-- | adjective, superlative e.g.; best largest coolest calmest latest
-- greatest earliest simplest strongest newest fiercest unhappiest worst
-- youngest worthiest fastest hottest fittest lowest finest smallest
-- staunchest ...
JJT :: Tag
-- | modal auxiliary e.g.; should may might will would must can could shall
-- ought need wilt
MD :: Tag
-- | modal auxiliary, negated e.g.; cannot couldn't wouldn't can't won't
-- shouldn't shan't mustn't musn't
MDstar :: Tag
-- | modal auxiliary + verb "to have", uninflected form e.g.; shouldda
-- musta coulda must've woulda could've
MD_pl_HV :: Tag
-- | modal auxiliary + pronoun, personal, nominative, not 3rd person
-- singular e.g.; willya
MD_pl_PPSS :: Tag
-- | modal auxiliary + infinitival to e.g.; oughta
MD_pl_TO :: Tag
-- | noun, singular, common e.g.; failure burden court fire appointment
-- awarding compensation Mayor interim committee fact effect airport
-- management surveillance jail doctor intern extern night weekend duty
-- legislation Tax Office ...
NN :: Tag
-- | noun, singular, common, genitive e.g.; season's world's player's
-- night's chapter's golf's football's baseball's club's U.'s coach's
-- bride's bridegroom's board's county's firm's company's
-- superintendent's mob's Navy's ...
NNdollar :: Tag
-- | noun, singular, common + verb "to be", present tense, 3rd person
-- singular e.g.; water's camera's sky's kid's Pa's heat's throat's
-- father's money's undersecretary's granite's level's wife's fat's
-- Knife's fire's name's hell's leg's sun's roulette's cane's guy's
-- kind's baseball's ...
NN_pl_BEZ :: Tag
-- | noun, singular, common + verb "to have", past tense e.g.; Pa'd
NN_pl_HVD :: Tag
-- | noun, singular, common + verb "to have", present tense, 3rd person
-- singular e.g.; guy's Knife's boat's summer's rain's company's
NN_pl_HVZ :: Tag
-- | noun, singular, common + preposition e.g.; buncha
NN_pl_IN :: Tag
-- | noun, singular, common + modal auxiliary e.g.; cowhand'd sun'll
NN_pl_MD :: Tag
-- | noun, singular, common, hyphenated pair e.g.; stomach-belly
NN_pl_NN :: Tag
-- | noun, plural, common e.g.; irregularities presentments thanks reports
-- voters laws legislators years areas adjustments chambers $100 bonds
-- courts sales details raises sessions members congressmen votes polls
-- calls ...
NNS :: Tag
-- | noun, plural, common, genitive e.g.; taxpayers' children's members'
-- States' women's cutters' motorists' steelmakers' hours' Nations'
-- lawyers' prisoners' architects' tourists' Employers' secretaries'
-- Rogues' ...
NNSdollar :: Tag
-- | noun, plural, common + modal auxiliary e.g.; duds'd oystchers'll
NNS_pl_MD :: Tag
-- | noun, singular, proper e.g.; Fulton Atlanta September-October Durwood
-- Pye Ivan Allen Jr. Jan. Alpharetta Grady William B. Hartsfield Pearl
-- Williams Aug. Berry J. M. Cheshire Griffin Opelika Ala. E. Pelham
-- Snodgrass ...
NP :: Tag
-- | noun, singular, proper, genitive e.g.; Green's Landis' Smith's
-- Carreon's Allison's Boston's Spahn's Willie's Mickey's Milwaukee's
-- Mays' Howsam's Mantle's Shaw's Wagner's Rickey's Shea's Palmer's
-- Arnold's Broglio's ...
NPdollar :: Tag
-- | noun, singular, proper + verb "to be", present tense, 3rd person
-- singular e.g.; W.'s Ike's Mack's Jack's Kate's Katharine's Black's
-- Arthur's Seaton's Buckhorn's Breed's Penny's Rob's Kitty's Blackwell's
-- Myra's Wally's Lucille's Springfield's Arlene's
NP_pl_BEZ :: Tag
-- | noun, singular, proper + verb "to have", present tense, 3rd person
-- singular e.g.; Bill's Guardino's Celie's Skolman's Crosson's Tim's
-- Wally's
NP_pl_HVZ :: Tag
-- | noun, singular, proper + modal auxiliary e.g.; Gyp'll John'll
NP_pl_MD :: Tag
-- | noun, plural, proper e.g.; Chases Aderholds Chapelles Armisteads
-- Lockies Carbones French Marskmen Toppers Franciscans Romans Cadillacs
-- Masons Blacks Catholics British Dixiecrats Mississippians Congresses
-- ...
NPS :: Tag
-- | noun, plural, proper, genitive e.g.; Republicans' Orioles' Birds'
-- Yanks' Redbirds' Bucs' Yankees' Stevenses' Geraghtys' Burkes' Wackers'
-- Achaeans' Dresbachs' Russians' Democrats' Gershwins' Adventists'
-- Negroes' Catholics' ...
NPSdollar :: Tag
-- | noun, singular, adverbial e.g.; Friday home Wednesday Tuesday Monday
-- Sunday Thursday yesterday tomorrow tonight West East Saturday west
-- left east downtown north northeast southeast northwest North South
-- right ...
NR :: Tag
-- | noun, singular, adverbial, genitive e.g.; Saturday's Monday's
-- yesterday's tonight's tomorrow's Sunday's Wednesday's Friday's today's
-- Tuesday's West's Today's South's
NRdollar :: Tag
-- | noun, singular, adverbial + modal auxiliary e.g.; today'll
NR_pl_MD :: Tag
-- | noun, plural, adverbial e.g.; Sundays Mondays Saturdays Wednesdays
-- Souths Fridays
NRS :: Tag
-- | numeral, ordinal e.g.; first 13th third nineteenth 2d 61st second
-- sixth eighth ninth twenty-first eleventh 50th eighteenth- Thirty-ninth
-- 72nd 1/20th twentieth mid-19th thousandth 350th sixteenth 701st ...
OD :: Tag
-- | pronoun, nominal e.g.; none something everything one anyone nothing
-- nobody everybody everyone anybody anything someone no-one nothin
PN :: Tag
-- | pronoun, nominal, genitive e.g.; one's someone's anybody's nobody's
-- everybody's anyone's everyone's
PNdollar :: Tag
-- | pronoun, nominal + verb "to be", present tense, 3rd person singular
-- e.g.; nothing's everything's somebody's nobody's someone's
PN_pl_BEZ :: Tag
-- | pronoun, nominal + verb "to have", past tense e.g.; nobody'd
PN_pl_HVD :: Tag
-- | pronoun, nominal + verb "to have", present tense, 3rd person singular
-- e.g.; nobody's somebody's one's
PN_pl_HVZ :: Tag
-- | pronoun, nominal + modal auxiliary e.g.; someone'll somebody'll
-- anybody'd
PN_pl_MD :: Tag
-- | determiner, possessive e.g.; our its his their my your her out thy
-- mine thine
PPdollar :: Tag
-- | pronoun, possessive e.g.; ours mine his hers theirs yours
PPdollardollar :: Tag
-- | pronoun, singular, reflexive e.g.; itself himself myself yourself
-- herself oneself ownself
PPL :: Tag
-- | pronoun, plural, reflexive e.g.; themselves ourselves yourselves
PPLS :: Tag
-- | pronoun, personal, accusative e.g.; them it him me us you 'em her thee
-- we'uns
PPO :: Tag
-- | pronoun, personal, nominative, 3rd person singular e.g.; it he she
-- thee
PPS :: Tag
-- | pronoun, personal, nominative, 3rd person singular + verb "to be",
-- present tense, 3rd person singular e.g.; it's he's she's
PPS_pl_BEZ :: Tag
-- | pronoun, personal, nominative, 3rd person singular + verb "to have",
-- past tense e.g.; she'd he'd it'd
PPS_pl_HVD :: Tag
-- | pronoun, personal, nominative, 3rd person singular + verb "to have",
-- present tense, 3rd person singular e.g.; it's he's she's
PPS_pl_HVZ :: Tag
-- | pronoun, personal, nominative, 3rd person singular + modal auxiliary
-- e.g.; he'll she'll it'll he'd it'd she'd
PPS_pl_MD :: Tag
-- | pronoun, personal, nominative, not 3rd person singular e.g.; they we I
-- you ye thou you'uns
PPSS :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
-- present tense, 1st person singular e.g.; I'm Ahm
PPSS_pl_BEM :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
-- present tense, 2nd person singular or all persons plural e.g.; we're
-- you're they're
PPSS_pl_BER :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
-- present tense, 3rd person singular e.g.; you's
PPSS_pl_BEZ :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to be",
-- present tense, 3rd person singular, negated e.g.; taint
PPSS_pl_BEZstar :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to
-- have", uninflected present tense e.g.; I've we've they've you've
PPSS_pl_HV :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb "to
-- have", past tense e.g.; I'd you'd we'd they'd
PPSS_pl_HVD :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + modal
-- auxiliary e.g.; you'll we'll I'll we'd I'd they'll they'd you'd
PPSS_pl_MD :: Tag
-- | pronoun, personal, nominative, not 3rd person singular + verb,
-- uninflected present tense e.g.; y'know
PPSS_pl_VB :: Tag
-- | qualifier, pre e.g.; well less very most so real as highly
-- fundamentally even how much remarkably somewhat more completely too
-- thus ill deeply little overly halfway almost impossibly far severly
-- such ...
QL :: Tag
-- | qualifier, post e.g.; indeed enough still 'nuff
QLP :: Tag
-- | adverb e.g.; only often generally also nevertheless upon together back
-- newly no likely meanwhile near then heavily there apparently yet
-- outright fully aside consistently specifically formally ever just ...
RB :: Tag
-- | adverb, genitive e.g.; else's
RBdollar :: Tag
-- | adverb + verb "to be", present tense, 3rd person singular e.g.; here's
-- there's
RB_pl_BEZ :: Tag
-- | adverb + conjunction, subordinating e.g.; well's soon's
RB_pl_CS :: Tag
-- | adverb, comparative e.g.; further earlier better later higher tougher
-- more harder longer sooner less faster easier louder farther oftener
-- nearer cheaper slower tighter lower worse heavier quicker ...
RBR :: Tag
-- | adverb, comparative + conjunction, subordinating e.g.; more'n
RBR_pl_CS :: Tag
-- | adverb, superlative e.g.; most best highest uppermost nearest
-- brightest hardest fastest deepest farthest loudest ...
RBT :: Tag
-- | adverb, nominal e.g.; here afar then
RN :: Tag
-- | adverb, particle e.g.; up out off down over on in about through across
-- after
RP :: Tag
-- | adverb, particle + preposition e.g.; out'n outta
RP_pl_IN :: Tag
-- | infinitival to e.g.; to t'
TO :: Tag
-- | infinitival to + verb, infinitive e.g.; t'jawn t'lah
TO_pl_VB :: Tag
-- | interjection e.g.; Hurrah bang whee hmpf ah goodbye oops
-- oh-the-pain-of-it ha crunch say oh why see well hello lo alas
-- tarantara rum-tum-tum gosh hell keerist Jesus Keeeerist boy c'mon 'mon
-- goddamn bah hoo-pig damn ...
UH :: Tag
-- | verb, base: uninflected present, imperative or infinitive e.g.;
-- investigate find act follow inure achieve reduce take remedy re-set
-- distribute realize disable feel receive continue place protect
-- eliminate elaborate work permit run enter force ...
VB :: Tag
-- | verb, base: uninflected present or infinitive + article e.g.; wanna
VB_pl_AT :: Tag
-- | verb, base: uninflected present, imperative or infinitive +
-- preposition e.g.; lookit
VB_pl_IN :: Tag
-- | verb, base: uninflected present, imperative or infinitive + adjective
-- e.g.; die-dead
VB_pl_JJ :: Tag
-- | verb, uninflected present tense + pronoun, personal, accusative e.g.;
-- let's lemme gimme
VB_pl_PPO :: Tag
-- | verb, imperative + adverbial particle e.g.; g'ahn c'mon
VB_pl_RP :: Tag
-- | verb, base: uninflected present, imperative or infinitive +
-- infinitival to e.g.; wanta wanna
VB_pl_TO :: Tag
-- | verb, base: uninflected present, imperative or infinitive; hyphenated
-- pair e.g.; say-speak
VB_pl_VB :: Tag
-- | verb, past tense e.g.; said produced took recommended commented urged
-- found added praised charged listed became announced brought attended
-- wanted voted defeated received got stood shot scheduled feared
-- promised made ...
VBD :: Tag
-- | verb, present participle or gerund e.g.; modernizing improving
-- purchasing Purchasing lacking enabling pricing keeping getting picking
-- entering voting warning making strengthening setting neighboring
-- attending participating moving ...
VBG :: Tag
-- | verb, present participle + infinitival to e.g.; gonna
VBG_pl_TO :: Tag
-- | verb, past participle e.g.; conducted charged won received studied
-- revised operated accepted combined experienced recommended effected
-- granted seen protected adopted retarded notarized selected composed
-- gotten printed ...
VBN :: Tag
-- | verb, past participle + infinitival to e.g.; gotta
VBN_pl_TO :: Tag
-- | verb, present tense, 3rd person singular e.g.; deserves believes
-- receives takes goes expires says opposes starts permits expects thinks
-- faces votes teaches holds calls fears spends collects backs eliminates
-- sets flies gives seeks reads ...
VBZ :: Tag
-- | WH-determiner e.g.; which what whatever whichever whichever-the-hell
WDT :: Tag
-- | WH-determiner + verb "to be", present tense, 2nd person singular or
-- all persons plural e.g.; what're
WDT_pl_BER :: Tag
-- | WH-determiner + verb "to be", present, 2nd person singular or all
-- persons plural + pronoun, personal, nominative, not 3rd person
-- singular e.g.; whaddya
WDT_pl_BER_pl_PP :: Tag
-- | WH-determiner + verb "to be", present tense, 3rd person singular e.g.;
-- what's
WDT_pl_BEZ :: Tag
-- | WH-determiner + verb "to do", uninflected present tense + pronoun,
-- personal, nominative, not 3rd person singular e.g.; whaddya
WDT_pl_DO_pl_PPS :: Tag
-- | WH-determiner + verb "to do", past tense e.g.; what'd
WDT_pl_DOD :: Tag
-- | WH-determiner + verb "to have", present tense, 3rd person singular
-- e.g.; what's
WDT_pl_HVZ :: Tag
-- | WH-pronoun, genitive e.g.; whose whosever
WPdollar :: Tag
-- | WH-pronoun, accusative e.g.; whom that who
WPO :: Tag
-- | WH-pronoun, nominative e.g.; that who whoever whosoever what
-- whatsoever
WPS :: Tag
-- | WH-pronoun, nominative + verb "to be", present, 3rd person singular
-- e.g.; that's who's
WPS_pl_BEZ :: Tag
-- | WH-pronoun, nominative + verb "to have", past tense e.g.; who'd
WPS_pl_HVD :: Tag
-- | WH-pronoun, nominative + verb "to have", present tense, 3rd person
-- singular e.g.; who's that's
WPS_pl_HVZ :: Tag
-- | WH-pronoun, nominative + modal auxiliary e.g.; who'll that'd who'd
-- that'll
WPS_pl_MD :: Tag
-- | WH-qualifier e.g.; however how
WQL :: Tag
-- | WH-adverb e.g.; however when where why whereby wherever how whenever
-- whereon wherein wherewith wheare wherefore whereof howsabout
WRB :: Tag
-- | WH-adverb + verb "to be", present, 2nd person singular or all persons
-- plural e.g.; where're
WRB_pl_BER :: Tag
-- | WH-adverb + verb "to be", present, 3rd person singular e.g.; how's
-- where's
WRB_pl_BEZ :: Tag
-- | WH-adverb + verb "to do", present, not 3rd person singular e.g.; howda
WRB_pl_DO :: Tag
-- | WH-adverb + verb "to do", past tense e.g.; where'd how'd
WRB_pl_DOD :: Tag
-- | WH-adverb + verb "to do", past tense, negated e.g.; whyn't
WRB_pl_DODstar :: Tag
-- | WH-adverb + verb "to do", present tense, 3rd person singular e.g.;
-- how's
WRB_pl_DOZ :: Tag
-- | WH-adverb + preposition e.g.; why'n
WRB_pl_IN :: Tag
-- | WH-adverb + modal auxiliary e.g.; where'd
WRB_pl_MD :: Tag
-- | Unknown.
Unk :: Tag
data Chunk
-- | Noun Phrase.
C_NP :: Chunk
-- | Verb Phrase.
C_VP :: Chunk
-- | Prepositional Phrase.
C_PP :: Chunk
-- | Clause.
C_CL :: Chunk
instance Read Chunk
instance Show Chunk
instance Ord Chunk
instance Eq Chunk
instance Generic Chunk
instance Enum Chunk
instance Read Tag
instance Show Tag
instance Ord Tag
instance Eq Tag
instance Generic Tag
instance Enum Tag
instance Datatype D1Chunk
instance Constructor C1_0Chunk
instance Constructor C1_1Chunk
instance Constructor C1_2Chunk
instance Constructor C1_3Chunk
instance Datatype D1Tag
instance Constructor C1_0Tag
instance Constructor C1_1Tag
instance Constructor C1_2Tag
instance Constructor C1_3Tag
instance Constructor C1_4Tag
instance Constructor C1_5Tag
instance Constructor C1_6Tag
instance Constructor C1_7Tag
instance Constructor C1_8Tag
instance Constructor C1_9Tag
instance Constructor C1_10Tag
instance Constructor C1_11Tag
instance Constructor C1_12Tag
instance Constructor C1_13Tag
instance Constructor C1_14Tag
instance Constructor C1_15Tag
instance Constructor C1_16Tag
instance Constructor C1_17Tag
instance Constructor C1_18Tag
instance Constructor C1_19Tag
instance Constructor C1_20Tag
instance Constructor C1_21Tag
instance Constructor C1_22Tag
instance Constructor C1_23Tag
instance Constructor C1_24Tag
instance Constructor C1_25Tag
instance Constructor C1_26Tag
instance Constructor C1_27Tag
instance Constructor C1_28Tag
instance Constructor C1_29Tag
instance Constructor C1_30Tag
instance Constructor C1_31Tag
instance Constructor C1_32Tag
instance Constructor C1_33Tag
instance Constructor C1_34Tag
instance Constructor C1_35Tag
instance Constructor C1_36Tag
instance Constructor C1_37Tag
instance Constructor C1_38Tag
instance Constructor C1_39Tag
instance Constructor C1_40Tag
instance Constructor C1_41Tag
instance Constructor C1_42Tag
instance Constructor C1_43Tag
instance Constructor C1_44Tag
instance Constructor C1_45Tag
instance Constructor C1_46Tag
instance Constructor C1_47Tag
instance Constructor C1_48Tag
instance Constructor C1_49Tag
instance Constructor C1_50Tag
instance Constructor C1_51Tag
instance Constructor C1_52Tag
instance Constructor C1_53Tag
instance Constructor C1_54Tag
instance Constructor C1_55Tag
instance Constructor C1_56Tag
instance Constructor C1_57Tag
instance Constructor C1_58Tag
instance Constructor C1_59Tag
instance Constructor C1_60Tag
instance Constructor C1_61Tag
instance Constructor C1_62Tag
instance Constructor C1_63Tag
instance Constructor C1_64Tag
instance Constructor C1_65Tag
instance Constructor C1_66Tag
instance Constructor C1_67Tag
instance Constructor C1_68Tag
instance Constructor C1_69Tag
instance Constructor C1_70Tag
instance Constructor C1_71Tag
instance Constructor C1_72Tag
instance Constructor C1_73Tag
instance Constructor C1_74Tag
instance Constructor C1_75Tag
instance Constructor C1_76Tag
instance Constructor C1_77Tag
instance Constructor C1_78Tag
instance Constructor C1_79Tag
instance Constructor C1_80Tag
instance Constructor C1_81Tag
instance Constructor C1_82Tag
instance Constructor C1_83Tag
instance Constructor C1_84Tag
instance Constructor C1_85Tag
instance Constructor C1_86Tag
instance Constructor C1_87Tag
instance Constructor C1_88Tag
instance Constructor C1_89Tag
instance Constructor C1_90Tag
instance Constructor C1_91Tag
instance Constructor C1_92Tag
instance Constructor C1_93Tag
instance Constructor C1_94Tag
instance Constructor C1_95Tag
instance Constructor C1_96Tag
instance Constructor C1_97Tag
instance Constructor C1_98Tag
instance Constructor C1_99Tag
instance Constructor C1_100Tag
instance Constructor C1_101Tag
instance Constructor C1_102Tag
instance Constructor C1_103Tag
instance Constructor C1_104Tag
instance Constructor C1_105Tag
instance Constructor C1_106Tag
instance Constructor C1_107Tag
instance Constructor C1_108Tag
instance Constructor C1_109Tag
instance Constructor C1_110Tag
instance Constructor C1_111Tag
instance Constructor C1_112Tag
instance Constructor C1_113Tag
instance Constructor C1_114Tag
instance Constructor C1_115Tag
instance Constructor C1_116Tag
instance Constructor C1_117Tag
instance Constructor C1_118Tag
instance Constructor C1_119Tag
instance Constructor C1_120Tag
instance Constructor C1_121Tag
instance Constructor C1_122Tag
instance Constructor C1_123Tag
instance Constructor C1_124Tag
instance Constructor C1_125Tag
instance Constructor C1_126Tag
instance Constructor C1_127Tag
instance Constructor C1_128Tag
instance Constructor C1_129Tag
instance Constructor C1_130Tag
instance Constructor C1_131Tag
instance Constructor C1_132Tag
instance Constructor C1_133Tag
instance Constructor C1_134Tag
instance Constructor C1_135Tag
instance Constructor C1_136Tag
instance Constructor C1_137Tag
instance Constructor C1_138Tag
instance Constructor C1_139Tag
instance Constructor C1_140Tag
instance Constructor C1_141Tag
instance Constructor C1_142Tag
instance Constructor C1_143Tag
instance Constructor C1_144Tag
instance Constructor C1_145Tag
instance Constructor C1_146Tag
instance Constructor C1_147Tag
instance Constructor C1_148Tag
instance Constructor C1_149Tag
instance Constructor C1_150Tag
instance Constructor C1_151Tag
instance Constructor C1_152Tag
instance Constructor C1_153Tag
instance Constructor C1_154Tag
instance Constructor C1_155Tag
instance Constructor C1_156Tag
instance Constructor C1_157Tag
instance Constructor C1_158Tag
instance Constructor C1_159Tag
instance Constructor C1_160Tag
instance Constructor C1_161Tag
instance Constructor C1_162Tag
instance Constructor C1_163Tag
instance Constructor C1_164Tag
instance Constructor C1_165Tag
instance Constructor C1_166Tag
instance Constructor C1_167Tag
instance Constructor C1_168Tag
instance Constructor C1_169Tag
instance Constructor C1_170Tag
instance Constructor C1_171Tag
instance Constructor C1_172Tag
instance Constructor C1_173Tag
instance Constructor C1_174Tag
instance Constructor C1_175Tag
instance Constructor C1_176Tag
instance Constructor C1_177Tag
instance Constructor C1_178Tag
instance Constructor C1_179Tag
instance Constructor C1_180Tag
instance Constructor C1_181Tag
instance Constructor C1_182Tag
instance Constructor C1_183Tag
instance Constructor C1_184Tag
instance Constructor C1_185Tag
instance Constructor C1_186Tag
instance Constructor C1_187Tag
instance Constructor C1_188Tag
instance Constructor C1_189Tag
instance Constructor C1_190Tag
instance Constructor C1_191Tag
instance Constructor C1_192Tag
instance Constructor C1_193Tag
instance Constructor C1_194Tag
instance Constructor C1_195Tag
instance Constructor C1_196Tag
instance Constructor C1_197Tag
instance Constructor C1_198Tag
instance Constructor C1_199Tag
instance Constructor C1_200Tag
instance Constructor C1_201Tag
instance Constructor C1_202Tag
instance Constructor C1_203Tag
instance Constructor C1_204Tag
instance Constructor C1_205Tag
instance Constructor C1_206Tag
instance Constructor C1_207Tag
instance Constructor C1_208Tag
instance Constructor C1_209Tag
instance Constructor C1_210Tag
instance Constructor C1_211Tag
instance Constructor C1_212Tag
instance Constructor C1_213Tag
instance Constructor C1_214Tag
instance Constructor C1_215Tag
instance Constructor C1_216Tag
instance Constructor C1_217Tag
instance Constructor C1_218Tag
instance Constructor C1_219Tag
instance Constructor C1_220Tag
instance Constructor C1_221Tag
instance Constructor C1_222Tag
instance Constructor C1_223Tag
instance Constructor C1_224Tag
instance Constructor C1_225Tag
instance Constructor C1_226Tag
instance ChunkTag Chunk
instance Arbitrary Tag
instance Tag Tag
instance Serialize Tag
instance Serialize Chunk
instance Arbitrary Chunk
module NLP.Types.Tree
-- | A sentence of tokens without tags. Generated by the tokenizer.
-- (tokenizer :: Text -> Sentence)
data Sentence
Sent :: [Token] -> Sentence
tokens :: Sentence -> [Token]
applyTags :: Tag t => Sentence -> [t] -> TaggedSentence t
-- | A chunked sentence has POS tags and chunk tags. Generated by a
-- chunker.
--
-- (chunker :: (Chunk chunk, Tag tag) => TaggedSentence tag ->
-- ChunkedSentence chunk tag)
data ChunkedSentence chunk tag
ChunkedSent :: [ChunkOr chunk tag] -> ChunkedSentence chunk tag
-- | A tagged sentence has POS Tags. Generated by a part-of-speech tagger.
-- (tagger :: Tag tag => Sentence -> TaggedSentence tag)
data TaggedSentence tag
TaggedSent :: [POS tag] -> TaggedSentence tag
-- | Generate a Text representation of a TaggedSentence in the common
-- tagged format, e.g.:
--
--
-- "the/at dog/nn jumped/vbd ./."
--
printTS :: Tag t => TaggedSentence t -> Text
-- | Remove the tags from a tagged sentence
stripTags :: Tag t => TaggedSentence t -> Sentence
-- | Extract the tags from a tagged sentence, returning a parallel list of
-- tags along with the underlying Sentence.
unzipTags :: Tag t => TaggedSentence t -> (Sentence, [t])
-- | Combine the results of POS taggers, using the second param to fill in
-- tagUNK entries, where possible.
combine :: Tag t => [TaggedSentence t] -> [TaggedSentence t] -> [TaggedSentence t]
combineSentences :: Tag t => TaggedSentence t -> TaggedSentence t -> TaggedSentence t
-- | Returns the first param, unless it is tagged tagUNK. Throws an
-- error if the text does not match.
pickTag :: Tag t => POS t -> POS t -> POS t
-- | This type seems redundant; it exists only to support the differences
-- between TaggedSentence and ChunkedSentence.
--
-- See the t3 example below to see how verbose this becomes.
data ChunkOr chunk tag
Chunk_CN :: (Chunk chunk tag) -> ChunkOr chunk tag
POS_CN :: (POS tag) -> ChunkOr chunk tag
mkChunk :: (ChunkTag chunk, Tag tag) => chunk -> [ChunkOr chunk tag] -> ChunkOr chunk tag
mkChink :: (ChunkTag chunk, Tag tag) => tag -> Token -> ChunkOr chunk tag
data Chunk chunk tag
Chunk :: chunk -> [ChunkOr chunk tag] -> Chunk chunk tag
data POS tag
POS :: tag -> Token -> POS tag
-- | Show the underlying text token only.
showPOS :: Tag tag => POS tag -> Text
-- | Show the text and tag.
printPOS :: Tag tag => POS tag -> Text
data Token
Token :: Text -> Token
showTok :: Token -> Text
suffix :: Token -> Text
unTS :: Tag t => TaggedSentence t -> [POS t]
tsLength :: Tag t => TaggedSentence t -> Int
tsConcat :: Tag t => [TaggedSentence t] -> TaggedSentence t
-- | True if the input sentence contains the given text token. Does not do
-- partial or approximate matching, and compares details in a fully
-- case-sensitive manner.
contains :: Tag t => TaggedSentence t -> Text -> Bool
-- | True if the input sentence contains the given POS tag. Does not do
-- partial matching (such as prefix matching)
containsTag :: Tag t => TaggedSentence t -> t -> Bool
-- | Compare the POS-tag token with a supplied tag string.
posTagMatches :: Tag t => t -> POS t -> Bool
-- | Compare the POS-tagged token with a text string.
posTokMatches :: Tag t => Text -> POS t -> Bool
-- | Compare a token with a text string.
tokenMatches :: Text -> Token -> Bool
t1 :: Sentence
t2 :: TaggedSentence Tag
t3 :: ChunkedSentence Chunk Tag
instance Read Token
instance Show Token
instance Eq Token
instance Read tag => Read (POS tag)
instance Show tag => Show (POS tag)
instance Eq tag => Eq (POS tag)
instance (Read chunk, Read tag) => Read (ChunkOr chunk tag)
instance (Show chunk, Show tag) => Show (ChunkOr chunk tag)
instance (Eq chunk, Eq tag) => Eq (ChunkOr chunk tag)
instance (Read chunk, Read tag) => Read (Chunk chunk tag)
instance (Show chunk, Show tag) => Show (Chunk chunk tag)
instance (Eq chunk, Eq tag) => Eq (Chunk chunk tag)
instance (Read chunk, Read tag) => Read (ChunkedSentence chunk tag)
instance (Show chunk, Show tag) => Show (ChunkedSentence chunk tag)
instance (Eq chunk, Eq tag) => Eq (ChunkedSentence chunk tag)
instance Read tag => Read (TaggedSentence tag)
instance Show tag => Show (TaggedSentence tag)
instance Eq tag => Eq (TaggedSentence tag)
instance Read Sentence
instance Show Sentence
instance Eq Sentence
instance IsString Token
instance Arbitrary Token
instance (Arbitrary t, Tag t) => Arbitrary (POS t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (Chunk c t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkOr c t)
instance (Arbitrary t, Tag t) => Arbitrary (TaggedSentence t)
instance (ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (ChunkedSentence c t)
instance Arbitrary Sentence
module NLP.Tokenize.Chatter
runTokenizer :: Tokenizer -> (Text -> Sentence)
tokenize :: Text -> Sentence
module NLP.Types
-- | Part of Speech tagger, with back-off tagger.
--
-- A sequence of pos taggers can be assembled by using backoff taggers.
-- When tagging text, the first tagger is run on the input, possibly
-- tagging some tokens as unknown ('Tag Unk'). The first backoff
-- tagger is then recursively invoked on the text to fill in the unknown
-- tags, but that may still leave some tokens marked with 'Tag
-- Unk'. This process repeats until no more taggers are found.
-- (The current implementation is not very efficient in this respect.)
--
-- Back off taggers are particularly useful when there is a set of domain
-- specific vernacular that a general purpose statistical tagger does not
-- know of. A LiteralTagger can be created to map terms to fixed POS
-- tags, and then delegate the bulk of the text to a statistical back off
-- tagger, such as an AvgPerceptronTagger.
--
-- POSTagger values can be serialized and deserialized by using
-- serialize and NLP.POS.deserialize. This is a bit tricky
-- because the POSTagger abstracts away the implementation details of the
-- particular tagging algorithm, and the model for that tagger (if any).
-- To support serialization, each POSTagger value must provide a
-- serialize value that can be used to generate a ByteString
-- representation of the model, as well as a unique id (also a
-- ByteString). Furthermore, that ID must be added to a `Map
-- ByteString (ByteString -> Maybe POSTagger -> Either String
-- POSTagger)` that is provided to deserialize. The function in
-- the map takes the output of posSerialize, and possibly a
-- backoff tagger, and reconstitutes the POSTagger that was serialized
-- (assigning the proper functions, setting up closures as needed, etc.)
-- Look at the source for taggerTable and readTagger for
-- examples.
data POSTagger t
POSTagger :: ([Sentence] -> [TaggedSentence t]) -> ([TaggedSentence t] -> IO (POSTagger t)) -> Maybe (POSTagger t) -> (Text -> Sentence) -> (Text -> [Text]) -> ByteString -> ByteString -> POSTagger t
-- | The initial part-of-speech tagger.
posTagger :: POSTagger t -> [Sentence] -> [TaggedSentence t]
-- | Training function to train the immediate POS tagger.
posTrainer :: POSTagger t -> [TaggedSentence t] -> IO (POSTagger t)
-- | A tagger to invoke on unknown tokens.
posBackoff :: POSTagger t -> Maybe (POSTagger t)
-- | A tokenizer; (words will work.)
posTokenizer :: POSTagger t -> Text -> Sentence
-- | A sentence splitter. If your input is formatted as one sentence per
-- line, then use lines, otherwise try Eric Kow's fullstop
-- library.
posSplitter :: POSTagger t -> Text -> [Text]
-- | Store this POS tagger to a bytestring. This does not serialize
-- the backoff taggers.
posSerialize :: POSTagger t -> ByteString
-- | A unique id that will identify the algorithm used for this POS Tagger.
-- This is used in deserialization
posID :: POSTagger t -> ByteString
-- | Document corpus.
--
-- This is a simple hashed corpus, the document content is not stored.
data Corpus
Corpus :: Int -> Map Text Int -> Corpus
-- | The number of documents in the corpus.
corpLength :: Corpus -> Int
-- | A count of the number of documents each term occurred in.
corpTermCounts :: Corpus -> Map Text Int
-- | Get the number of documents that a term occurred in.
termCounts :: Corpus -> Text -> Int
-- | Add a document to the corpus.
--
-- This can be dangerous if the documents are pre-processed differently.
-- All corpus-related functions assume that the documents have all been
-- tokenized and the tokens normalized, in the same way.
addDocument :: Corpus -> [Text] -> Corpus
-- | Create a corpus from a list of documents, represented by normalized
-- tokens.
mkCorpus :: [[Text]] -> Corpus
addTerms :: Map Text Int -> Set Text -> Map Text Int
addTerm :: Map Text Int -> Text -> Map Text Int
instance Read Corpus
instance Show Corpus
instance Eq Corpus
instance Ord Corpus
instance Generic Corpus
instance Datatype D1Corpus
instance Constructor C1_0Corpus
instance Selector S1_0_0Corpus
instance Selector S1_0_1Corpus
instance Serialize Corpus
instance NFData Corpus
module NLP.POS.LiteralTagger
tag :: Tag t => Map Text t -> CaseSensitive -> [Sentence] -> [TaggedSentence t]
tagSentence :: Tag t => Map Text t -> CaseSensitive -> Sentence -> TaggedSentence t
-- | Create a Literal Tagger using the specified back-off tagger as a
-- fall-back, if one is specified.
--
-- This uses a tokenizer adapted from the tokenize package for a
-- tokenizer, and Eric Kow's fullstop sentence segmenter as a sentence
-- splitter.
mkTagger :: Tag t => Map Text t -> CaseSensitive -> Maybe (POSTagger t) -> POSTagger t
taggerID :: ByteString
-- | Deserialization for Literal Taggers. The serialization logic is in the
-- posSerialize record of the POSTagger created in mkTagger.
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)
-- | Boolean type to indicate case sensitivity for textual comparisons.
data CaseSensitive
Sensitive :: CaseSensitive
Insensitive :: CaseSensitive
-- | Create a tokenizer that protects the provided terms (to tokenize
-- multi-word terms)
protectTerms :: [Text] -> CaseSensitive -> Tokenizer
-- | This POS tagger deterministically tags tokens. However, if it ever
-- sees multiple tags for the same token, it will forget the tag it has
-- learned. This is useful for creating taggers that have very high
-- precision, but very low recall.
--
-- Unambiguous taggers are also useful when defined with a
-- non-deterministic backoff tagger, such as an
-- NLP.POS.AveragedPerceptronTagger, since the high-confidence
-- tags will be applied first, followed by the more non-deterministic
-- results of the backoff tagger.
module NLP.POS.UnambiguousTagger
taggerID :: ByteString
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)
-- | Create an unambiguous tagger, using the supplied Map as a
-- source of tags.
mkTagger :: Tag t => Map Text t -> Maybe (POSTagger t) -> POSTagger t
-- | Trainer method for unambiguous taggers.
train :: Tag t => Map Text t -> [TaggedSentence t] -> Map Text t
-- | Average Perceptron implementation of Part of speech tagging, adapted
-- for Haskell from this python implementation, which is described on the
-- blog post:
--
--
--
-- The Perceptron code can be found on github:
--
--
module NLP.POS.AvgPerceptron
-- | The perceptron model.
data Perceptron
Perceptron :: Map Feature (Map Class Weight) -> Map (Feature, Class) Weight -> Map (Feature, Class) Int -> Int -> Perceptron
-- | Each feature gets its own weight vector, so weights is a dict-of-dicts
weights :: Perceptron -> Map Feature (Map Class Weight)
-- | The accumulated values, for the averaging. These will be keyed by
-- feature/class tuples
totals :: Perceptron -> Map (Feature, Class) Weight
-- | The last time the feature was changed, for the averaging. Also keyed
-- by feature/class tuples (tstamps is short for timestamps)
tstamps :: Perceptron -> Map (Feature, Class) Int
-- | Number of instances seen
instances :: Perceptron -> Int
-- | The classes that the perceptron assigns are represented with a
-- newtype-wrapped String.
--
-- Eventually, I think this should become a typeclass, so the classes can
-- be defined by the users of the Perceptron (such as custom POS tag
-- ADTs, or more complex classes).
newtype Class
Class :: String -> Class
-- | Typedef for doubles to make the code easier to read, and to make this
-- simple to change if necessary.
type Weight = Double
newtype Feature
Feat :: Text -> Feature
-- | An empty perceptron, used to start training.
emptyPerceptron :: Perceptron
-- | Predict a class given a feature vector.
--
-- Ported from python:
--
--
-- def predict(self, features):
-- '''Dot-product the features and current weights and return the best label.'''
-- scores = defaultdict(float)
-- for feat, value in features.items():
-- if feat not in self.weights or value == 0:
-- continue
-- weights = self.weights[feat]
-- for label, weight in weights.items():
-- scores[label] += value * weight
-- # Do a secondary alphabetic sort, for stability
-- return max(self.classes, key=lambda label: (scores[label], label))
--
predict :: Perceptron -> Map Feature Int -> Maybe Class
train :: Int -> Perceptron -> [(Map Feature Int, Class)] -> IO Perceptron
-- | Update the perceptron with a new example.
--
--
-- update(self, truth, guess, features)
-- ...
-- self.i += 1
-- if truth == guess:
-- return None
-- for f in features:
-- weights = self.weights.setdefault(f, {}) -- setdefault is Map.findWithDefault, and destructive.
-- upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
-- upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
-- return None
--
update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron
-- | Average the weights
--
-- Ported from Python:
--
--
-- def average_weights(self):
-- for feat, weights in self.weights.items():
-- new_feat_weights = {}
-- for clas, weight in weights.items():
-- param = (feat, clas)
-- total = self._totals[param]
-- total += (self.i - self._tstamps[param]) * weight
-- averaged = round(total / float(self.i), 3)
-- if averaged:
-- new_feat_weights[clas] = averaged
-- self.weights[feat] = new_feat_weights
-- return None
--
averageWeights :: Perceptron -> Perceptron
instance Read Feature
instance Show Feature
instance Eq Feature
instance Ord Feature
instance Generic Feature
instance Read Class
instance Show Class
instance Eq Class
instance Ord Class
instance Generic Class
instance Read Perceptron
instance Show Perceptron
instance Eq Perceptron
instance Generic Perceptron
instance Datatype D1Feature
instance Constructor C1_0Feature
instance Datatype D1Class
instance Constructor C1_0Class
instance Datatype D1Perceptron
instance Constructor C1_0Perceptron
instance Selector S1_0_0Perceptron
instance Selector S1_0_1Perceptron
instance Selector S1_0_2Perceptron
instance Selector S1_0_3Perceptron
instance NFData Perceptron
instance Serialize Perceptron
instance Serialize Class
instance Serialize Feature
module NLP.Similarity.VectorSim
-- | An efficient (ish) representation for documents in the "bag of words"
-- sense.
type TermVector = DefaultMap Text Double
-- | Generate a TermVector from a tokenized document.
mkVector :: Corpus -> [Text] -> TermVector
-- | Invokes similarity on full strings, using words for
-- tokenization, and no stemming.
--
-- There *must* be at least one document in the corpus.
sim :: Corpus -> Text -> Text -> Double
-- | Determine how similar two documents are.
--
-- This function assumes that each document has been tokenized and (if
-- desired) stemmed/case-normalized.
--
-- This is a wrapper around tvSim, which is a *much* more
-- efficient implementation. If you need to run similarity against any
-- single document more than once, then you should create
-- TermVectors for each of your documents and use tvSim
-- instead of similarity.
--
-- There *must* be at least one document in the corpus.
similarity :: Corpus -> [Text] -> [Text] -> Double
-- | Determine how similar two documents are.
--
-- Calculates the similarity between two documents, represented as
-- TermVectors
tvSim :: TermVector -> TermVector -> Double
-- | Return the raw frequency of a term in a body of text.
--
-- The first argument is the term to find, the second is a tokenized
-- document. This function does not do any stemming or additional text
-- modification.
tf :: Eq a => a -> [a] -> Int
-- | Calculate the inverse document frequency.
--
-- The IDF is, roughly speaking, a measure of how popular a term is.
idf :: Text -> Corpus -> Double
-- | Calculate the tf*idf measure for a term given a document and a corpus.
tf_idf :: Text -> [Text] -> Corpus -> Double
cosVec :: TermVector -> TermVector -> Double
-- | Calculate the magnitude of a vector.
magnitude :: TermVector -> Double
-- | Find the dot product of two vectors.
dotProd :: TermVector -> TermVector -> Double
-- | This is a very simple wrapper around Parsec for writing Information
-- Extraction patterns.
--
-- Because the particular tags/tokens to parse depends on the training
-- corpus (for POS tagging) and the domain, this module only provides
-- basic extractors. You can, for example, create an extractor to find
-- noun phrases by combining the components provided here:
--
--
-- nounPhrase :: Extractor (Text, Tag)
-- nounPhrase = do
-- nlist <- many1 (try (posTok $ Tag "NN")
-- <|> try (posTok $ Tag "DT")
-- <|> (posTok $ Tag "JJ"))
-- let term = T.intercalate " " (map fst nlist)
-- return (term, Tag "n-phr")
--
module NLP.Extraction.Parsec
-- | A Parsec parser.
--
-- Example usage:
--
--
-- > set -XOverloadedStrings
-- > import Text.Parsec.Prim
-- > parse myExtractor "interactive repl" someTaggedSentence
--
type Extractor t = Parsec (TaggedSentence t) ()
-- | Consume a token with the given POS Tag
posTok :: Tag t => t -> Extractor t (POS t)
-- | Consume a token with the specified POS prefix.
--
--
-- > parse (posPrefix "n") "ghci" [(Bob, Tag "np")]
-- Right [(Bob, Tag "np")]
--
posPrefix :: Tag t => Text -> Extractor t (POS t)
-- | Text equality matching with optional case sensitivity.
matches :: CaseSensitive -> Token -> Token -> Bool
-- | Consume a token with the given lexical representation.
txtTok :: Tag t => CaseSensitive -> Token -> Extractor t (POS t)
-- | Consume any one non-empty token.
anyToken :: Tag t => Extractor t (POS t)
oneOf :: Tag t => CaseSensitive -> [Token] -> Extractor t (POS t)
-- | Skips any number of fill tokens, ending with the end parser, and
-- returning the last parsed result.
--
-- This is useful when you know what you're looking for and (for
-- instance) don't care what comes first.
followedBy :: Tag t => Extractor t b -> Extractor t a -> Extractor t a
instance (Monad m, Tag t) => Stream (TaggedSentence t) m (POS t)
module NLP.Extraction.Examples.ParsecExamples
-- | Find a clause in a larger collection of text.
--
-- findClause skips over leading tokens, if needed, to locate a clause.
findClause :: Extractor Tag (ChunkOr Chunk Tag)
clause :: Extractor Tag (ChunkOr Chunk Tag)
prepPhrase :: Extractor Tag (ChunkOr Chunk Tag)
nounPhrase :: Extractor Tag (ChunkOr Chunk Tag)
verbPhrase :: Extractor Tag (ChunkOr Chunk Tag)
module NLP.Corpora.Parsing
-- | Read a POS-tagged corpus out of a Text string of the form: "token/tag
-- token/tag..."
--
--
-- >>> readPOS "Dear/jj Sirs/nns :/: Let/vb"
-- [("Dear",JJ),("Sirs",NNS),(":",Other ":"),("Let",VB)]
--
readPOS :: Tag t => Text -> TaggedSentence t
readPOSWith :: Tag t => (Text -> t) -> Text -> TaggedSentence t
-- | Returns all but the last element of a string, unless the string is
-- empty, in which case it returns that string.
safeInit :: Text -> Text
-- | Averaged Perceptron Tagger
--
-- Adapted from the python implementation found here:
--
--
module NLP.POS.AvgPerceptronTagger
-- | Create an Averaged Perceptron Tagger using the specified back-off
-- tagger as a fall-back, if one is specified.
--
-- This uses a tokenizer adapted from the tokenize package for a
-- tokenizer, and Eric Kow's fullstop sentence segmenter
-- (http://hackage.haskell.org/package/fullstop) as a sentence
-- splitter.
mkTagger :: Tag t => Perceptron -> Maybe (POSTagger t) -> POSTagger t
-- | Train a new Perceptron.
--
-- The training corpus should be a collection of sentences, one sentence
-- on each line, and with each token tagged with a part of speech.
--
-- For example, the input:
--
--
-- "The/DT dog/NN jumped/VB ./.\nThe/DT cat/NN slept/VB ./."
--
--
-- defines two training sentences.
--
--
-- >>> tagger <- trainNew "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--
-- >>> tag tagger $ map T.words $ T.lines "Dear sir"
-- "Dear/jj Sirs/nns :/: Let/vb"
--
trainNew :: Tag t => (Text -> t) -> Text -> IO Perceptron
-- | Train a new Perceptron on a corpus of files.
trainOnFiles :: Tag t => (Text -> t) -> [FilePath] -> IO Perceptron
-- | Add training examples to a perceptron.
--
--
-- >>> tagger <- train emptyPerceptron "Dear/jj Sirs/nns :/: Let/vb\nUs/nn begin/vb\n"
--
-- >>> tag tagger $ map T.words $ T.lines "Dear sir"
-- "Dear/jj Sirs/nns :/: Let/vb"
--
--
-- If you're using multiple input files, this can be useful to improve
-- performance (by folding over the files). For example, see
-- trainOnFiles
train :: Tag t => (Text -> t) -> Perceptron -> Text -> IO Perceptron
-- | Train a model from sentences.
--
-- Ported from Python:
--
--
-- def train(self, sentences, save_loc=None, nr_iter=5):
-- self._make_tagdict(sentences)
-- self.model.classes = self.classes
-- prev, prev2 = START
-- for iter_ in range(nr_iter):
-- c = 0
-- n = 0
-- for words, tags in sentences:
-- context = START + [self._normalize(w) for w in words] + END
-- for i, word in enumerate(words):
-- guess = self.tagdict.get(word)
-- if not guess:
-- feats = self._get_features(i, word, context, prev, prev2)
-- guess = self.model.predict(feats)
-- self.model.update(tags[i], guess, feats)
-- prev2 = prev; prev = guess
-- c += guess == tags[i]
-- n += 1
-- random.shuffle(sentences)
-- logging.info("Iter {0}: {1}/{2}={3}".format(iter_, c, n, _pc(c, n)))
-- self.model.average_weights()
-- # Pickle as a binary file
-- if save_loc is not None:
-- pickle.dump((self.model.weights, self.tagdict, self.classes),
-- open(save_loc, 'wb'), -1)
-- return None
--
trainInt :: Tag t => Int -> Perceptron -> [TaggedSentence t] -> IO Perceptron
-- | Tag a document (represented as a list of Sentences) with a
-- trained Perceptron
--
-- Ported from Python:
--
--
-- def tag(self, corpus, tokenize=True):
-- '''Tags a string `corpus`.'''
-- # Assume untokenized corpus has \n between sentences and ' ' between words
-- s_split = nltk.sent_tokenize if tokenize else lambda t: t.split('\n')
-- w_split = nltk.word_tokenize if tokenize else lambda s: s.split()
-- def split_sents(corpus):
-- for s in s_split(corpus):
-- yield w_split(s)
-- prev, prev2 = self.START
-- tokens = []
-- for words in split_sents(corpus):
-- context = self.START + [self._normalize(w) for w in words] + self.END
-- for i, word in enumerate(words):
-- tag = self.tagdict.get(word)
-- if not tag:
-- features = self._get_features(i, word, context, prev, prev2)
-- tag = self.model.predict(features)
-- tokens.append((word, tag))
-- prev2 = prev
-- prev = tag
-- return tokens
--
tag :: Tag t => Perceptron -> [Sentence] -> [TaggedSentence t]
-- | Tag a single sentence.
tagSentence :: Tag t => Perceptron -> Sentence -> TaggedSentence t
-- | An empty perceptron, used to start training.
emptyPerceptron :: Perceptron
taggerID :: ByteString
readTagger :: Tag t => ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)
-- | This module aims to make tagging text with parts of speech trivially
-- easy.
--
-- If you're new to chatter and POS-tagging, then I suggest you
-- simply try:
--
--
-- >>> tagger <- defaultTagger
--
-- >>> tagStr tagger "This is a sample sentence."
-- "This/dt is/bez a/at sample/nn sentence/nn ./."
--
--
-- Note that we used tagStr, instead of tag, or
-- tagText. Many people don't (yet!) use Data.Text by
-- default, so there is a wrapper around tag that packs and
-- unpacks the String. This is inefficient, but it's just to get
-- you started, and tagStr can be very handy when you're debugging
-- a tagger in ghci (or cabal repl).
--
-- tag exposes more details of the tokenization and tagging, since
-- it returns a list of TaggedSentences, but it doesn't print
-- results as nicely.
module NLP.POS
-- | Tag a chunk of input text with part-of-speech tags, using the sentence
-- splitter, tokenizer, and tagger contained in the POSTagger.
tag :: Tag t => POSTagger t -> Text -> [TaggedSentence t]
-- | Tag the tokens in a string.
--
-- Returns a space-separated string of tokens, each token suffixed with
-- the part of speech. For example:
--
--
-- >>> tagStr tagger "the dog jumped ."
-- "the/at dog/nn jumped/vbd ./."
--
tagStr :: Tag t => POSTagger t -> String -> String
-- | Text version of tagStr: takes the input as Text and returns the
-- tagged result as Text.
tagText :: Tag t => POSTagger t -> Text -> Text
-- | Train a POSTagger on a corpus of sentences.
--
-- This will recurse through the POSTagger stack, training all the
-- backoff taggers as well. In order to do that, this function has to be
-- generic to the kind of taggers used, so it is not possible to train up
-- a new POSTagger from nothing: train wouldn't know what tagger
-- to create.
--
-- To get around that restriction, you can use the various
-- mkTagger implementations, such as
-- NLP.POS.AvgPerceptronTagger.mkTagger. For example:
--
--
-- import NLP.POS.AvgPerceptronTagger as APT
--
-- let newTagger = APT.mkTagger APT.emptyPerceptron Nothing
-- posTgr <- train newTagger trainingExamples
--
train :: Tag t => POSTagger t -> [TaggedSentence t] -> IO (POSTagger t)
-- | Train a tagger on String input in the standard form for POS-tagged
-- corpora (each token suffixed with a slash and its tag):
--
--
-- trainStr tagger "the/at dog/nn jumped/vbd ./."
--
trainStr :: Tag t => POSTagger t -> String -> IO (POSTagger t)
-- | The Text version of trainStr: the training corpus is supplied as
-- Text instead of String.
trainText :: Tag t => POSTagger t -> Text -> IO (POSTagger t)
-- | Tag a list of Sentences using a POSTagger.
--
-- NOTE(review): presumably this is the step tag runs after sentence
-- splitting and tokenization -- confirm against the implementation.
tagTokens :: Tag t => POSTagger t -> [Sentence] -> [TaggedSentence t]
-- | Evaluate a POSTagger.
--
-- Measures accuracy over all tags in the test corpus.
--
-- Accuracy is calculated as:
--
--
-- |tokens tagged correctly| / |all tokens|
--
eval :: Tag t => POSTagger t -> [TaggedSentence t] -> Double
-- | Serialize a POSTagger to a ByteString.
--
-- NOTE(review): presumably the output is what deserialize consumes --
-- confirm.
serialize :: Tag t => POSTagger t -> ByteString
-- | Reconstruct a POSTagger from a ByteString, using the supplied table
-- of tagger IDs to reader functions (taggerTable has this type and is
-- the default table). Use this directly when you need your own
-- mappings for new composite taggers.
--
-- Yields either a String (presumably an error message) or the tagger.
deserialize :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t)) -> ByteString -> Either String (POSTagger t)
-- | The default table of tagger IDs to readTagger functions. Each tagger
-- packaged with Chatter should have an entry here. By convention, the
-- IDs used are the fully qualified module name of the tagger package.
taggerTable :: Tag t => Map ByteString (ByteString -> Maybe (POSTagger t) -> Either String (POSTagger t))
-- | Store a POSTagger to a file at the given path.
saveTagger :: Tag t => POSTagger t -> FilePath -> IO ()
-- | Load a tagger, using the internal taggerTable. If you need to
-- specify your own mappings for new composite taggers, you should use
-- deserialize.
--
-- This function checks the filename to determine if the content should
-- be decompressed. If the file ends with ".gz", then we assume it is a
-- gzipped model.
loadTagger :: Tag t => FilePath -> IO (POSTagger t)
-- | Obtain the default POSTagger. This is the tagger used in the
-- getting-started example of this module's documentation:
--
--
-- >>> tagger <- defaultTagger
--
--
-- NOTE(review): how the tagger is obtained (e.g. which model is loaded,
-- and from where) is not visible from this signature -- confirm.
defaultTagger :: IO (POSTagger Tag)