-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Nerf, a named entity recognition tool based on linear-chain CRFs
--   
--   Please see the README on GitHub at
--   <a>https://github.com/kawu/nerf#readme</a>
@package nerf
@version 0.5.4


-- | Compare two NE-annotated datasets.
module NLP.Nerf.Compare

-- | Comparison statistics: four strict integer counters. The record
--   fields are listed below in the order <a>fp</a>, <a>tp</a>,
--   <a>fn</a>, <a>tn</a>.
data Stats
Stats :: !Int -> !Int -> !Int -> !Int -> Stats

-- | Number of false positives.
[fp] :: Stats -> !Int

-- | Number of true positives.
[tp] :: Stats -> !Int

-- | Number of false negatives.
[fn] :: Stats -> !Int

-- | Number of true negatives.
[tn] :: Stats -> !Int

-- | Add two <a>Stats</a> values (presumably field by field; confirm
--   against the implementation).
(.+.) :: Stats -> Stats -> Stats

-- | Compare two NE-annotated datasets. The function assumes that the
--   two forests in each pair correspond to the same sentence. The result
--   maps values of type <tt>a</tt> (the first parameter of the forests)
--   to their <a>Stats</a>.
compare :: Ord a => [(NeForest a Text, NeForest a Text)] -> Map a Stats
instance GHC.Classes.Ord a => GHC.Classes.Ord (NLP.Nerf.Compare.Node a)
instance GHC.Classes.Eq a => GHC.Classes.Eq (NLP.Nerf.Compare.Node a)
instance GHC.Show.Show a => GHC.Show.Show (NLP.Nerf.Compare.Node a)
instance GHC.Classes.Ord NLP.Nerf.Compare.Stats
instance GHC.Classes.Eq NLP.Nerf.Compare.Stats
instance GHC.Show.Show NLP.Nerf.Compare.Stats


-- | Basic types for dictionary handling.
module NLP.Nerf.Dict.Base

-- | A type of named entity.
type NeType = Text

-- | An orthographic form.
type Form = Text

-- | Is the form a multiword one?
isMultiWord :: Form -> Bool

-- | A Named Entity entry from the LMF dictionary: an orthographic form
--   paired with a named entity type (both fields are strict).
data Entry
Entry :: !Form -> !NeType -> Entry

-- | Orthographic form of the NE.
[neOrth] :: Entry -> !Form

-- | Type of the NE.
[neType] :: Entry -> !NeType

-- | Dictionary label.
type Label = Text

-- | The directed acyclic word graph type underlying <a>Dict</a>, with
--   its first three parameters fixed to <tt>Trans</tt>, <tt>Char</tt>
--   and <tt>()</tt>.
type DAWG = DAWG Trans Char ()
-- | A <a>Dict</a> is a map from forms to labels. Each form may be
--   annotated with multiple labels (a <tt>Set Label</tt>). The map is
--   represented using a directed acyclic word graph.
type Dict = DAWG (Set Label)

-- | Construct a dictionary from a list of form/label pairs.
fromPairs :: [(Form, Label)] -> Dict

-- | Construct a dictionary from a list of entries.
fromEntries :: [Entry] -> Dict

-- | Remove dictionary entries which do not satisfy the predicate. The
--   predicate receives a form together with its set of labels.
siftDict :: (Form -> Set Label -> Bool) -> Dict -> Dict

-- | Save the dictionary in the given file.
saveDict :: FilePath -> Dict -> IO ()

-- | Load the dictionary from the given file.
loadDict :: FilePath -> IO Dict

-- | Merge dictionary resources into a single dictionary.
merge :: [Dict] -> Dict

-- | Differentiate labels from separate dictionaries using
--   dictionary-unique prefixes.
diff :: [Dict] -> [Dict]
instance GHC.Classes.Ord NLP.Nerf.Dict.Base.Entry
instance GHC.Classes.Eq NLP.Nerf.Dict.Base.Entry
instance GHC.Read.Read NLP.Nerf.Dict.Base.Entry
instance GHC.Show.Show NLP.Nerf.Dict.Base.Entry


-- | Handling the NELexicon dictionary.
module NLP.Nerf.Dict.NELexicon

-- | Parse the NELexicon contents into a list of entries.
parseNELexicon :: Text -> [Entry]

-- | Read the NELexicon dictionary from the given file as a list of
--   entries.
readNELexicon :: FilePath -> IO [Entry]


-- | Parsing the Gazetteer for Polish Named Entities (formerly used within
--   the SProUT platform) in the LMF format.
module NLP.Nerf.Dict.PNEG

-- | Parse the dictionary contents into a list of entries.
parsePNEG :: Text -> [Entry]

-- | Read the dictionary from the given file as a list of entries.
readPNEG :: FilePath -> IO [Entry]


-- | Polish Named Entity Triggers <a>http://zil.ipipan.waw.pl/PNET</a>
--   dictionary.
module NLP.Nerf.Dict.PNET

-- | Parse the dictionary contents into a list of entries.
parsePNET :: Text -> [Entry]

-- | Read the dictionary from the given file as a list of entries.
readPNET :: FilePath -> IO [Entry]

-- | Trigger type.
data Typ
-- | Internal trigger.
Internal :: Typ
-- | External trigger.
External :: Typ

-- | Does the entry represent a trigger of the given type?
hasTyp :: Typ -> Entry -> Bool

-- | PNET entry. All fields are plain <tt>Text</tt> except <a>typ</a>,
--   which holds the trigger type. The record fields are listed below in
--   the order <a>orth</a>, <a>base</a>, <a>tag</a>, <a>typ</a>,
--   <a>neTyp</a>, <a>example</a>.
data Entry
Entry :: Text -> Text -> Text -> Typ -> Text -> Text -> Entry
[orth] :: Entry -> Text
[base] :: Entry -> Text
[tag] :: Entry -> Text
-- | Trigger type of the entry (see <a>hasTyp</a>).
[typ] :: Entry -> Typ
[neTyp] :: Entry -> Text
[example] :: Entry -> Text
instance GHC.Classes.Ord NLP.Nerf.Dict.PNET.Typ
instance GHC.Classes.Eq NLP.Nerf.Dict.PNET.Typ
instance GHC.Show.Show NLP.Nerf.Dict.PNET.Typ


-- | Handling Prolexbase dictionaries, both with the same storage format.
module NLP.Nerf.Dict.Prolexbase

-- | Parse the dictionary contents into a list of entries.
parseProlexbase :: Text -> [Entry]

-- | Read the dictionary from the given file as a list of entries.
readProlexbase :: FilePath -> IO [Entry]


-- | Extraction utilities for various dictionary resources. Every
--   function takes a file path and yields a <tt>Dict</tt> in <tt>IO</tt>.
module NLP.Nerf.Dict

-- | Extract the dictionary of NEs from PoliMorf.
extractPoliMorf :: FilePath -> IO Dict

-- | Extract the dictionary of NEs from PNEG.
extractPNEG :: FilePath -> IO Dict

-- | Extract the dictionary of NEs from NELexicon.
extractNELexicon :: FilePath -> IO Dict

-- | Extract the dictionary of NEs from Prolexbase.
extractProlexbase :: FilePath -> IO Dict

-- | Extract internal triggers from the PNET dictionary.
extractIntTriggers :: FilePath -> IO Dict

-- | Extract external triggers from the PNET dictionary.
extractExtTriggers :: FilePath -> IO Dict


-- | The module implements the tokenization used within Nerf, together
--   with some other tokenization-related utilities.
module NLP.Nerf.Tokenize

-- | Tokenize a sentence using the default tokenizer.
tokenize :: String -> [String]

-- | A class of objects which can be converted to <a>String</a>.
class Word a
-- | Convert the object to a <a>String</a>.
word :: Word a => a -> String

-- | Synchronize the list of NE trees with the new tokenization, given as
--   the list of new tokens. The resulting forest is built over the new
--   token type.
sync :: (Word b, Word c) => NeForest a b -> [c] -> NeForest a c
instance NLP.Nerf.Tokenize.Word GHC.Base.String
instance NLP.Nerf.Tokenize.Word Data.Text.Internal.Text
instance NLP.Nerf.Tokenize.Word Data.Text.Internal.Lazy.Text


-- | Basic types.
module NLP.Nerf.Types

-- | A word.
type Word = Text

-- | A named entity.
type NE = Text

-- | An observation consists of an index (of list type) and an actual
--   observation value.
type Ob = ([Int], Text)

-- | A label is created by encoding the named entity forest using the IOB
--   method.
type Lb = Label NE


-- | Observation schema blocks for Nerf.
module NLP.Nerf.Schema

-- | The Ox monad specialized to the word token type and text
--   observations.
type Ox a = Ox Word Text a

-- | A schema is a block of the Ox computation performed within the context
--   of the sentence and the absolute sentence position.
type Schema a = Vector Word -> Int -> Ox a

-- | A dummy schema block, parametrized by a value of the result type.
void :: a -> Schema a

-- | Sequence the list of schemas (or blocks) and discard individual
--   values.
sequenceS_ :: [Vector Word -> a -> Ox b] -> Vector Word -> a -> Ox ()

-- | Use the schema to extract observations from the sentence.
schematize :: Schema a -> [Word] -> Sent Ob

-- | Body of a configuration entry.
data Body a
Body :: [Int] -> a -> Body a

-- | Range argument for the schema block.
[range] :: Body a -> [Int]

-- | Additional arguments for the schema block.
[args] :: Body a -> a

-- | A configuration entry: either absent (<tt>Nothing</tt>) or a
--   <a>Body</a>.
type Entry a = Maybe (Body a)

-- | Plain entry with no additional arguments; takes only the range.
entry :: [Int] -> Entry ()

-- | Entry with additional arguments; takes the additional arguments
--   first and the range second.
entryWith :: a -> [Int] -> Entry a

-- | Configuration of the schema. All configuration elements specify the
--   range over which a particular observation type should be taken into
--   account. For example, the <tt>[-1, 0, 2]</tt> range means that
--   observations of a particular type will be extracted with respect to
--   the previous (<tt>k - 1</tt>), current (<tt>k</tt>) and after the
--   next (<tt>k + 2</tt>) positions when identifying the observation set
--   for position <tt>k</tt> in the input sentence.
data SchemaConf
SchemaConf :: Entry () -> Entry () -> Entry [Int] -> Entry [Int] -> Entry Int -> Entry () -> Entry () -> Entry () -> Entry () -> Entry [Dict] -> Entry Dict -> Entry Dict -> SchemaConf

-- | The <a>orthB</a> schema block.
[orthC] :: SchemaConf -> Entry ()

-- | The <a>splitOrthB</a> schema block.
[splitOrthC] :: SchemaConf -> Entry ()

-- | The <a>lowPrefixesB</a> schema block. The additional argument
--   (<a>args</a>) is the list of prefix lengths.
[lowPrefixesC] :: SchemaConf -> Entry [Int]

-- | The <a>lowSuffixesB</a> schema block. The additional argument
--   (<a>args</a>) is the list of suffix lengths.
[lowSuffixesC] :: SchemaConf -> Entry [Int]

-- | The <a>lemmaB</a> schema block. The additional argument
--   (<a>args</a>) is the number passed to <a>lemmaB</a>.
[lemmaC] :: SchemaConf -> Entry Int

-- | The <a>shapeB</a> schema block.
[shapeC] :: SchemaConf -> Entry ()

-- | The <a>packedB</a> schema block.
[packedC] :: SchemaConf -> Entry ()

-- | The <a>shapePairB</a> schema block.
[shapePairC] :: SchemaConf -> Entry ()

-- | The <a>packedPairB</a> schema block.
[packedPairC] :: SchemaConf -> Entry ()

-- | Dictionaries of NEs (<a>dictB</a> schema block).
[dictC] :: SchemaConf -> Entry [Dict]

-- | Dictionary of internal triggers.
[intTrigsC] :: SchemaConf -> Entry Dict

-- | Dictionary of external triggers.
[extTrigsC] :: SchemaConf -> Entry Dict

-- | Null configuration of the observation schema.
nullConf :: SchemaConf

-- | Default configuration of the observation schema. The arguments
--   presumably supply the NE dictionaries, the internal-trigger
--   dictionary and the external-trigger dictionary, in that order
--   (confirm against the implementation).
defaultConf :: [Dict] -> Maybe Dict -> Maybe Dict -> IO SchemaConf

-- | Build the schema based on the configuration.
fromConf :: SchemaConf -> Schema ()

-- | A block is a chunk of the Ox computation performed within the context
--   of the sentence and the list of absolute sentence positions.
type Block a = Vector Word -> [Int] -> Ox a

-- | Transform the block to the schema depending on the list of relative
--   sentence positions.
fromBlock :: Block a -> [Int] -> Schema a

-- | Orthographic form at the current position.
orthB :: Block ()

-- | Orthographic form split into two observations: the lowercased form and
--   the original form (only when different from the lowercased one).
splitOrthB :: Block ()

-- | List of lowercased prefixes of given lengths.
lowPrefixesB :: [Int] -> Block ()

-- | List of lowercased suffixes of given lengths.
lowSuffixesB :: [Int] -> Block ()

-- | Lemma substitute parametrized by the number specifying the span over
--   which lowercased prefixes and suffixes will be <a>save</a>d. For
--   example, <tt>lemmaB 2</tt> will take affixes of lengths <tt>0, -1</tt>
--   and <tt>-2</tt> into account.
lemmaB :: Int -> Block ()

-- | Shape of the word.
shapeB :: Block ()

-- | Packed shape of the word.
packedB :: Block ()

-- | Combined shapes of two consecutive (at <tt>k-1</tt> and <tt>k</tt>
--   positions) words.
shapePairB :: Block ()

-- | Combined packed shapes of two consecutive (at <tt>k-1</tt> and
--   <tt>k</tt> positions) words.
packedPairB :: Block ()

-- | Plain dictionary search determined with respect to the list of
--   relative positions.
dictB :: Dict -> Block ()
instance GHC.Show.Show NLP.Nerf.Schema.SchemaConf
instance GHC.Show.Show a => GHC.Show.Show (NLP.Nerf.Schema.Body a)
instance Data.Binary.Class.Binary NLP.Nerf.Schema.SchemaConf
instance Data.Binary.Class.Binary a => Data.Binary.Class.Binary (NLP.Nerf.Schema.Body a)


-- | Main module of the Nerf tool.
module NLP.Nerf

-- | A Nerf consists of the observation schema configuration and the CRF
--   model.
data Nerf
Nerf :: SchemaConf -> CRF Ob Lb -> Nerf
-- | The observation schema configuration.
[schemaConf] :: Nerf -> SchemaConf
-- | The CRF model over observations (<tt>Ob</tt>) and labels
--   (<tt>Lb</tt>).
[crf] :: Nerf -> CRF Ob Lb

-- | Train Nerf on the input data using the SGD method. The
--   <tt>FilePath</tt> and <tt>Maybe FilePath</tt> arguments are
--   presumably the training data file and an optional evaluation data
--   file (confirm against the implementation).
train :: SgdArgs -> SchemaConf -> FilePath -> Maybe FilePath -> IO Nerf

-- | Perform named entity recognition (NER) on the input sentence using
--   the given Nerf.
ner :: Nerf -> String -> NeForest NE Word

-- | Show results of observation extraction on the input ENAMEX file.
tryOx :: SchemaConf -> FilePath -> IO ()
instance Data.Binary.Class.Binary NLP.Nerf.Nerf

-- | Running a Nerf server and querying it for NER tagging.
module NLP.Nerf.Server

-- | Run a Nerf server on a given port.
runNerfServer :: Nerf -> PortID -> IO ()

-- | Perform NER tagging on the input sentence, given a host name and a
--   port (presumably those of a running Nerf server; confirm against
--   the implementation).
ner :: HostName -> PortID -> String -> IO (NeForest NE Word)


-- | Support for the XCES format.
module NLP.Nerf.XCES

-- | Annotate XCES (in the form of a tag list) with NEs with respect to
--   the given NER function. The NER function takes a sentence as a
--   <tt>String</tt> and returns its NE forest.
nerXCES :: (String -> NeForest NE Word) -> Text -> Text
instance GHC.Show.Show NLP.Nerf.XCES.SentI
instance GHC.Show.Show NLP.Nerf.XCES.SegT
instance NLP.Nerf.Tokenize.Word NLP.Nerf.XCES.Tok