-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Morphosyntactic tagging tool based on constrained CRFs
--   
--   A morphosyntactic tagging tool based on constrained conditional random
--   fields.
@package concraft
@version 0.4.0


-- | Types and functions related to the morphosyntax data layer.
module NLP.Concraft.Morphosyntax

-- | A sentence of <a>Word</a>s.
type Sent t = [Word t]

-- | A word parametrized over a tag type.
data Word t

-- | Word constructor. Judging by the accessor types below, the arguments
--   are the orthographic form (<a>orth</a>), the weighted interpretations
--   (<a>tagWMap</a>) and the OOV flag (<a>oov</a>).
Word :: Text -> WMap t -> Bool -> Word t

-- | Orthographic form.
orth :: Word t -> Text

-- | Set of word interpretations. To each interpretation a weight of
--   correctness within the context is assigned.
tagWMap :: Word t -> WMap t

-- | Out-of-vocabulary (OOV) word, i.e. word unknown to the morphosyntactic
--   analyser.
oov :: Word t -> Bool

-- | Map function over word tags.
mapWord :: Ord b => (a -> b) -> Word a -> Word b

-- | Map function over sentence tags.
mapSent :: Ord b => (a -> b) -> Sent a -> Sent b

-- | Interpretations of the word, returned as a set.
interpsSet :: Word t -> Set t

-- | Interpretations of the word, returned as a list.
interps :: Word t -> [t]

-- | A weighted collection of type <tt>a</tt> elements.
data WMap a

-- | Make a weighted collection from a list of pairs. The <tt>Double</tt>
--   component is presumably the weight of the paired element (TODO
--   confirm against the implementation).
mkWMap :: Ord a => [(a, Double)] -> WMap a

-- | Map function over weighted collection elements.
mapWMap :: Ord b => (a -> b) -> WMap a -> WMap b
instance Show a => Show (WMap a)
instance Eq a => Eq (WMap a)
instance Ord a => Ord (WMap a)
instance Show t => Show (Word t)
instance Eq t => Eq (Word t)
instance Ord t => Ord (Word t)


-- | The module provides several abstractions for representing external
--   data formats. Concraft will be able to work with any format which
--   implements those abstractions.
module NLP.Concraft.Format

-- | Textual representation of a morphosyntactic tag.
type Tag = Text

-- | Word handler, parametrized over the external word type <tt>w</tt>.
data Word w

-- | Word handler constructor; the arguments correspond to the
--   <a>extract</a> and <a>select</a> fields, in that order.
Word :: (w -> Word Tag) -> (WMap Tag -> w -> w) -> Word w

-- | Extract information relevant for tagging.
--   NOTE(review): the resulting <tt>Word Tag</tt> is presumably the word
--   type of the morphosyntax data layer, not this handler type - confirm.
extract :: Word w -> w -> Word Tag

-- | Select the set of morphosyntactic interpretations.
select :: Word w -> WMap Tag -> w -> w

-- | Sentence handler, parametrized over the external sentence type
--   <tt>s</tt> and the external word type <tt>w</tt>.
data Sent s w

-- | Sentence handler constructor; the arguments correspond to the
--   <a>parseSent</a>, <a>mergeSent</a> and <a>wordHandler</a> fields,
--   in that order.
Sent :: (s -> [w]) -> ([w] -> s -> s) -> Word w -> Sent s w

-- | Split sentence into a list of words.
parseSent :: Sent s w -> s -> [w]

-- | Merge words with a sentence.
mergeSent :: Sent s w -> [w] -> s -> s

-- | Word handler.
wordHandler :: Sent s w -> Word w

-- | Document format.
data Doc f s w

-- | Document format constructor; the arguments correspond to the
--   <a>parseDoc</a>, <a>showDoc</a> and <a>sentHandler</a> fields,
--   in that order.
Doc :: (Text -> f s) -> (f s -> Text) -> Sent s w -> Doc f s w

-- | Parse textual interpretations into a functor with sentence elements.
parseDoc :: Doc f s w -> Text -> f s

-- | Show textual representation of a document.
showDoc :: Doc f s w -> f s -> Text

-- | Sentence handler.
sentHandler :: Doc f s w -> Sent s w


-- | Observation schema blocks for Concraft.
module NLP.Concraft.Schema

-- | An observation consists of an index (of list type) and an actual
--   observation value.
type Ob = ([Int], Text)

-- | The Ox monad specialized to word token type and text observations.
type Ox t a = Ox (Word t) Text a

-- | A schema is a block of the Ox computation performed within the context
--   of the sentence and the absolute sentence position.
type Schema t a = Vector (Word t) -> Int -> Ox t a

-- | A dummy schema block.
void :: a -> Schema t a

-- | Sequence the list of schemas (or blocks) and discard individual
--   values.
sequenceS_ :: [Vector (Word t) -> a -> Ox t b] -> Vector (Word t) -> a -> Ox t ()

-- | Use the schema to extract observations from the sentence. The result
--   is a list of observation lists; presumably one inner list per
--   sentence position (TODO confirm).
schematize :: Schema t a -> Sent t -> [[Ob]]

-- | Body of configuration entry.
data Body a

-- | Body constructor; the arguments correspond to the <a>range</a>,
--   <a>oovOnly</a> and <a>args</a> fields, in that order.
Body :: [Int] -> Bool -> a -> Body a

-- | Range argument for the schema block.
range :: Body a -> [Int]

-- | When true, the entry is used only for OOV words.
oovOnly :: Body a -> Bool

-- | Additional arguments for the schema block.
args :: Body a -> a

-- | Maybe entry.
type Entry a = Maybe (Body a)

-- | Plain entry with no additional arguments. The <tt>[Int]</tt>
--   argument is presumably the entry <a>range</a> (TODO confirm).
entry :: [Int] -> Entry ()

-- | Entry with additional arguments. The <tt>[Int]</tt> argument is
--   presumably the entry <a>range</a> (TODO confirm).
entryWith :: a -> [Int] -> Entry a

-- | Configuration of the schema. All configuration elements specify the
--   range over which a particular observation type should be taken into
--   account. For example, the <tt>[-1, 0, 2]</tt> range means that
--   observations of particular type will be extracted with respect to
--   previous (<tt>k - 1</tt>), current (<tt>k</tt>) and after the next
--   (<tt>k + 2</tt>) positions when identifying the observation set for
--   position <tt>k</tt> in the input sentence.
data SchemaConf

-- | Configuration constructor. The argument types are consistent with
--   the field order <a>orthC</a>, <a>lowOrthC</a>, <a>lowPrefixesC</a>,
--   <a>lowSuffixesC</a>, <a>knownC</a>, <a>shapeC</a>, <a>packedC</a>,
--   <a>begPackedC</a>; several arguments share the same type, so verify
--   the order against the implementation before constructing positionally.
SchemaConf :: Entry () -> Entry () -> Entry [Int] -> Entry [Int] -> Entry () -> Entry () -> Entry () -> Entry () -> SchemaConf

-- | The <a>orthB</a> schema block.
orthC :: SchemaConf -> Entry ()

-- | The <a>lowOrthB</a> schema block.
lowOrthC :: SchemaConf -> Entry ()

-- | The <a>lowPrefixesB</a> schema block. The first list of ints
--   represents lengths of prefixes.
lowPrefixesC :: SchemaConf -> Entry [Int]

-- | The <a>lowSuffixesB</a> schema block. The first list of ints
--   represents lengths of suffixes.
lowSuffixesC :: SchemaConf -> Entry [Int]

-- | The <a>knownB</a> schema block.
knownC :: SchemaConf -> Entry ()

-- | The <a>shapeB</a> schema block.
shapeC :: SchemaConf -> Entry ()

-- | The <a>packedB</a> schema block.
packedC :: SchemaConf -> Entry ()

-- | The <a>begPackedB</a> schema block.
begPackedC :: SchemaConf -> Entry ()

-- | Null configuration of the observation schema.
nullConf :: SchemaConf

-- | Build the schema based on the configuration.
fromConf :: SchemaConf -> Schema t ()

-- | Default configuration for the guessing observation schema.
guessConfDefault :: SchemaConf

-- | Default configuration for the disambiguation observation schema.
disambConfDefault :: SchemaConf

-- | A block is a chunk of the Ox computation performed within the context
--   of the sentence and the list of absolute sentence positions.
type Block t a = Vector (Word t) -> [Int] -> Ox t a

-- | Transform a block to a schema depending on: (1) a list of relative
--   sentence positions; (2) a boolean value - if true, the block
--   computation will be performed only on positions where an OOV word
--   resides.
fromBlock :: Block t a -> [Int] -> Bool -> Schema t a

-- | Orthographic form at the current position.
orthB :: Block t ()

-- | Presumably the lowercased orthographic form at the current position.
--   NOTE(review): the generated description was identical to the one of
--   <a>orthB</a>; confirm against the implementation.
lowOrthB :: Block t ()

-- | List of lowercased prefixes of given lengths.
lowPrefixesB :: [Int] -> Block t ()

-- | List of lowercased suffixes of given lengths.
lowSuffixesB :: [Int] -> Block t ()

-- | Presumably an observation telling whether the word is known (i.e.
--   not OOV). NOTE(review): the generated description ("Shape of the
--   word.") was identical to the one of <a>shapeB</a>; confirm against
--   the implementation.
knownB :: Block t ()

-- | Shape of the word.
shapeB :: Block t ()

-- | Packed shape of the word.
packedB :: Block t ()

-- | Packed shape of the word, presumably combined with information on
--   whether the word begins the sentence. NOTE(review): the generated
--   description was identical to the one of <a>packedB</a>; confirm
--   against the implementation.
begPackedB :: Block t ()
instance Show a => Show (Body a)
instance Show SchemaConf
instance Binary SchemaConf
instance Binary a => Binary (Body a)

-- | The guessing model together with functions for applying and training
--   it.
module NLP.Concraft.Guess

-- | A guessing model.
data Guesser t

-- | Guesser constructor; the arguments correspond to the
--   <a>schemaConf</a> and <a>crf</a> fields, in that order.
Guesser :: SchemaConf -> CRF Ob t -> Guesser t

-- | Observation schema configuration stored in the guesser.
schemaConf :: Guesser t -> SchemaConf

-- | CRF model stored in the guesser.
crf :: Guesser t -> CRF Ob t

-- | Determine the <tt>k</tt> most probable labels for each word in the
--   sentence. The <tt>Int</tt> argument is presumably <tt>k</tt>.
guess :: Ord t => Int -> Guesser t -> Sent t -> [[t]]

-- | Tag sentence in external format. Selected interpretations (tags
--   correct within the context) will be preserved.
guessSent :: Sent s w -> Int -> Guesser Tag -> s -> s

-- | Tag a document given in its textual form; the result is text as
--   well.
guessDoc :: Functor f => Doc f s w -> Int -> Guesser Tag -> Text -> Text

-- | Include guessing results into the sentence.
include :: Ord t => Sent t -> [[t]] -> Sent t

-- | Training configuration.
data TrainConf

-- | Training configuration constructor; the arguments correspond to the
--   <a>schemaConfT</a> and <a>sgdArgsT</a> fields, in that order.
TrainConf :: SchemaConf -> SgdArgs -> TrainConf

-- | Observation schema configuration used for training.
schemaConfT :: TrainConf -> SchemaConf

-- | Value of type <tt>SgdArgs</tt> (declared outside this file);
--   presumably the parameters of the SGD training method - TODO confirm.
sgdArgsT :: TrainConf -> SgdArgs

-- | Train guesser.
--   NOTE(review): the <tt>FilePath</tt> and <tt>Maybe FilePath</tt>
--   arguments are presumably the training file and an optional
--   evaluation file - confirm against the implementation.
train :: Foldable f => Doc f s w -> TrainConf -> FilePath -> Maybe FilePath -> IO (Guesser Tag)
instance (Ord t, Binary t) => Binary (Guesser t)

-- | The disambiguation model together with functions for applying and
--   training it.
module NLP.Concraft.Disamb

-- | A disambiguation model.
data Disamb

-- | Disamb constructor; the arguments correspond to the <a>tagset</a>,
--   <a>tiers</a>, <a>schemaConf</a> and <a>crf</a> fields, in that order.
Disamb :: Tagset -> [Tier] -> SchemaConf -> CRF Ob Atom -> Disamb

-- | Tagset stored in the model.
tagset :: Disamb -> Tagset

-- | Tier descriptions stored in the model.
tiers :: Disamb -> [Tier]

-- | Observation schema configuration stored in the model.
schemaConf :: Disamb -> SchemaConf

-- | CRF model stored in the disambiguation model.
crf :: Disamb -> CRF Ob Atom

-- | CRF model data.
data CRF a b

-- | A tier description.
data Tier

-- | Tier constructor; the arguments correspond to the <a>withPos</a> and
--   <a>withAtts</a> fields, in that order.
Tier :: Bool -> Set Attr -> Tier

-- | Does it include the part of speech?
withPos :: Tier -> Bool

-- | Tier grammatical attributes.
withAtts :: Tier -> Set Attr

-- | An atomic part of morphosyntactic tag with optional POS.
data Atom

-- | Atom constructor; the arguments correspond to the <a>pos</a> and
--   <a>atts</a> fields, in that order.
Atom :: Maybe POS -> Map Attr Text -> Atom

-- | Optional part of speech of the atom.
pos :: Atom -> Maybe POS

-- | Map from attributes to textual values.
atts :: Atom -> Map Attr Text

-- | Default tiered tagging configuration.
tiersDefault :: [Tier]

-- | Perform context-sensitive disambiguation.
disamb :: Disamb -> Sent Tag -> [Tag]

-- | Tag the sentence.
disambSent :: Sent s w -> Disamb -> s -> s

-- | Disambiguate document.
disambDoc :: Functor f => Doc f s w -> Disamb -> Text -> Text

-- | Training configuration.
data TrainConf

-- | Training configuration constructor; the arguments correspond to the
--   <a>tagsetT</a>, <a>tiersT</a>, <a>schemaConfT</a> and
--   <a>sgdArgsT</a> fields, in that order.
TrainConf :: Tagset -> [Tier] -> SchemaConf -> SgdArgs -> TrainConf

-- | Tagset used for training.
tagsetT :: TrainConf -> Tagset

-- | Tier descriptions used for training.
tiersT :: TrainConf -> [Tier]

-- | Observation schema configuration used for training.
schemaConfT :: TrainConf -> SchemaConf

-- | Value of type <tt>SgdArgs</tt> (declared outside this file);
--   presumably the parameters of the SGD training method - TODO confirm.
sgdArgsT :: TrainConf -> SgdArgs

-- | Train disamb model.
--   NOTE(review): the <tt>FilePath</tt> and <tt>Maybe FilePath</tt>
--   arguments are presumably the training file and an optional
--   evaluation file - confirm against the implementation.
train :: Foldable f => Doc f s w -> TrainConf -> FilePath -> Maybe FilePath -> IO Disamb
instance Binary Disamb


-- | Simple format for morphosyntax representation which assumes that all
--   tags have a textual representation with no spaces inside and that one
--   of the tags indicates unknown words.
module NLP.Concraft.Format.Plain

-- | A token.
data Token

-- | Token constructor; the arguments correspond to the <a>orth</a>,
--   <a>space</a>, <a>known</a> and <a>interps</a> fields, in that order.
Token :: Text -> Space -> Bool -> Map Interp Bool -> Token

-- | Textual form of the token; presumably its orthographic form.
orth :: Token -> Text

-- | The <a>Space</a> value attached to the token. NOTE(review):
--   presumably the whitespace preceding the token - confirm.
space :: Token -> Space

-- | Boolean flag of the token. NOTE(review): presumably true when the
--   word is known, i.e. not marked with the unknown-word tag - confirm.
known :: Token -> Bool

-- | Interpretations of the token, each interpretation annotated with a
--   <i>disamb</i> Boolean value (if <a>True</a>, the interpretation is
--   correct within the context).
interps :: Token -> Map Interp Bool

-- | A single interpretation: an optional base form and a tag.
data Interp

-- | Interpretation constructor; the arguments correspond to the
--   <a>base</a> and <a>tag</a> fields, in that order.
Interp :: Maybe Text -> Tag -> Interp

-- | Optional textual base of the interpretation (presumably the base
--   form, i.e. lemma).
base :: Interp -> Maybe Text

-- | Tag of the interpretation.
tag :: Interp -> Tag

-- | No space, space or newline.
data Space
None :: Space
Space :: Space
NewLine :: Space

-- | Create document handler given value of the <i>ignore</i> tag.
--   NOTE(review): the sibling parsing functions call their <a>Tag</a>
--   argument the <i>oov</i> tag; presumably this is the same tag -
--   confirm against the implementation.
plainFormat :: Tag -> Doc [] [Token] Token

-- | Parse the text in the plain format given the <i>oov</i> tag.
parsePlain :: Tag -> Text -> [[Token]]

-- | Parse the sentence in the plain format given the <i>oov</i> tag.
parseSent :: Tag -> Text -> [Token]

-- | Show the plain data. The <a>Tag</a> argument is presumably the
--   <i>oov</i> tag, as in <a>parsePlain</a> (TODO confirm).
showPlain :: Tag -> [[Token]] -> Text

-- | Show the sentence. The <a>Tag</a> argument is presumably the
--   <i>oov</i> tag, as in <a>parseSent</a> (TODO confirm).
showSent :: Tag -> [Token] -> Text
instance Show Space
instance Eq Space
instance Ord Space
instance Show Interp
instance Eq Interp
instance Ord Interp
instance Show Token
instance Eq Token
instance Ord Token

-- | Top-level tagging interface combining the guessing and the
--   disambiguation models.
module NLP.Concraft

-- | Concraft data.
data Concraft

-- | Concraft constructor; the arguments correspond to the
--   <a>guessNum</a>, <a>guesser</a> and <a>disamb</a> fields, in that
--   order.
Concraft :: Int -> Guesser Tag -> Disamb -> Concraft

-- | Integer parameter of the model. NOTE(review): presumably the number
--   of tags the guesser proposes for each word - confirm.
guessNum :: Concraft -> Int

-- | The guessing model.
guesser :: Concraft -> Guesser Tag

-- | The disambiguation model.
disamb :: Concraft -> Disamb

-- | Perform disambiguation preceded by context-sensitive guessing.
tag :: Concraft -> Sent Tag -> [Tag]

-- | Tag the sentence.
tagSent :: Sent s w -> Concraft -> s -> s

-- | Tag document.
tagDoc :: Functor f => Doc f s w -> Concraft -> Text -> Text

-- | Train guessing and disambiguation models.
--   NOTE(review): the two <tt>TrainConf</tt> arguments are presumably
--   the guessing and the disambiguation training configurations (in that
--   order), the <tt>Int</tt> the value stored as <a>guessNum</a>, and the
--   <tt>FilePath</tt> / <tt>Maybe FilePath</tt> arguments the training
--   file and an optional evaluation file - confirm against the
--   implementation.
train :: (Functor f, Foldable f) => Doc f s w -> Int -> TrainConf -> TrainConf -> FilePath -> Maybe FilePath -> IO Concraft
instance Binary Concraft