-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Morphosyntactic tagging tool based on constrained CRFs -- -- A morphosyntactic tagging tool based on constrained conditional random -- fields. @package concraft @version 0.3.0 -- | The module provides functions for splitting positional tags. They can -- be used together with the layered disambiguation model. module NLP.Concraft.Disamb.Positional -- | A tier description. data Tier Tier :: Bool -> Set Attr -> Tier -- | Does it include the part of speech? withPos :: Tier -> Bool -- | Tier grammatical attributes. withAtts :: Tier -> Set Attr -- | An atomic part of morphosyntactic tag with optional POS. data Part Part :: Maybe POS -> Map Attr Text -> Part pos :: Part -> Maybe POS atts :: Part -> Map Attr Text -- | Select tier attributes. select :: Tier -> Tag -> Part -- | Split the positional tag. split :: [Tier] -> Tag -> [Part] -- | Default tiered tagging configuration. tierConfDefault :: [Tier] instance Show Part instance Eq Part instance Ord Part instance Binary Part instance Binary Tier -- | Types and functions related to the morphosyntax data layer. module NLP.Concraft.Morphosyntax -- | A sentence of Words. type Sent t = [Word t] -- | A word parametrized over a tag type. data Word t Word :: Text -> WMap t -> Bool -> Word t -- | Orthographic form. orth :: Word t -> Text -- | Set of word interpretations. To each interpretation a weight of -- correctness within the context is assigned. tagWMap :: Word t -> WMap t -- | Out-of-vocabulary (OOV) word, i.e. word unknown to the morphosyntactic -- analyser. oov :: Word t -> Bool -- | Map function over word tags. mapWord :: Ord b => (a -> b) -> Word a -> Word b -- | Map function over sentence tags. mapSent :: Ord b => (a -> b) -> Sent a -> Sent b -- | Interpretations of the word. interpsSet :: Word t -> Set t -- | Interpretations of the word. interps :: Word t -> [t] -- | A weighted collection of type a elements. 
data WMap a -- | Make a weighted collection. mkWMap :: Ord a => [(a, Double)] -> WMap a -- | Map function over weighted collection elements. mapWMap :: Ord b => (a -> b) -> WMap a -> WMap b instance Show a => Show (WMap a) instance Eq a => Eq (WMap a) instance Ord a => Ord (WMap a) instance Show t => Show (Word t) instance Eq t => Eq (Word t) instance Ord t => Ord (Word t) -- | The module provides several abstractions for representing external -- data formats. Concraft will be able to work with any format which -- implements those abstractions. module NLP.Concraft.Format -- | Textual representation of morphosyntactic tag. type Tag = Text -- | Words handler. data Word w Word :: (w -> Word Tag) -> (WMap Tag -> w -> w) -> Word w -- | Extract information relevant for tagging. extract :: Word w -> w -> Word Tag -- | Select the set of morphosyntactic interpretations. select :: Word w -> WMap Tag -> w -> w -- | Sentence handler. data Sent s w Sent :: (s -> [w]) -> ([w] -> s -> s) -> Word w -> Sent s w -- | Split sentence into a list of words. parseSent :: Sent s w -> s -> [w] -- | Merge words with a sentence. mergeSent :: Sent s w -> [w] -> s -> s -- | Words handler. wordHandler :: Sent s w -> Word w -- | Document format. data Doc f s w Doc :: (Text -> f s) -> (f s -> Text) -> Sent s w -> Doc f s w -- | Parse textual interpretations into a functor with sentence elements. parseDoc :: Doc f s w -> Text -> f s -- | Show textual representation of a document. showDoc :: Doc f s w -> f s -> Text -- | Sentence handler. sentHandler :: Doc f s w -> Sent s w -- | Simple format for morphosyntax representation which assumes that all -- tags have a textual representation with no spaces inside and that one -- of the tags indicates unknown words. module NLP.Concraft.Format.Plain -- | Create document handler given value of the ignore tag. 
plainFormat :: Tag -> Doc [] [Token] Token instance Show Space instance Eq Space instance Ord Space instance Show Interp instance Eq Interp instance Ord Interp instance Show Token instance Eq Token instance Ord Token module NLP.Concraft.Schema -- | A schema is a block of the Ox computation performed within the context -- of the sentence and the absolute sentence position. type Schema t a = Vector (Word t) -> Int -> Ox t a -- | The Ox monad specialized to word token type and text observations. type Ox t a = Ox (Word t) Text a -- | An observation consists of an index (of list type) and an actual -- observation value. type Ob = ([Int], Text) -- | Default guessing schema. guessSchemaDefault :: Schema t () -- | Default disambiguation schema. disambSchemaDefault :: Schema t () module NLP.Concraft.Guess -- | A guesser represented by the conditional random field. newtype Guesser t Guesser :: CRF Ob t -> Guesser t crf :: Guesser t -> CRF Ob t -- | Determine the k most probable labels for each word in the -- sentence. guess :: Ord t => Int -> Schema t a -> Guesser t -> Sent t -> [[t]] -- | Include guessing results into the sentence. include :: Ord t => Sent t -> [[t]] -> Sent t -- | Tag sentence in external format. Selected interpretations (tags -- correct within the context) will be preserved. guessSent :: Sent s w -> Int -> Schema Tag a -> Guesser Tag -> s -> s -- | Tag file. guessDoc :: Functor f => Doc f s w -> Int -> Schema Tag a -> Guesser Tag -> Text -> Text -- | Train guesser. trainOn :: Foldable f => Doc f s w -> Schema Tag a -> SgdArgs -> FilePath -> Maybe FilePath -> IO (Guesser Tag) instance (Ord t, Binary t) => Binary (Guesser t) module NLP.Concraft.Disamb -- | Split is just a function from an original tag form to a complex tag -- form. type Split r t = r -> t -- | CRF training function. type TrainCRF o t c = IO [SentL o t] -> Maybe (IO [SentL o t]) -> IO c -- | CRF tagging function. type TagCRF o t = Sent o t -> [t] -- | Perform context-sensitive disambiguation. 
disamb :: (Ord r, Ord t) => Schema t a -> Split r t -> TagCRF Ob t -> Sent r -> [r] -- | Tag the sentence. disambSent :: Ord t => Sent s w -> Schema t a -> Split Tag t -> TagCRF Ob t -> s -> s -- | Disambiguate document. disambDoc :: (Functor f, Ord t) => Doc f s w -> Schema t a -> Split Tag t -> TagCRF Ob t -> Text -> Text -- | Train disamb model. trainOn :: (Foldable f, Ord t) => Doc f s w -> Schema t a -> Split Tag t -> TrainCRF Ob t c -> FilePath -> Maybe FilePath -> IO c module NLP.Concraft.Disamb.Tiered -- | Observation. newtype Ob Ob :: Int -> Ob unOb :: Ob -> Int -- |