-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Data types for named entities -- -- The library provides data types which can be used to represent forest -- structures with labels stored in internal nodes and words kept in -- leaves. In particular, those types are well suited for representing -- the layer of named entities (NEs). -- -- The IOB method is implemented in the Data.Named.IOB module and can be -- used to translate between a forest of entities and a sequence of -- compound IOB labels. This method can be used together with a sequence -- classifier to indirectly model forest structures. -- -- The Data.Named.Graph module can be used to represent more general -- graph structures of entities. The module also provides a lossy -- conversion from a DAG to a disjoint forest of entities. @package data-named @version 0.2.0 -- | Parsing text in the Enamex data format. Each node is enclosed between -- opening and closing tags with tag name representing the label and -- contents representing children of the node. Both leaf and label values -- should be escaped by prepending the \ character before special >, -- <, \ and space characters. -- -- Example: -- --
--   >>> :m Data.Tree Data.Text Text.Named.Enamex
--   
--   >>> let drawIt = putStr . drawForest . fmap (fmap unpack) . parseForest
--   
--   >>> drawIt $ pack "<x>w1.1\\ w1.2</x> <y><z>w2</z> w3</y>"
--   x
--   |
--   `- w1.1 w1.2
--   ,
--   y
--   |
--   +- z
--   |  |
--   |  `- w2
--   |
--   `- w3
--   
module Text.Named.Enamex -- | Parse the enamex forest. parseForest :: Text -> Forest -- | Parse the enamex file. parseEnamex :: Text -> [Forest] -- | Map the first function over internal nodes and the second one over -- leaves. mapTwo :: (a -> b) -> (a -> c) -> Tree a -> Tree (Either b c) -- | IOB encoding method extended to forests. -- -- Example: -- --
--   >>> :m Data.Tree Data.Text Text.Named.Enamex Data.Named.IOB
--   
--   >>> let enamex = pack "<x>w1.1\\ w1.2</x> w2 <y><z>w3</z> w4</y>"
--   
--   >>> let parseIt = fmap (mapTwo id id . fmap unpack) . parseForest
--   
-- --
--   >>> putStr . drawForest . fmap (fmap show) . parseIt $ enamex
--   Left "x"
--   |
--   `- Right "w1.1 w1.2"
--   ,
--   Right "w2"
--   ,
--   Left "y"
--   |
--   +- Left "z"
--   |  |
--   |  `- Right "w3"
--   |
--   `- Right "w4"
--   
-- --
--   >>> mapM_ print . encodeForest . parseIt $ enamex
--   IOB {word = "w1.1 w1.2", label = [B "x"]}
--   IOB {word = "w2", label = []}
--   IOB {word = "w3", label = [B "y",B "z"]}
--   IOB {word = "w4", label = [I "y"]}
--   
module Data.Named.IOB -- | An IOB data structure consists of a word with a corresponding -- compound label. data IOB w a IOB :: w -> Label a -> IOB w a word :: IOB w a -> w label :: IOB w a -> Label a -- | A Label consists of a list of atomic Atom labels. type Label a = [Atom a] -- | An Atom is the atomic label with an additional marker. data Atom a -- | Beginning marker B :: a -> Atom a -- | Inside marker I :: a -> Atom a -- | Encode the forest with the IOB method. encodeForest :: Forest (Either a w) -> [IOB w a] -- | Decode the forest using the IOB method. decodeForest :: Eq a => [IOB w a] -> Forest (Either a w) instance Show a => Show (Atom a) instance Eq a => Eq (Atom a) instance Ord a => Ord (Atom a) instance (Show w, Show a) => Show (IOB w a) -- | Working with NE trees and forests. module Data.Named.Tree -- | Combine the disjoint forest with the list of words. Discontinuities -- will be patched with no trace. addWords :: Ord k => Forest k -> [k] -> Forest k -- | Spanning of a tree. data Span Span :: Int -> Int -> Span beg :: Span -> Int end :: Span -> Int -- | Make span for a leaf node. leafSpan :: Int -> Span -- | Minimum span overlapping both input spans. (<>) :: Span -> Span -> Span -- | Set of positions covered by the span. spanSet :: Span -> IntSet -- | Get span of the span-annotated tree. span :: Tree (a, Span) -> Span -- | Annotate tree nodes with spanning info given the function which -- assigns indices to leaf nodes. spanTree :: (k -> Int) -> Tree k -> Tree (k, Span) -- | Annotate forest nodes with spanning info. spanForest :: (k -> Int) -> Forest k -> Forest (k, Span) -- | Remove span annotations from the tree. unSpanTree :: Tree (k, Span) -> Tree k -- | Remove span annotations from the forest. unSpanForest :: Forest (k, Span) -> Forest k -- | Sort the tree with respect to spanning info. sortTree :: Tree (k, Span) -> Tree (k, Span) -- | Sort the forest with respect to spanning info. 
sortForest :: Forest (k, Span) -> Forest (k, Span) -- | Map a function over each tree from the forest. mapTrees :: (a -> b) -> Forest a -> Forest b instance Show Span instance Eq Span instance Ord Span -- | Implementation of a graph with each node identified by a unique key. -- It is a provisional module and it might be replaced by the standard -- graph from the containers package in the future. module Data.Named.Graph -- | A graph. data Graph k v Graph :: Map k v -> Map k [k] -> Graph k v nodeMap :: Graph k v -> Map k v edgeMap :: Graph k v -> Map k [k] -- | Make a graph from a list of (key, value, [children keys]) tuples. mkGraph :: Ord k => [(k, v, [k])] -> Graph k v -- | Get node with the given key. node :: (Show k, Ord k) => Graph k v -> k -> v -- | Get keys of adjacent nodes for the given node key. edges :: (Show k, Ord k) => Graph k v -> k -> [k] -- | Return all graph roots (i.e. nodes with no parents). roots :: Ord k => Graph k v -> [k] -- | Spanning-like forest of a DAG. Trees in the resulting forest are -- disjoint with respect to their ranges. It is not checked if the input -- graph is actually a DAG. disjointForest :: (Show k, Ord k) => (k -> Int) -> Graph k v -> Forest k instance Monad RanM