-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Data types for named entities -- -- The library provides data types which can be used to represent forest -- structures with labels stored in internal nodes and words kept in -- leaves. In particular, those types are well suited for representing -- the layer of named entities (NEs). -- -- The IOB method is implemented in the Data.Named.IOB module and can be -- used to translate between a forest of entities and a sequence of -- compound IOB labels. This method can be used together with a sequence -- classifier to indirectly model forest structures. -- -- The Data.Named.Graph module can be used to represent more general -- graph structures of entities. The module also provides a lossy -- conversion from a DAG to a forest of entities. @package data-named @version 0.1.0 -- | Parsing text in the Enamex data format. Each node is enclosed between -- opening and closing tags with tag name representing the label and -- contents representing children of the node. Both leaf and label values -- should be escaped by prepending the '\' character before special ' ' -- (space), >, < and '\' characters. -- -- Example: -- --
-- >>> :m Data.Tree Data.Text Text.Named.Enamex -- -- >>> let drawIt = putStr . drawForest . fmap (fmap unpack) . parseForest -- -- >>> drawIt $ pack "<x>w1.1\\ w1.2</x> <y><z>w2</z> w3</y>" -- x -- | -- `- w1.1 w1.2 -- , -- y -- | -- +- z -- | | -- | `- w2 -- | -- `- w3 --module Text.Named.Enamex -- | Parse the enamex forest. parseForest :: Text -> Forest -- | Parse the enamex file. parseEnamex :: Text -> [Forest] -- | Map the first function over internal nodes and the second one over -- leaves. mapTwo :: (a -> b) -> (a -> c) -> Tree a -> Tree (Either b c) -- | IOB encoding method extended to forests. -- -- Example: -- --
-- >>> :m Data.Tree Data.Text Text.Named.Enamex Data.Named.IOB -- -- >>> let enamex = pack "<x>w1.1\\ w1.2</x> w2 <y><z>w3</z> w4</y>" -- -- >>> let parseIt = fmap (mapTwo id id . fmap unpack) . parseForest ---- --
-- >>> putStr . drawForest . fmap (fmap show) . parseIt $ enamex -- Left "x" -- | -- `- Right "w1.1 w1.2" -- , -- Right "w2" -- , -- Left "y" -- | -- +- Left "z" -- | | -- | `- Right "w3" -- | -- `- Right "w4" ---- --
-- >>> mapM_ print . encodeForest . parseIt $ enamex
-- IOB {word = "w1.1 w1.2", label = [B "x"]}
-- IOB {word = "w2", label = []}
-- IOB {word = "w3", label = [B "y",B "z"]}
-- IOB {word = "w4", label = [I "y"]}
--
module Data.Named.IOB
-- | An IOB data structure consists of a word with a corresponding
-- compound label.
data IOB w a
IOB :: w -> Label a -> IOB w a
word :: IOB w a -> w
label :: IOB w a -> Label a
-- | A Label consists of a list of atomic Atom labels.
type Label a = [Atom a]
-- | An Atom is an atomic label with an additional marker.
data Atom a
-- | Beginning marker
B :: a -> Atom a
-- | Inside marker
I :: a -> Atom a
-- | Encode the forest with the IOB method.
encodeForest :: Forest (Either a w) -> [IOB w a]
-- | Decode the forest using the IOB method.
decodeForest :: Eq a => [IOB w a] -> Forest (Either a w)
instance Show a => Show (Atom a)
instance Eq a => Eq (Atom a)
instance Ord a => Ord (Atom a)
instance (Show w, Show a) => Show (IOB w a)
-- | Implementation of a graph with each node identified by a unique key.
-- It is a provisional module and it might be replaced by the standard
-- graph from the containers package in the future.
module Data.Named.Graph
-- | A graph.
data Graph k v
Graph :: Map k v -> Map k [k] -> Graph k v
nodeMap :: Graph k v -> Map k v
edgeMap :: Graph k v -> Map k [k]
-- | Make a graph from a list of (key, value, [children keys]) tuples.
mkGraph :: Ord k => [(k, v, [k])] -> Graph k v
-- | Get node with the given key.
node :: (Show k, Ord k) => Graph k v -> k -> v
-- | Get keys of adjacent nodes for the given node key.
edges :: (Show k, Ord k) => Graph k v -> k -> [k]
-- | Return all graph roots (i.e. nodes with no parents).
roots :: Ord k => Graph k v -> [k]
-- | Make a tree rooted in the node with respect to the graph.
toTree :: (Show k, Ord k) => Graph k v -> k -> Tree v
-- | Make a key tree rooted in the node with respect to the graph.
toKeyTree :: (Show k, Ord k) => Graph k v -> k -> Tree k
-- | Transform graph into a forest given the priority function. That is,
-- trees with higher priorities will be taken first, while those with
-- lower priorities might be trimmed down (since we don't want to have
-- nodes with multiple parents in the resulting forest).
toForestWith :: (Show k, Ord k, Ord a) => (Tree v -> a) -> Graph k v -> Forest v
-- | Transform graph into a forest. It removes duplicate nodes from trees,
-- choosing trees in an arbitrary order.
toForest :: (Show k, Ord k) => Graph k v -> Forest v