module NLP.Adict.Trie.Internal
( TrieM
, Trie (..)
, empty
, unTrie
, child
, anyChild
, mkTrie
, setValue
, substChild
, insert
, size
, follow
, lookup
, fromLang
, fromList
, toList
, serialize
, deserialize
, implicitDAWG
) where
import Prelude hiding (lookup)
import Control.Applicative ((<$>), (<*>))
import Control.Monad ((>=>))
import Data.List (foldl')
import Data.Binary (Binary, get, put)
import qualified Data.Map as M
import NLP.Adict.Node
type TrieM a b = Trie a (Maybe b)
data Trie a b = Trie {
rootValue :: b,
edgeMap :: M.Map a (Trie a b)
} deriving (Show, Eq, Ord)
instance Functor (Trie a) where
fmap f Trie{..} = Trie (f rootValue) (fmap (fmap f) edgeMap)
instance (Ord a, Binary a, Binary b) => Binary (Trie a b) where
put Trie{..} = do
put rootValue
put edgeMap
get = Trie <$> get <*> get
unTrie :: Trie a b -> (b, [(a, Trie a b)])
unTrie t = (rootValue t, M.toList $ edgeMap t)
child :: Ord a => a -> Trie a b -> Maybe (Trie a b)
child x Trie{..} = x `M.lookup` edgeMap
anyChild :: Trie a b -> [(a, Trie a b)]
anyChild = snd . unTrie
mkTrie :: Ord a => b -> [(a, Trie a b)] -> Trie a b
mkTrie !v !cs = Trie v (M.fromList cs)
empty :: Ord a => TrieM a b
empty = mkTrie Nothing []
setValue :: b -> Trie a b -> Trie a b
setValue !x !t = t { rootValue = x }
substChild :: Ord a => a -> Trie a b -> Trie a b -> Trie a b
substChild !x !trie !newChild =
let how _ = Just newChild
!edges = M.alter how x (edgeMap trie)
in trie { edgeMap = edges }
insert :: Ord a => [a] -> b -> TrieM a b -> TrieM a b
insert [] v t = setValue (Just v) t
insert (x:xs) v t = substChild x t . insert xs v $
case child x t of
Just t' -> t'
Nothing -> empty
size :: Trie a b -> Int
size t = 1 + sum (map (size.snd) (anyChild t))
follow :: Ord a => [a] -> Trie a b -> Maybe (Trie a b)
follow xs t = foldr (>=>) return (map child xs) t
lookup :: Ord a => [a] -> TrieM a b -> Maybe b
lookup xs t = follow xs t >>= rootValue
fromList :: Ord a => [([a], b)] -> TrieM a b
fromList xs =
let update t (x, v) = insert x v t
in foldl' update empty xs
toList :: TrieM a b -> [([a], b)]
toList t = case rootValue t of
Just y -> ([], y) : lower
Nothing -> lower
where
lower = concatMap goDown $ anyChild t
goDown (x, t') = map (addChar x) $ toList t'
addChar x (xs, y) = (x:xs, y)
fromLang :: Ord a => [[a]] -> TrieM a ()
fromLang xs = fromList [(x, ()) | x <- xs]
implicitDAWG :: (Ord a, Ord b) => Trie a b -> Trie a b
implicitDAWG = deserialize . serialize
serialize :: (Ord a, Ord b) => Trie a b -> [Node a b]
serialize r =
[ mkNode (rootValue t)
[ (c, m M.! s)
| (c, s) <- anyChild t ]
| t <- M.elems m' ]
where
m = collect r
m' = M.fromList [(y, x) | (x, y) <- M.toList m]
deserialize :: Ord a => [Node a b] -> Trie a b
deserialize =
snd . M.findMax . foldl' update M.empty
where
update m n =
let t = mkTrie (nodeValue n) [(c, m M.! k) | (c, k) <- nodeEdges n]
in M.insert (M.size m) t m
collect :: (Ord a, Ord b) => Trie a b -> M.Map (Trie a b) Int
collect t = collect' M.empty t
collect' :: (Ord a, Ord b) => M.Map (Trie a b) Int
-> Trie a b -> M.Map (Trie a b) Int
collect' m0 t = M.alter f t m
where
!m = foldl' collect' m0 (M.elems $ edgeMap t)
!k = M.size m
f Nothing = Just k
f (Just x) = Just x