-- | The module provides implementation of /directed acyclic word graphs/ -- (DAWGs) also known as /minimal acyclic finite-state automata/. -- The implementation provides fast insert and delete operations -- which can be used to build the DAWG structure incrementaly. module Data.DAWG ( DAWG (..) , empty , numStates , insert , delete , lookup , fromList , fromLang ) where import Prelude hiding (lookup) import Control.Applicative ((<$>), (<*>)) import Data.List (foldl') import Data.Binary (Binary, put, get) import qualified Control.Monad.State.Strict as S import Data.DAWG.Graph (Id, Node, Graph) import qualified Data.DAWG.Graph as G import qualified Data.DAWG.VMap as V type GraphM a b = S.State (Graph (Maybe a)) b mkState :: (Graph a -> Graph a) -> Graph a -> ((), Graph a) mkState f g = ((), f g) -- | Leaf node with no children and 'Nothing' value. leaf :: Node (Maybe a) leaf = G.Node { G.value = Nothing , G.edges = V.empty } -- | Return node with the given identifier. nodeBy :: Id -> GraphM a (Node (Maybe a)) nodeBy i = G.nodeBy i <$> S.get -- Evaluate the 'G.insert' function within the monad. insertNode :: Ord a => Node (Maybe a) -> GraphM a Id insertNode = S.state . G.insert -- Evaluate the 'G.delete' function within the monad. deleteNode :: Ord a => Node (Maybe a) -> GraphM a () deleteNode = S.state . mkState . G.delete insertM :: Ord a => String -> a -> Id -> GraphM a Id insertM [] y i = do n <- nodeBy i deleteNode n insertNode (n { G.value = Just y }) insertM (x:xs) y i = do n <- nodeBy i j <- case G.onChar x n of Just j -> return j Nothing -> insertNode leaf k <- insertM xs y j deleteNode n insertNode (G.subst x k n) deleteM :: Ord a => String -> Id -> GraphM a Id deleteM [] i = do n <- nodeBy i deleteNode n insertNode (n { G.value = Nothing }) deleteM (x:xs) i = do n <- nodeBy i case G.onChar x n of Nothing -> return i Just j -> do k <- deleteM xs j deleteNode n insertNode (G.subst x k n) lookupM :: String -> Id -> GraphM a (Maybe a) lookupM [] i = G.value <$> nodeBy i lookupM (x:xs) i = do n <- nodeBy i case G.onChar x n of Just j -> lookupM xs j Nothing -> return Nothing -- | A 'G.Graph' with one root from which all other graph nodes should -- be accesible. data DAWG a = DAWG { graph :: !(Graph (Maybe a)) , root :: !Id } deriving (Show, Eq, Ord) instance (Ord a, Binary a) => Binary (DAWG a) where put d = do put (graph d) put (root d) get = DAWG <$> get <*> get -- | Empty DAWG. empty :: Ord a => DAWG a empty = let (i, g) = G.insert leaf G.empty in DAWG g i -- | Number of states in the underlying graph. numStates :: DAWG a -> Int numStates = G.size . graph -- | Insert the (key, value) pair into the DAWG. insert :: Ord a => String -> a -> DAWG a -> DAWG a insert xs y d = let (i, g) = S.runState (insertM xs y $ root d) (graph d) in DAWG g i -- | Delete the key from the DAWG. delete :: Ord a => String -> DAWG a -> DAWG a delete xs d = let (i, g) = S.runState (deleteM xs $ root d) (graph d) in DAWG g i -- | Find value associated with the key. lookup :: String -> DAWG a -> Maybe a lookup xs d = S.evalState (lookupM xs $ root d) (graph d) -- | Construct DAWG from the list of (word, value) pairs. fromList :: Ord a => [(String, a)] -> DAWG a fromList xs = let update t (x, v) = insert x v t in foldl' update empty xs -- | Make DAWG from the list of words. Annotate each word with -- the @()@ value. fromLang :: [String] -> DAWG () fromLang xs = fromList [(x, ()) | x <- xs]