{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE FlexibleContexts #-} -- | Abstract implementation of an automaton (or a set of automata, -- in general). `Auto` provides a minimal interface needed to -- use automata in parsing and thus allows to use one of the -- concrete implementations provided by the library: -- -- * "NLP.Partage.Auto.DAWG": directed acyclic word graph -- * "NLP.Partage.Auto.Trie": prefix tree -- * "NLP.Partage.Auto.List": set of lists -- * "NLP.Partage.Auto.Set": set of automata, one automaton per -- `Head` non-terminal module NLP.Partage.Auto ( -- * Automata Auto (..) , Edge (..) , GramAuto -- * Utilities , allIDs , allEdges ) where import qualified Control.Monad.State.Strict as E import qualified Data.Set as S import Data.DAWG.Ord (ID) import NLP.Partage.FactGram (Lab(..)) -- | A datatype used to distinguish head non-terminals from body -- non-terminals in automata-based grammar representation. data Edge a = Head a | Body a deriving (Show, Eq, Ord) -- | Minimal automaton implementation. -- Multiple roots are allowed in order to account for -- list implementation of an automaton. data Auto a = Auto { roots :: S.Set ID -- ^ Set of automata roots , follow :: ID -> a -> Maybe ID -- ^ Follow a transition with the given symbol from the given node , edges :: ID -> [(a, ID)] -- ^ List of outgoing edges (transitions) } -- | Automaton type specialized to represent grammar rules. type GramAuto n t = Auto (Edge (Lab n t)) -- | Extract the set of underlying IDs. allIDs :: Ord a => Auto a -> S.Set ID allIDs d = S.fromList $ concat [[i, j] | (i, _, j) <- allEdges d] -- | Return the list of automaton transitions. allEdges :: Ord a => Auto a -> [(ID, a, ID)] allEdges = S.toList . walk -- | Traverse the automaton and collect all the edges. walk :: Ord a => Auto a -> S.Set (ID, a, ID) walk Auto{..} = flip E.execState S.empty $ flip E.evalStateT S.empty $ mapM_ doit (S.toList roots) where -- The embedded state serves to store the resulting set of -- transitions; the surface state serves to keep track of -- already visited nodes. doit i = do b <- E.gets $ S.member i E.when (not b) $ do E.modify $ S.insert i E.forM_ (edges i) $ \(x, j) -> do E.lift . E.modify $ S.insert (i, x, j) doit j