{-# LANGUAGE StrictData #-} module Little.Earley.Internal.Core where import Control.Monad.State import Data.Foldable (for_) import Data.Maybe (fromMaybe) import Data.Sequence (Seq) import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set -- | Grammars with non-terminal symbols @n@, terminal symbols @t@, and tokens @c@. -- -- A grammar defines a language, which is a set of sequences of tokens @c@. -- -- Two basic choices for @t@ and @c@ are: -- -- - @t = 'Little.Earley.Examples.CharT'@ and @c = Char@, -- with @'match' = 'Little.Earley.Examples.matchCharT'@: then the input @[c]@ is a @String@. -- -- - @t = String@ and @c = String@, with @'match' = (==)@: -- then the input @[c]@ is a @[String]@, which can be produced using 'words'; -- just remember to put spaces around operators and parentheses. -- -- See also examples in "Little.Earley.Examples". data Grammar n t c = Grammar { rules :: n -> [Rule n t] -- ^ Production rules associated with each non-terminal symbol. , match :: t -> c -> Bool -- ^ Match a token @c@ with a terminal symbol @t@. , isNullable :: n -> Bool -- ^ Predicate for non-terminal symbols which may expand to the empty string. -- This function MUST be correct for the library to work. -- It can be populated automatically using 'mkGrammar'. } -- | Construct a grammar given the fields 'rules' and 'match', -- implicitly populating 'isNullable'. mkGrammar :: (Ord n, Bounded n, Enum n) => (n -> [Rule n t]) -> (t -> c -> Bool) -> Grammar n t c mkGrammar rs mc = Grammar { rules = rs , match = mc , isNullable = (`Set.member` vs) } where vs = nullableSymbols rs [minBound .. maxBound] -- | Compute the set of non-terminal symbols which may expand to the empty string, -- given an enumeration of all non-terminal symbols. nullableSymbols :: Ord n => (n -> [Rule n t]) -> [n] -> Set n nullableSymbols g ns = loop Set.empty where loop vs = let vs' = Set.fromList (filter (isNullable_ vs) ns) in if vs == vs' then vs else loop vs' isNullable_ vs n = any (all (isNullableS vs)) (g n) isNullableS vs (N n) = n `Set.member` vs isNullableS _vs (T _) = False -- | A production rule is a sequence of atoms. type Rule n t = [Atom n t] -- | An atom is either a non-terminal or a terminal. data Atom n t = N n | T t deriving (Eq, Ord, Show) -- | A rule can be identified by a non-terminal and an index into all -- of the associated rules of that non-terminal. data RuleId n = RuleId n Int deriving (Eq, Ord, Show) data Item n t = Item (RuleId n) [Atom n t] Int deriving (Eq, Ord, Show) data S n t = S { itemSets :: Seq (Set (Item n t)) , currItemSet :: Set (Item n t) , nextItemSet :: Set (Item n t) , currIndex :: Int , todo :: [Item n t] } deriving (Eq, Show) emptyS :: S n t emptyS = S { itemSets = Seq.empty , currItemSet = Set.empty , nextItemSet = Set.empty , currIndex = 0 , todo = [] } initialS :: (Ord n, Ord t) => Grammar n t c -> n -> S n t initialS g n = emptyS { currItemSet = Set.fromList (newItems g 0 n) } newItems :: Ord n => Grammar n t c -> Int -> n -> [Item n t] newItems g i n = let rs = rules g n in map (\(ri_, r) -> Item (RuleId n ri_) r i) (zip [0 ..] rs) allItemSets :: S n t -> Seq (Set (Item n t)) allItemSets s = itemSets s Seq.|> currItemSet s Seq.|> nextItemSet s type Parser n t = State (S n t) next :: Parser n t () next = modify' (\s -> let items | currIndex s == -1 = Seq.empty | otherwise = itemSets s Seq.|> currItemSet s in s { itemSets = items , currItemSet = nextItemSet s , nextItemSet = Set.empty , currIndex = currIndex s + 1 , todo = [] }) addItemCurr :: (Ord n, Ord t) => Item n t -> Parser n t () addItemCurr item = do s <- get if Set.member item (currItemSet s) then pure () else do put (s { currItemSet = Set.insert item (currItemSet s) , todo = item : todo s }) -- Invariant: @i <= currIndex s@ -- In particular, never look up @nextItemSet s@. lookupItemSet :: Int -> Parser n t (Set (Item n t)) lookupItemSet i = gets (\s -> if i == currIndex s then currItemSet s else fromMaybe (error "Should exist") (Seq.lookup i (itemSets s))) addItemNext :: (Ord n, Ord t) => Item n t -> Parser n t () addItemNext item = do modify' (\s -> s { nextItemSet = Set.insert item (nextItemSet s) }) stepItem :: (Ord n, Ord t) => Grammar n t c -> Maybe c -> Item n t -> Parser n t () stepItem g _c (Item ri (N n : xs) iStart) = do i <- gets currIndex for_ (newItems g i n) addItemCurr if isNullable g n then addItemCurr (Item ri xs iStart) else pure () stepItem _ _ (Item (RuleId n _) [] iStart) = do sset <- lookupItemSet iStart for_ sset (\(Item ri' xs' iStart') -> case xs' of N n' : xs1' | n == n' -> addItemCurr (Item ri' xs1' iStart') _ -> pure ()) stepItem g (Just c) (Item ri (T t : xs) iStart) | match g t c = addItemNext (Item ri xs iStart) stepItem _ _ (Item _ (T _ : _) _) = pure () step :: (Ord n, Ord t) => Grammar n t c -> Maybe c -> Parser n t () step g c = gets (Set.toList . currItemSet) >>= go where go [] = pure () go td = do modify' (\s -> s { todo = [] }) for_ td (stepItem g c) gets todo >>= go steps :: (Ord n, Ord t) => Grammar n t c -> [c] -> Parser n t () steps g = go where go (c : cs) = step g (Just c) >> next >> go cs go [] = step g Nothing data Seq1 a = Seq a ::> a deriving (Eq, Show) -- Returns nonempty sequence. preparse :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Seq1 (Set (Item n t)) preparse g n cs = itemSets' (execState (steps g cs) (initialS g n)) where itemSets' s = itemSets s ::> currItemSet s -- nextItemSet should be empty at the end after (step g Nothing) -- | Check whether a grammar matches a chain of character @[c]@ from a starting symbol @n@. accepts :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Bool accepts g n cs = case preparse g n cs of _ ::> items -> (not . null) [() | Item (RuleId n' _) [] 0 <- Set.toList items, n == n']