{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types #-} -- | This module exposes the internals of the package: its API may change independently of the PVP-compliant version number. module Text.Earley.Internal where import Control.Applicative import Control.Arrow import Control.Monad import Control.Monad.Fix import Control.Monad.ST import Data.ListLike(ListLike) import qualified Data.ListLike as ListLike import Data.STRef import Text.Earley.Grammar #if !MIN_VERSION_base(4,8,0) import Data.Monoid #endif ------------------------------------------------------------------------------- -- * Concrete rules and productions ------------------------------------------------------------------------------- -- | The concrete rule type that the parser uses data Rule s r e t a = Rule { ruleProd :: ProdR s r e t a , ruleNullable :: !(STRef s (Maybe [a])) , ruleConts :: !(STRef s (STRef s [Cont s r e t a r])) } type ProdR s r e t a = Prod (Rule s r) e t a nullable :: Rule s r e t a -> ST s [a] nullable r = do mn <- readSTRef $ ruleNullable r case mn of Just xs -> return xs Nothing -> do writeSTRef (ruleNullable r) $ Just mempty res <- nullableProd $ ruleProd r writeSTRef (ruleNullable r) $ Just res return res nullableProd :: ProdR s r e t a -> ST s [a] nullableProd (Terminal _ _) = return mempty nullableProd (NonTerminal r p) = do as <- nullable r concat <$> mapM (\a -> nullableProd $ fmap ($ a) p) as nullableProd (Pure a) = return [a] nullableProd (Alts as p) = (\ass fs -> fs <*> concat ass) <$> mapM nullableProd as <*> nullableProd p nullableProd (Many p q) = do as <- nullableProd $ (:[]) <$> p <|> pure [] concat <$> mapM (\a -> nullableProd $ fmap ($ a) q) as nullableProd (Named p _) = nullableProd p resetConts :: Rule s r e t a -> ST s () resetConts r = writeSTRef (ruleConts r) =<< newSTRef [] -- | If we have something of type @f@, @'Args' s f a@ is what we need to do to -- @f@ to produce @a@s. type Args s f a = f -> ST s [a] noArgs :: Args s a a noArgs = return . pure funArg :: (f -> a) -> Args s f a funArg f = mapArgs f noArgs pureArg :: x -> Args s f a -> Args s (x -> f) a pureArg x args = args . ($ x) pureArgs :: [x] -> Args s f a -> Args s (x -> f) a pureArgs xs args f = concat <$> mapM (args . f) xs impureArgs :: ST s [x] -> Args s f a -> Args s (x -> f) a impureArgs mxs args f = fmap concat . mapM (args . f) =<< mxs mapArgs :: (a -> b) -> Args s f a -> Args s f b mapArgs = fmap . fmap . fmap composeArgs :: Args s a b -> Args s b c -> Args s a c composeArgs ab bc a = fmap concat . mapM bc =<< ab a ------------------------------------------------------------------------------- -- * States and continuations ------------------------------------------------------------------------------- type Pos = Int -- | An Earley state with result type @a@. data State s r e t a where State :: !Pos -> !(ProdR s r e t f) -> !(Args s f b) -> !(Conts s r e t b a) -> State s r e t a Final :: f -> Args s f a -> State s r e t a -- | A continuation accepting an @a@ and producing a @b@. data Cont s r e t a b where Cont :: !Pos -> !(Args s a b) -> !(ProdR s r e t (b -> c)) -> !(Args s c d) -> !(Conts s r e t d e') -> Cont s r e t a e' FinalCont :: Args s a c -> Cont s r e t a c data Conts s r e t a c = Conts { conts :: !(STRef s [Cont s r e t a c]) , contsArgs :: !(STRef s (Maybe (STRef s (ST s [a])))) } newConts :: STRef s [Cont s r e t a c] -> ST s (Conts s r e t a c) newConts r = Conts r <$> newSTRef Nothing contraMapCont :: Args s b a -> Cont s r e t a c -> Cont s r e t b c contraMapCont f (Cont cpos g p args cs) = Cont cpos (composeArgs f g) p args cs contraMapCont f (FinalCont args) = FinalCont (composeArgs f args) contToState :: ST s [a] -> Cont s r e t a c -> State s r e t c contToState r (Cont cpos g p args cs) = let mb = fmap concat . mapM g =<< r in State cpos p (impureArgs mb args) cs contToState r (FinalCont args) = Final id (impureArgs r args) -- | Strings of non-ambiguous continuations can be optimised by removing -- indirections. simplifyCont :: Conts s r e t b a -> ST s [Cont s r e t b a] simplifyCont Conts {conts = cont} = readSTRef cont >>= go False where go !_ [Cont _ g (Pure f) args cont'] = do ks' <- simplifyCont cont' go True $ map (contraMapCont $ mapArgs f g `composeArgs` args) ks' go True ks = do writeSTRef cont ks return ks go False ks = return ks ------------------------------------------------------------------------------- -- * Grammars ------------------------------------------------------------------------------- -- | Interpret an abstract 'Grammar'. grammar :: Grammar (Rule s r) e a -> ST s a grammar g = case g of RuleBind p k -> do c <- newSTRef =<< newSTRef mempty nr <- newSTRef Nothing grammar $ k $ NonTerminal (Rule p nr c) $ Pure id FixBind f k -> do a <- mfix $ fmap grammar f grammar $ k a Return x -> return x -- | Given a grammar, construct an initial state. initialState :: ProdR s a e t a -> ST s (State s a e t a) initialState p = State (-1) p noArgs <$> (newConts =<< newSTRef [FinalCont noArgs]) ------------------------------------------------------------------------------- -- * Parsing ------------------------------------------------------------------------------- -- | A parsing report, which contains fields that are useful for presenting -- errors to the user if a parse is deemed a failure. Note however that we get -- a report even when we successfully parse something. data Report e i = Report { position :: Int -- ^ The final position in the input (0-based) that the -- parser reached. , expected :: [e] -- ^ The named productions processed at the final -- position. , unconsumed :: i -- ^ The part of the input string that was not consumed, -- which may be empty. } deriving (Eq, Ord, Read, Show) -- | The result of a parse. data Result s e i a = Ended (Report e i) -- ^ The parser ended. | Parsed (ST s [a]) Int i (ST s (Result s e i a)) -- ^ The parser parsed a number of @a@s. These are given as a computation, -- @'ST' s [a]@ that constructs the 'a's when run. We can thus save some -- work by ignoring this computation if we do not care about the results. -- The 'Int' is the position in the input where these results were -- obtained, the @i@ the rest of the input, and the last component is the -- continuation. deriving Functor {-# INLINE safeHead #-} safeHead :: ListLike i t => i -> Maybe t safeHead ts | ListLike.null ts = Nothing | otherwise = Just $ ListLike.head ts {-# INLINE safeTail #-} safeTail :: ListLike i t => i -> i safeTail ts | ListLike.null ts = ts | otherwise = ListLike.tail ts data ParseEnv s e i t a = ParseEnv { results :: ![ST s [a]] -- ^ Results ready to be reported (when this position has been processed) , next :: ![State s a e t a] -- ^ States to process at the next position , reset :: !(ST s ()) -- ^ Computation that resets the continuation refs of productions , names :: ![e] -- ^ Named productions encountered at this position , pos :: !Pos -- ^ The current position in the input string , input :: !i -- ^ The input string } {-# INLINE emptyParseEnv #-} emptyParseEnv :: i -> ParseEnv s e i t a emptyParseEnv i = ParseEnv { results = mempty , next = mempty , reset = return () , names = mempty , pos = 0 , input = i } {-# SPECIALISE parse :: [State s a e t a] -> ParseEnv s e [t] t a -> ST s (Result s e [t] a) #-} -- | The internal parsing routine parse :: ListLike i t => [State s a e t a] -- ^ States to process at this position -> ParseEnv s e i t a -> ST s (Result s e i a) parse [] env@ParseEnv {results = [], next = []} = do reset env return $ Ended Report { position = pos env , expected = names env , unconsumed = input env } parse [] env@ParseEnv {results = []} = do reset env parse (next env) (emptyParseEnv $ safeTail $ input env) {pos = pos env + 1} parse [] env = do reset env return $ Parsed (concat <$> sequence (results env)) (pos env) (input env) $ parse [] env {results = [], reset = return ()} parse (st:ss) env = case st of Final f args -> parse ss env {results = args f : results env} State spos pr args scont -> case pr of Terminal f p -> case safeHead $ input env of Just t | f t -> parse ss env {next = State spos p (pureArg t args) scont : next env} _ -> parse ss env NonTerminal r p -> do rkref <- readSTRef $ ruleConts r ks <- readSTRef rkref writeSTRef rkref (Cont spos noArgs p args scont : ks) nulls <- nullable r let nullStates | null nulls = mempty | otherwise = pure $ State spos p (pureArgs nulls args) scont if null ks then do -- The rule has not been expanded at this position. st' <- State (pos env) (ruleProd r) noArgs <$> newConts rkref parse (st' : nullStates ++ ss) env {reset = resetConts r >> reset env} else -- The rule has already been expanded at this position. parse (nullStates ++ ss) env Pure a | spos /= pos env -> do let argsRef = contsArgs scont masref <- readSTRef argsRef case masref of Just asref -> do -- The continuation has already been followed at this position. modifySTRef asref (((++) <$> args a) <*>) parse ss env Nothing -> do -- It hasn't. asref <- newSTRef $ args a writeSTRef argsRef $ Just asref ks <- simplifyCont scont let kstates = map (contToState $ join $ readSTRef asref) ks parse (kstates ++ ss) env {reset = writeSTRef argsRef Nothing >> reset env} | otherwise -> parse ss env Alts as (Pure f) -> do let args' = funArg f `composeArgs` args sts = [State spos a args' scont | a <- as] parse (sts ++ ss) env Alts as p -> do scont' <- newConts =<< newSTRef [Cont spos noArgs p args scont] -- State is (-1) so that nullable alts are expanded correctly let sts = [State (-1) a noArgs scont' | a <- as] parse (sts ++ ss) env Many p q -> do c <- newSTRef =<< newSTRef mempty nr <- newSTRef Nothing let r = Rule (pure [] <|> (:) <$> p <*> NonTerminal r (Pure id)) nr c st' = State spos (NonTerminal r q) args scont parse (st' : ss) env Named pr' n -> parse (State spos pr' args scont : ss) env {names = n : names env} {-# INLINE parser #-} -- | Create a parser from the given grammar. parser :: ListLike i t => (forall r. Grammar r e (Prod r e t a)) -> i -> ST s (Result s e i a) parser g xs = do s <- initialState =<< grammar g parse [s] $ emptyParseEnv xs -- | Return all parses from the result of a given parser. The result may -- contain partial parses. The 'Int's are the position at which a result was -- produced. allParses :: (forall s. ST s (Result s e i a)) -> ([(a, Int)], Report e i) allParses p = runST $ p >>= go where go :: Result s e i a -> ST s ([(a, Int)], Report e i) go r = case r of Ended rep -> return ([], rep) Parsed mas cpos _ k -> do as <- mas fmap (first (zip as (repeat cpos) ++)) $ go =<< k {-# INLINE fullParses #-} -- | Return all parses that reached the end of the input from the result of a -- given parser. fullParses :: ListLike i t => (forall s. ST s (Result s e i a)) -> ([a], Report e i) fullParses p = runST $ p >>= go where go :: ListLike i t => Result s e i a -> ST s ([a], Report e i) go r = case r of Ended rep -> return ([], rep) Parsed mas _ i k | ListLike.null i -> do as <- mas fmap (first (as ++)) $ go =<< k | otherwise -> go =<< k {-# INLINE report #-} -- | See e.g. how far the parser is able to parse the input string before it -- fails. This can be much faster than getting the parse results for highly -- ambiguous grammars. report :: ListLike i t => (forall s. ST s (Result s e i a)) -> Report e i report p = runST $ p >>= go where go :: ListLike i t => Result s e i a -> ST s (Report e i) go r = case r of Ended rep -> return rep Parsed _ _ _ k -> go =<< k