{- Copyright 2010 Dominique Devriese This file is part of the grammar-combinators library. The grammar-combinators library is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Foobar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Foobar. If not, see . -} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} module Text.GrammarCombinators.Parser.Packrat ( Result (Parsed, NoParse), Derivs, parsePackrat ) where import Text.GrammarCombinators.Base data Result phi r t v = Parsed v (Derivs phi r t) | NoParse instance (Show v) => Show (Result phi r t v) where show (Parsed v _) = "Parsed " ++ show v ++ " _" show NoParse = "NoParse" instance Functor (Result phi r t) where fmap f (Parsed v d) = Parsed (f v) d fmap _ NoParse = NoParse data PRPrimTokenIx data PREndOfInputIx data PRBaseIx ix data PackratDomain phi ix where PackratDomainBase :: phi ix -> PackratDomain phi (PRBaseIx ix) PackratDomainPrimToken :: PackratDomain phi PRPrimTokenIx PackratDomainEndOfInput :: PackratDomain phi PREndOfInputIx instance DomainMap (PackratDomain phi) phi PRBaseIx where supIx = PackratDomainBase subIx (PackratDomainBase idx) = idx instance (MemoFam phi) => MemoFam (PackratDomain phi) where data Memo (PackratDomain phi) v = PRMemo (v PRPrimTokenIx, v PREndOfInputIx, Memo phi (SubVal PRBaseIx v)) toMemo f = PRMemo (f PackratDomainPrimToken, f PackratDomainEndOfInput, toMemo $ \idx -> MkSubVal $ f $ PackratDomainBase idx) fromMemo (PRMemo (_, _, bm)) (PackratDomainBase idx) = unSubVal $ fromMemo bm idx fromMemo (PRMemo (v, _, _)) (PackratDomainPrimToken) = v fromMemo (PRMemo (_, v, _)) (PackratDomainEndOfInput) = v data PackratValue (phi :: * -> *) t r ix where PRPrimTokenValue :: ConcreteToken t -> PackratValue phi t r PRPrimTokenIx PREndOfInputValue :: PackratValue phi t r PREndOfInputIx PRBaseValue :: r ix -> PackratValue phi t r (PRBaseIx ix) unPRPrimTokenValue :: PackratValue phi t r PRPrimTokenIx -> ConcreteToken t unPRPrimTokenValue (PRPrimTokenValue c) = c newtype PRResult phi r t ix = PRResult { unPRResult :: Result phi r t (PackratValue phi t r ix) } newtype Derivs phi r t = Derivs { unDerivs :: forall ix. PackratDomain phi ix -> PRResult phi r t ix } buildDerivs :: forall phi r t. (Token t, MemoFam phi) => (forall ix. PackratDomain phi ix -> PRResult phi r t ix) -> Derivs phi r t buildDerivs f = Derivs memoizedF where memoizedF :: forall ix. PackratDomain phi ix -> PRResult phi r t ix memoizedF = memoFamily f type InternalPRRule phi r t v = Derivs phi r t -> Result phi r t v type InternalGrammar phi r t = forall ix. phi ix -> InternalPRRule phi r t (r ix) data PackratRule phi r t v = PackratRule { runParse :: InternalGrammar phi r t -> InternalPRRule phi r t v } type PackratGrammar phi rr r t = forall ix. phi ix -> PackratRule phi rr t (r ix) instance ProductionRule (PackratRule phi r t) where a >>> b = PackratRule $ \g d0 -> case runParse a g d0 of Parsed f d1 -> case runParse b g d1 of Parsed x d2 -> Parsed (f x) d2 _ -> NoParse _ -> NoParse -- TODO : parameterise disambiguation (keeping only 1st match or all)? a ||| b = PackratRule $ \g d -> case runParse a g d of Parsed v1 d1 -> Parsed v1 d1 _ -> case runParse b g d of Parsed v2 d2 -> Parsed v2 d2 _ -> NoParse die = PackratRule $ \_ _ -> NoParse endOfInput = PackratRule $ \_ d -> case unPRResult $ unDerivs d PackratDomainEndOfInput of Parsed _ d' -> Parsed () d' _ -> NoParse instance EpsProductionRule (PackratRule phi r t) where epsilon v = PackratRule $ \_ -> Parsed v instance LiftableProductionRule (PackratRule phi r t) where epsilonL v _ = epsilon v instance (Token t) => TokenProductionRule (PackratRule phi r t) t where token c = PackratRule $ \_ d -> case unPRResult$ unDerivs d PackratDomainPrimToken of Parsed v' d' | classify (unPRPrimTokenValue v') == c -> Parsed (unPRPrimTokenValue v') d' _ -> NoParse anyToken = PackratRule $ \_ d -> case unPRResult$ unDerivs d PackratDomainPrimToken of Parsed v' d' -> Parsed (unPRPrimTokenValue v') d' _ -> NoParse instance RecProductionRule (PackratRule phi r t) phi r where ref (idx :: phi ix) = PackratRule $ \grammar d -> grammar idx d toInternalGrammar :: PackratGrammar phi r r t -> InternalGrammar phi r t toInternalGrammar g idx = runParse (g idx) (toInternalGrammar g) parsePackratAll :: forall phi r t. (Token t, MemoFam phi) => InternalGrammar phi r t -> [ConcreteToken t] -> Derivs phi r t parsePackratAll grammar s = let derivs :: forall ix. PackratDomain phi ix -> PRResult phi r t ix derivs (PackratDomainPrimToken) = case s of (c:s') -> PRResult $ Parsed (PRPrimTokenValue c) (parsePackratAll grammar s') _ -> PRResult NoParse derivs (PackratDomainBase ruleId) = PRResult $ fmap PRBaseValue $ grammar ruleId (buildDerivs derivs) derivs (PackratDomainEndOfInput) = case s of [] -> PRResult $ Parsed PREndOfInputValue $ Derivs $ const $ PRResult NoParse _ -> PRResult NoParse in buildDerivs derivs instance LoopProductionRule (PackratRule phi r t) phi r where manyRef = manyInf . ref -- | Parse a given string according to a given grammar, starting from a given start non-terminal, -- with a backtracking Packrat parser algorithm (like backtracking recursive descent, but with -- linear performance in the length of the input). parsePackrat :: forall phi r ix t. (Token t, MemoFam phi) => ProcessingContextFreeGrammar phi t r -> phi ix -> [ConcreteToken t] -> Result phi r t (r ix) parsePackrat usergram ruleId s = let grammar :: forall ix'. phi ix' -> Derivs phi r t -> Result phi r t (r ix') grammar = toInternalGrammar usergram result :: Result phi r t (PackratValue phi t r (PRBaseIx ix)) result = unPRResult $ unDerivs (parsePackratAll grammar s) (PackratDomainBase ruleId) in case result of Parsed (PRBaseValue v) d -> Parsed v d _ -> NoParse