module FormalLanguage.CFG.Parser
( module FormalLanguage.CFG.Parser
, Result (..)
) where
import Control.Applicative
import Control.Arrow
import Control.Lens hiding (Index, outside, indices, index)
import Control.Monad
import Control.Monad.State.Class (MonadState (..))
import Control.Monad.Trans.Class
import Control.Monad.Trans.State.Strict hiding (get)
import Data.ByteString.Char8 (pack)
import Data.Data.Lens
import Data.Default
import Data.List (nub,genericIndex,mapAccumL)
import Data.Map.Strict (Map)
import Data.Maybe
import Data.Monoid
import Data.Sequence (Seq)
import Debug.Trace
import qualified Data.HashSet as H
import qualified Data.Map.Strict as M
import qualified Data.Sequence as Seq
import qualified Data.Set as S
import qualified Text.PrettyPrint.ANSI.Leijen as AL
import System.IO.Unsafe (unsafePerformIO)
import Text.Parser.Token.Style
import Text.Printf
import Text.Trifecta
import Text.Trifecta.Delta (Delta (Directed))
import FormalLanguage.CFG.Grammar
import FormalLanguage.CFG.Outside
import FormalLanguage.CFG.PrettyPrint.ANSI
data GrammarEnv = GrammarEnv
{ _current :: Grammar
, _env :: Map String Grammar
, _emit :: Seq Grammar
, _verbose :: Bool
}
deriving (Show)
makeLenses ''GrammarEnv
instance Default GrammarEnv where
def = GrammarEnv { _current = def
, _env = def
, _emit = def
, _verbose = False
}
test ∷ IO ()
test = do
p ← parseFromFile (evalStateT (parseEverything empty) def{_verbose = True}) "./deps/FormalGrammars/tests/pseudo.gra"
print p
parse = parseString (evalStateT (parseEverything empty) def) (Directed (pack "via QQ") (fromIntegral 0) 0 0 0)
parseEverything :: Parse m () -> Parse m (Seq Grammar)
parseEverything ps = whiteSpace *> some (assign current def >> p) <* eof >> use emit
where p = parseCommands <|> parseGrammar <|> parseOutside <|> parseNormStartEps <|> parseEmitGrammar <|> ps
parseGrammar :: Parse m ()
parseGrammar = do
reserve fgIdents "Grammar:"
n <- newGrammarName
current.grammarName .= n
current.params <~ (M.fromList . fmap (_indexName &&& id)) <$> (option [] $ parseIndex EvalGrammar) <?> "global parameters"
current.synvars <~ (M.fromList . fmap (_name &&& id)) <$> some (parseSyntacticDecl EvalSymb)
current.synterms <~ (M.fromList . fmap (_name &&& id)) <$> many (parseSynTermDecl EvalSymb)
current.termvars <~ (M.fromList . fmap (_name &&& id)) <$> many parseTermDecl
current.indices <~ (M.fromList . fmap (_indexName &&& id)) <$> setIndices
current.start <~ parseStartSym
current.rules <~ (S.fromList . concat) <$> some parseRule
reserve fgIdents "//"
g <- use current
v <- use verbose
seq (unsafePerformIO $ if v then (printDoc . genGrammarDoc $ g) else return ())
$ env %= M.insert n g
setIndices :: Parse m [Index]
setIndices = do
sv <- use (current . synvars . folded . index)
st <- use (current . synterms . folded . index)
tv <- use (current . termvars . folded . index)
return $ nub $ sv ++ st ++ tv
parseEmitGrammar :: Parse m ()
parseEmitGrammar = do
reserve fgIdents "Emit:"
g <- knownGrammarName
v <- use verbose
seq (unsafePerformIO $ if v then (printDoc . genGrammarDoc $ g) else return ())
$ emit %= ( Seq.|> g)
parseNormStartEps :: Parse m ()
parseNormStartEps = do
reserve fgIdents "NormStartEps:"
n <- newGrammarName
current.grammarName .= n
reserve fgIdents "Source:"
g <- (set grammarName n) <$> knownGrammarName <?> "known source grammar"
reserve fgIdents "//"
let h = normalizeStartEpsilon g
v <- use verbose
seq (unsafePerformIO $ if v then (printDoc . genGrammarDoc $ h) else return ())
$ env %= M.insert n h
parseOutside :: Parse m ()
parseOutside = do
reserve fgIdents "Outside:"
n <- newGrammarName
reserve fgIdents "Source:"
g <- knownGrammarName <?> "known source grammar"
guard (not . isOutside $ g^.outside) <?> "source already is an outside grammar"
reserve fgIdents "//"
let h = set grammarName n $ toOutside g
current .= h
v <- use verbose
seq (unsafePerformIO $ if v then (printDoc . genGrammarDoc $ h) else return ())
$ env %= M.insert n h
parseCommands :: Parse m ()
parseCommands = help <|> vrbose
where help = reserve fgIdents "Help"
vrbose = reserve fgIdents "Verbose" >> verbose .= True
fgIdents = set styleReserved rs emptyIdents
where rs = H.fromList [ "Grammar:", "Outside:", "Source:", "NormStartEps:", "Emit:", "Help", "Verbose"
, "N:", "Y:", "T:", "S:", "->", "=", "<<<", "-", "e", "ε", "l", "λ"
]
newGrammarName :: Parse m String
newGrammarName = flip (<?>) "grammar name previously declared!" $ do
n <- ident fgIdents
e <- get
let g = M.lookup n $ e^.env
when (isJust g) $ unexpected "previously declared grammar name"
return n
knownGrammarName :: Parse m Grammar
knownGrammarName = try $ do
n <- ident fgIdents
e <- get
let g = M.lookup n $ e^.env
when (isNothing g) $ unexpected "known source grammar"
return $ fromJust g
parseSyntacticDecl :: EvalReq -> Parse m SynTermEps
parseSyntacticDecl e = do
reserve fgIdents "N:"
try split <|> normal
where split = angles (flip (set splitN) <$> normal <* string "," <*> integer)
normal = SynVar <$> (ident fgIdents <?> "syntactic variable name") <*> (option [] $ parseIndex e) <*> pure 1 <*> pure 0
parseSynTermDecl :: EvalReq -> Parse m SynTermEps
parseSynTermDecl e = do
reserve fgIdents "Y:"
SynTerm <$> (ident fgIdents <?> "syntactic variable name") <*> (option [] $ parseIndex e)
parseTermDecl :: Parse m SynTermEps
parseTermDecl =
(reserve fgIdents "T:" >> Term <$> (ident fgIdents <?> "terminal name") <*> pure [])
parseStartSym :: Parse m Symbol
parseStartSym
= (runUnlined $ reserve fgIdents "S:" *> knownSynVar EvalRule)
<* someSpace
data EvalReq
= EvalFull
| EvalRule
| EvalSymb
| EvalGrammar
knownSynVar :: EvalReq -> Stately m Symbol
knownSynVar e = Symbol <$> do
((:[]) <$> sv) <|> (brackets $ commaSep sv) <|> (angles $ commaSep sv)
where sv = flip (<?>) "known syntactic variable" . try $ do
s <- ident fgIdents
l <- use (current . synvars . at s)
case l of
Nothing -> fail "bla"
Just (SynVar s' i' n' _) ->
do i <- option [] $ parseIndex e
return $ SynVar s i n' 0
knownSynTerm :: EvalReq -> Stately m Symbol
knownSynTerm e = Symbol <$> do
((:[]) <$> sv) <|> (brackets $ commaSep sv)
where sv = flip (<?>) "known syntactic terminal" . try $ do
s <- ident fgIdents
use (current . synterms . at s) >>= guard . isJust
i <- option [] $ parseIndex e
return $ SynVar s i 0 0
parseIndex :: EvalReq -> Stately m [Index]
parseIndex e = concat <$> (braces . commaSep $ ix e) where
ix EvalGrammar = (\s -> [Index s 0 undefined [] 1]) <$> ident fgIdents
ix EvalSymb = do s <- ident fgIdents
reserve fgIdents "="
n <- natural
return [Index s 0 ISymbol [0..n-1] 1]
ix EvalRule = do s <- ident fgIdents
let req = (\k -> [Index s k IEq [] 1]) <$ reserve fgIdents "=" <*> natural
let rminus = (\k -> [Index s k IMinus [] 1]) <$ reserve fgIdents "-" <*> natural
let rplus = (\k -> [Index s k IPlus [] 1]) <$> (option 0 $ reserve fgIdents "+" *> natural)
try req <|> try rminus <|> rplus
knownTermVar :: EvalReq -> Stately m Symbol
knownTermVar e = Symbol <$> do
((:[]) <$> (eps <|> tv)) <|> (brackets $ commaSep (del <|> eps <|> loc <|> tv))
where tv = flip (<?>) "known terminal variable" . try $ do
i <- ident fgIdents
t <- use (current . termvars . at i)
s <- use (current . synvars . at i)
guard . isJust $ t <|> s
return $ if isJust t then Term i [] else SynVar i [] 1 0
del = Deletion <$ reserve fgIdents "-"
eps = Epsilon Global <$ (reserve fgIdents "e" <|> reserve fgIdents "ε")
loc = Epsilon Local <$ (reserve fgIdents "l" <|> reserve fgIdents "λ")
knownSymbol :: EvalReq -> Stately m Symbol
knownSymbol e = try (knownSynVar e) <|> try (knownSynTerm e) <|> knownTermVar e
parseRule :: Parse m [Rule]
parseRule = (expandIndexed =<< runUnlined rule) <* someSpace
where rule = Rule
<$> knownSynVar EvalRule
<* reserve fgIdents "->"
<*> afun
<* string "<<<" <* spaces
<*> (updateSplitCounts <$> some syms)
afun = (:[]) <$> ident fgIdents
syms = knownSymbol EvalRule
updateSplitCounts :: [Symbol] -> [Symbol]
updateSplitCounts = snd . mapAccumL go M.empty where
go m (Symbol [SynVar s i n k])
| n > 1 = let o = M.findWithDefault 0 (s,i) m + 1
in (M.insert (s,i) o m, Symbol [SynVar s i n o])
go m s = (m,s)
expandIndexed :: Rule -> Parse m [Rule]
expandIndexed r = do
let is :: [IndexName] = nub $ r ^.. biplate . indexName
js :: [Index] <- catMaybes <$> mapM (\i -> use (current . indices . at i)) is
if null js
then return [r]
else mapM go $ sequence $ map expand js
where
go :: [Index] -> Parse m Rule
go ixs = foldM (\b a -> return $ b & biplate.index.traverse %~ changeIndex a) r ixs
expand :: Index -> [Index]
expand i = [ i & indexHere .~ j | j <- i^.indexRange ]
changeIndex :: Index -> Index -> Index
changeIndex i o
| iin /= oin = o
| o^.indexOp == IEq = o
| null otr = error $ printf "index %s uses var %d that is not in range %s!\n" (oin^.getIndexName) oih (show rng)
| o^.indexOp == IPlus = o & indexHere .~ ((otr ++ cycle rng) `genericIndex` oih)
| o^.indexOp == IMinus = o & indexHere .~ ((tro ++ cycle (reverse rng)) `genericIndex` oih)
where rng = i^.indexRange
otr = dropWhile (/= i^.indexHere) rng
tro = dropWhile (/= i^.indexHere) $ reverse rng
iin = i^.indexName
iih = i^.indexHere
oin = o^.indexName
oih = o^.indexHere
type Parse m a = (TokenParsing m, MonadState GrammarEnv m, MonadPlus m) => m a
type Parse' a = StateT GrammarEnv Parser a
type Stately m a = (TokenParsing m, MonadState GrammarEnv m, MonadPlus m) => m a