{ {-# LANGUAGE DeriveDataTypeable #-} module Intel.Cnc.Spec.CncGrammar where import Intel.Cnc.Spec.CncLexer hiding (main) import Intel.Cnc.Spec.AST import Intel.Cnc.Spec.SrcLoc --import Data.Char import StringTable.Atom --import Data.Data import Text.PrettyPrint.HughesPJClass import Debug.Trace } -- (Based on example from Simon Marlow.) -- These are similar macros to those used by the GHC parser: -- define L0 L noSrcSpan -- define L1 sL (getLoc $1) -- Both arguments are Lexemes: -- One or both arguments can be Decorated instead: -- Here is a praticularly painful special case where we have a possibly -- empty list on the right end. We take any source info that is there -- and fall back to the second to last token otherwise. -- For now we enable BOTH the new syntax and the legacy one: -- First thing to declare is the name of your parser, -- and the type of the tokens the parser reads. %name parse_cnc %tokentype { Lexeme } -- The parser will be of type [Token] -> ?, where ? is determined by the -- production rules. Now we declare all the possible tokens: %token tuple { L _ LVarId "tuple" } var { L _ LVarId _ } qvar { L _ LQVarId _ } int { L _ LInteger _ } "->" { L _ LReservedOp "->" } "<-" { L _ LReservedOp "<-" } "::" { L _ LReservedOp "::" } -- ':' { L _ LReservedOp ":" } ':' { L _ LSpecial ":" } '(' { L _ LSpecial "(" } ')' { L _ LSpecial ")" } '[' { L _ LSpecial "[" } ']' { L _ LSpecial "]" } ';' { L _ LSpecial ";" } ',' { L _ LSpecial "," } '<' { L _ LReservedOp "<" } '>' { L _ LReservedOp ">" } '=' { L _ LVarOp "=" } '+' { L _ LVarOp "+" } '-' { L _ LVarOp "-" } -- '*' { L _ LVarOp "*" } '*' { L _ LSpecial "*" } '/' { L _ LVarOp "/" } op { L _ LVarOp _ } -- step { L _ LReservedId "step" } mod { L _ LReservedId "module" } tags { L _ LReservedId "tags" } items { L _ LReservedId "items" } steps { L _ LReservedId "steps" } reductions { L _ LReservedId "reductions" } dense { L _ LReservedId "dense" } prescribes { L _ LReservedId "prescribes" } type { L _ LReservedId "type" } -- FUTURE WORK: Reserving space in the grammar for presently -- unimplemented features [2011.04.12]: constrain { L _ LReservedId "constrain" } eof { L _ LEOF _ } -- comment { L _ LComment _ } -- The left hand side are the names of the terminals or tokens, -- and the right hand side is how to pattern match them. %nonassoc '<' '>' '<=' '>=' '==' '=' %left '+' '-' %left '*' '/' -- Like yacc, we include %% here, for no real reason. %% -- Now the production rules. ---------------------------------------------------------------------------------------------------- File :: { [PStatement SrcSpan] } File : Statements { $1 } Statements : Statement Statements { $1 ++ $2 } | Statement { $1 } | eof { [] } -- Statement : Terminated_Relation { [$1] } -- | Terminated_Decl { $1 } -- Terminated_Relation : Relation ';' { $1 } -- Terminated_Decl : Decl ';' { $1 } Statement : Relation ';' { [$1] } | Decl ';' { $1 } | type var '=' Type ';' { [TypeDef (lexPointSpan $2) (tAL $2) $4] } -- | type var Type ';' { [TypeDef (lexPointSpan $2) (tAL $2) $3] } -- These are just for better errors: | Relation eof { parseErrorSDoc (getDecor $1) $ text "Premature end of file, possible missing semi-colon." } | Decl eof { parseErrorSDoc (getDecorLs $1) $ text "Premature end of file, possible missing semi-colon." } -- reduce/reduce conflict -- | Relation Instance { parseErrorSDoc (getDecor $2) $ text "Possible missing semi-colon." } Decl :: { [PStatement SrcSpan] } Decl : steps VarLs { map (\x -> DeclareSteps (lexSpan $1) (tAL x)) $2 } -- Here we try particularly hard to get good source location info: | constrain Instance ':' TagExps { [Constraints (cLLS $1 (lexSpan $3) $4) $2 $4] } -- One additional shift/reduce conflict if we do not use a separator: | constrain Instance TagExps { [Constraints (cLLS $1 (getDecor $2) $3) $2 $3] } --| constrain TagExps { [Constraints (lexSpan $1) (InstName "foo") []] } --| constrain Instance { [Constraints (lexSpan $1) (InstName "foo") []] } --| constrain var { [Constraints (lexSpan $1) (InstName "foo") []] } {- #if 0 -} | Mods tags var { [DeclareTags (cLL $2 $3) (tAL $3) Nothing] } -- [2010.07.20] I am having a strange problem making Mods optional: -- | Mods tags '<' Type '>' var { [DeclareTags (lexSpan $2) (lexStr $6) (Just $4)] } | Mod Mods tags '<' Type '>' var { [DeclareTags (cLL $3 $7) (tAL $7) (Just $5)] } | tags '<' Type '>' var { [DeclareTags (combineSrcSpans (lexSpan $1) (lexSpan $>)) (tAL $5) (Just $3)] } -- TODO: Rearrange this so that the arbitrary numbers of template args -- are accepted and an error is thrown if the wrong number are present: | Mods items var { [DeclareItems (cLL $2 $3) (tAL $3) Nothing] } | Mods items '<' Type ',' Type '>' var { [DeclareItems (cLL $2 $8) (tAL $8) (Just ($4, $6))] } -- Reductions are similar to item collections except they are parameterized by a reduction op. -- | Mods reductions var '(' var ')' { [DeclareReductions (cLL $2 $3) (tAL $3) (tAL $5) Nothing] } -- | Mods reductions '<' Type ',' Type '>' var '(' var ')' -- { [DeclareReductions (cLL $2 $8) (tAL $8) (tAL $10) (Just ($4, $6))] } | Mods reductions var '(' var ',' Exp ')' { [DeclareReductions (cLL $2 $3) (tAL $3) (tAL $5) $7 Nothing] } | Mods reductions '<' Type ',' Type '>' var '(' var ',' Exp ')' { [DeclareReductions (cLL $2 $10) (tAL $8) (tAL $10) $12 (Just ($4, $6))] } | '<' Type var '>' { [DeclareTags (combineSrcSpans (lexSpan $1) (lexSpan $>)) (tAL $3) (Just $2)] } -- Inexplicable problem with this TagExps version, maybe because of '>' not being special... --| '<' Type var ':' TagExps '>' { [DeclareTags (lexSpan $3) (tAL $3) (Just $2)] } | '<' Type var ':' VarsOnlyHack '>' { [DeclareTags (combineSrcSpans (lexSpan $1) (lexSpan $>)) (tAL $3) (Just $2)] } | '[' Type var '<' Type '>' ':' TagExps ']' { [DeclareItems (combineSrcSpans (lexSpan $1) (lexSpan $>)) (tAL $3) (Just ($5, $2))] } | '[' Type var '<' Type '>' ']' { [DeclareItems (combineSrcSpans (lexSpan $1) (lexSpan $>)) (tAL $3) (Just ($5, $2))] } VarLs : var { [$1] } | var ',' VarLs { $1 : $3 } -- Modifier keywords can precede declarations. Mods : {- empty -} { [] } | Mod Mods { $1 : $2 } Mod : dense { "dense" } Relation :: { PStatement SrcSpan } Relation : Instances Chain { Chain $1 $2 } Chain :: { [RelLink SrcSpan] } Chain : { [] } -- | Link { [$1] } | Link Chain { $1 : $2 } Link : "->" Instances { ProduceLink (lexSpan $1) $2 } | "<-" Instances { RevProduceLink (lexSpan $1) $2 } | "::" Instances { PrescribeLink (lexSpan $1) $2 } | prescribes Instances { PrescribeLink (lexSpan $1) $2 } Instances :: { [CollectionInstance SrcSpan] } Instances : { [] } | Instance { [$1] } | Instance ',' Instances { $1 : $3 } Instance : var { InstName (lexSpan $1) (lexStr $1) } | var '[' TagExps ']' { InstItemCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $1) $3 } -- | var '<' TagExps '>' { InstStepOrTags (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $1) $3 } | var '(' TagExps ')' { InstStepOrTags (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $1) $3 } | '<' Var '>' { InstTagCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) [] } | '(' Var ')' { InstStepCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) [] } | '[' Var ']' { InstItemCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) [] } -- TEMP FIXME: Again, problem here with full tag exps: | '<' Var ':' VarsOnlyHack '>' { InstTagCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) $4 } | '(' Var ':' TagExps ')' { InstStepCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) $4 } | '[' Var ':' TagExps ']' { InstItemCol (combineSrcSpans (lexSpan $1) (lexSpan $>)) (lexStr $2) $4 } TagExps :: { [Exp SrcSpan] } TagExps : { [] } | Exp { [$1] } | Exp ',' TagExps { $1 : $3 } -- TEMP, HACK: VarsOnlyHack : { [] } | var { [Var (lexSpan $1) (tAL $1)] } | var ',' VarsOnlyHack { Var (lexSpan $1) (tAL $1) : $3 } -- This is just for catching errors: Var : var { $1 } -- This error checking must be introduced CAREFULLY or it can yield errors. Not every position can be checked. -- Sadly, this error checking should go EVERYWHERE: | tags { parseErrorSDoc (lexSpan $1) $ text "Keyword 'tags' used incorrectly." } | items { parseErrorSDoc (lexSpan $1) $ text "Keyword 'items' used incorrectly." } | reductions { parseErrorSDoc (lexSpan $1) $ text "Keyword 'reductions' used incorrectly." } | steps { parseErrorSDoc (lexSpan $1) $ text "Keyword 'steps' used incorrectly." } | dense { parseErrorSDoc (lexSpan $1) $ text "Keyword 'dense' used incorrectly." } | prescribes { parseErrorSDoc (lexSpan $1) $ text "Keyword 'prescribes' used incorrectly." } | constrain { parseErrorSDoc (lexSpan $1) $ text "Keyword 'constrain' used incorrectly." } | mod { parseErrorSDoc (lexSpan $1) $ text "Keyword 'module' used incorrectly." } -- | step { parseErrorSDoc (lexSpan $1) $ text "Keyword 'step' used incorrectly." } Exp :: { Exp SrcSpan } -- The haskell type of the result of parsing this syntax class. Exp : var { Var (lexSpan $1) (tAL $1) } | qvar { Var (lexSpan $1) (tAL $1) } | int { Lit (lexSpan $1) (LitInt $ read (lexStr $1)) } | '(' Exp ')' { $2 } -- Function application: | var '(' TagExps ')' { App (combineSrcSpans (lexSpan $1) (getDecor (last $3))) (Var (lexSpan $1) (tAL $1)) $3 } -- Tuples? *** TODO ***. -- Including explicit productions for arithmetic just to handle precedence/associativity: | Exp '+' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom "+")) [$1, $3] } | Exp '-' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom "-")) [$1, $3] } | Exp '*' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom "*")) [$1, $3] } | Exp '/' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom "/")) [$1, $3] } -- These need to be handled because they are lexed differently, being reserved characters: | Exp '<' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom "<")) [$1, $3] } | Exp '>' Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (toAtom ">")) [$1, $3] } | Exp op Exp { App (combExpSpans $1 $3) (Var (combineSrcSpans (getDecor $1) (getDecor $>)) (tAL $2)) [$1, $3] } Type : var { TSym (toAtom $ lexStr $1) } | dense Type { TDense $2 } | Type '*' { TPtr $1 } | '(' Types ')' { TTuple $2 } | tuple '<' Types '>' { TTuple $3 } Types : { [] } | Type { [$1] } | Type ',' Types { $1 : $3 } -- We are simply returning the parsed data structure! Now we need -- some extra code, to support this parser, and make in complete: ---------------------------------------------------------------------------------------------------- { -- Combine two lexical tokens for a source span: cLL a b = (lexSpan a) `combineSrcSpans` (lexSpan b) -- more obscure, combine two tokens and the first (if it exists) of a list: cLLS a b c = combineSrcSpans (lexSpan a) $ combineSrcSpans b (getDecorLs c) getDecorLs [] = srcLocSpan noSrcLoc getDecorLs (h:t) = getDecor h -- All parsers must declair this function, which is called when an error -- is detected. Note that currently we do no error recovery. happyError :: [Lexeme] -> a happyError [] = error "Parse error. Strange - it's not before any token that I know of..." happyError ls = let loc = lexLoc $ head ls in error$ "Parse error before token at location : \n " ++ show (pPrint loc) ++ (if srcColumn loc <= 1 then "\n(An error at the beginning of the line like this could be a missing semi-colon on the previous line.)\n" else "") parseErrorSDoc span doc = error $ show $ text "\n\nPARSE ERROR!\n " <> doc $$ text "At location: " <> pPrint span -- Now we declare the datastructure that we are parsing. runCncParser :: String -> String -> [PStatement SrcSpan] runCncParser file str = let notcomment = filter (not . is_comment) $ scan_to_list str in -- FIXME: -- Here's a hack that's a bit ineffecient. We POST-FACTO put the right filename in the -- sourceloc decorations. It would be better to do it right the first time. -- -- NOTE [2010.07.23] This has another problem. Parse errors will say "unknown file". -- I guess I need to thread through a reader monad. map (mapDecor (srcSpanSetFileName file)) $ -- For now filter out comments before parsing: if null notcomment then error "ERROR: Specification file contains no CnC statements!" else parse_cnc $ notcomment is_comment ( L _ LComment _ ) = True is_comment _ = False lexStr :: Lexeme -> String lexStr (L _ _ str) = str tAL = toAtom . lexStr lexLoc :: Lexeme -> SrcLoc lexLoc (L (AlexPn n l c) _ _) = (SrcLoc "" l c) lexSpan :: Lexeme -> SrcSpan -- [2010.07.23] We can do a little better by looking at the length of -- the string and stretching the src location to include all of it. -- DANGER, we assume that Lexemes stay on one line!! (not true of multiline comments) lexSpan (L (AlexPn n l c) _ str) = let start = mkSrcLoc "" l c end = mkSrcLoc "" l (c + length str) in srcLocSpan start `combineSrcSpans` srcLocSpan end -- Not a span, just a single point: --lexPointSpan (L (AlexPn n l c) _ _) = srcLocSpan (SrcLoc "unknownfile" l c) lexPointSpan = srcLocSpan . lexLoc -- Combine the spans in two expressions. combExpSpans e1 e2 = combineSrcSpans (getDecor e1) (getDecor e2) quit = print "runCnc failed\n" }