{- BNF Converter: Abstract syntax Copyright (C) 2004 Author: Markus Forsberg, Aarne Ranta This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} module GetCF(tryReadCFP) where import Control.Monad ( when ) import CF import Utils import ParBNF import Data.List(nub,partition) import qualified AbsBNF as Abs import Data.Maybe (catMaybes) import Data.Either (partitionEithers) import ErrM import Data.Char import TypeChecker import Options readCF :: SharedOptions -> FilePath -> IO CFP readCF opts f = tryReadCFP opts f >>= return . fst tryReadCFP :: SharedOptions -> FilePath -> IO (CFP,Bool) tryReadCFP opts file = do putStrLn $ "\nReading grammar from " ++ file s <- readFile file let (cfp,msgs1) = getCFP (cnf opts) s cf = cfp2cf cfp msgs2 = case checkDefinitions cf of Bad err -> [err] Ok () -> [] msgs3 = checkTokens cf msg = msgs1++msgs2 -- ++ msgs3 -- in a future version ret = cfp let reserved = if anyTarget opts [TargetJava,TargetJava15] then [takeWhile (/='.') file] else [] case filter (not . isDefinedRule) $ notUniqueNames reserved cf of ns@(_:_) | not (anyTarget opts [TargetHaskell,TargetHaskellGADT,TargetOCAML]) -> do putStrLn $ "ERROR: names not unique: " ++ unwords ns return (ret,False) ns -> do case ns of _:_ -> do putStrLn $ "Warning: names not unique: " ++ unwords ns putStrLn "This can be an error in other back ends." _ -> return () putStrLn $ unlines msgs3 if not (null msg) then do putStrLn $ unlines msg return (ret,False) else do putStrLn $ show (length (rulesOfCF cf)) +++ "rules accepted\n" let c3s = [(b,e) | (b,e) <- fst (comments cf), length b > 2 || length e > 2] if null c3s then return () else do putStrLn "Warning: comment delimiters longer than 2 characters ignored in Haskell:" mapM_ putStrLn [b +++ "-" +++ e | (b,e) <- c3s] return (ret,True) {- case filter (not . isDefinedRule) $ notUniqueFuns cf of [] -> case (badInheritence cf) of [] -> return (ret,True) xs -> do putStrLn "Warning :" putStrLn $ " Bad Label name in Category(s) :" ++ unwords xs putStrLn $ " These categories have more than one Label, yet one of these" putStrLn $ " Labels has the same name as the Category. This will almost" putStrLn $ " certainly cause problems in languages other than Haskell.\n" return (ret,True) xs -> do putStrLn $ "Warning :" putStrLn $ " Non-unique label name(s) : " ++ unwords xs putStrLn $ " There may be problems with the pretty-printer.\n" case (badInheritence cf) of [] -> return (ret,True) xs -> do putStrLn $ "Warning :" putStrLn $ " Bad Label name in Category(s) :" ++ unwords xs putStrLn $ " These categories have more than one Label, yet one of these" putStrLn $ " Labels has the same name as the Category. This will almost" putStrLn $ " certainly cause problems in languages other than Haskell.\n" return (ret,True) -} nilCFP :: CFP nilCFP = CFG (([],([],[],[],[])),[]) getCFP :: Bool -> String -> (CFP,[String]) getCFP cnf s = case pGrammar . myLexer $ s of Bad s -> (nilCFP,[s]) Ok (Abs.Grammar defs) -> (cf0,msgs) where (pragma,rules) = partitionEithers $ concatMap (transDef cnf) defs msgs = catMaybes $ map (checkRule (cfp2cf cf0)) (rulesOfCF cf0) cf0 = revs srt srt = let literals = nub [lit | xs <- map rhsRule rules, Left lit <- xs, elem lit specialCatsP] (symbols,keywords) = partition notIdent reservedWords notIdent s = null s || not (isAlpha (head s)) || any (not . isIdentRest) s isIdentRest c = isAlphaNum c || c == '_' || c == '\'' reservedWords = nub [t | r <- rules, Right t <- rhsRule r] in CFG((pragma,(literals,symbols,keywords,[])),rules) revs cf1@(CFG((pragma,(literals,symbols,keywords,_)),rules)) = CFG((pragma,(literals,symbols,keywords,findAllReversibleCats (cfp2cf cf1))),rules) transDef :: Bool -> Abs.Def -> [Either Pragma RuleP] transDef cnf x = case x of Abs.Rule label cat items -> [Right $ Rule (transLabel label) (transCat cat) (map transItem items)] Abs.Comment str -> [Left $ CommentS str] Abs.Comments str0 str -> [Left $ CommentM (str0,str)] Abs.Token ident reg -> [Left $ TokenReg (transIdent ident) False reg] Abs.PosToken ident reg -> [Left $ TokenReg (transIdent ident) True reg] Abs.Entryp idents -> [Left $ EntryPoints (map transIdent idents)] Abs.Internal label cat items -> [Right $ Rule (transLabel label) (transCat cat) (Left internalCat:(map transItem items))] Abs.Separator size ident str -> map (Right . cf2cfpRule) $ separatorRules size ident str Abs.Terminator size ident str -> map (Right . cf2cfpRule) $ terminatorRules size ident str Abs.Delimiters a b c d e -> map (Right . cf2cfpRule) $ delimiterRules cnf a b c d e Abs.Coercions ident int -> map (Right . cf2cfpRule) $ coercionRules ident int Abs.Rules ident strs -> map (Right . cf2cfpRule) $ ebnfRules ident strs Abs.Layout ss -> [Left $ Layout ss] Abs.LayoutStop ss -> [Left $ LayoutStop ss] Abs.LayoutTop -> [Left $ LayoutTop] Abs.Function f xs e -> [Left $ FunDef (transIdent f) (map transArg xs) (transExp e)] delimiterRules :: Bool -> Abs.Cat -> String -> String -> Abs.Separation -> Abs.MinimumSize -> [Rule] delimiterRules False a0 l r sep size = [ Rule "_" as [Right l, Left (listCat x), Right r] ] ++ separationRules (Abs.IdCat $ Abs.Ident $ x) sep size where a = transCat a0 as = listCat a x = a ++ "_without_delimiters" delimiterRules True a0 l r (Abs.SepTerm "") size = delimiterRules True a0 l r Abs.SepNone size delimiterRules True a0 l r (Abs.SepSepar "") size = delimiterRules True a0 l r Abs.SepNone size delimiterRules True a0 l r sep size = [ -- recognizing a single element Rule "(:[])" a' (Left a : termin), -- optional terminator/separator -- glueing two sublists Rule "(++)" a' [Left a', Left a'], -- starting on either side with a delimiter Rule "[]" c [Right l], Rule (if optFinal then "(:[])" else "[]") d ([Left a | optFinal] ++ [Right r]), -- gathering chains Rule "(++)" c [Left c, Left a'], Rule "(++)" d [Left a', Left d], -- finally, put together left and right chains Rule "(++)" as [Left c,Left d]] ++ [ -- special rule for the empty list if necessary Rule "[]" as [Right l,Right r] | optEmpty] where a = transCat a0 as = listCat a a' = '@':'@':a c = '@':'{':a d = '@':'}':a -- optionally separated concat. of x and y categories. x // y = (Left x : [Right t | Abs.SepSepar t <- [sep]] ++ [Left y ]) termin = case sep of Abs.SepSepar t -> [Right t] Abs.SepTerm t -> [Right t] _ -> [] optFinal = case (sep,size) of (Abs.SepSepar t,_) -> True (Abs.SepTerm _,Abs.MNonempty) -> True (Abs.SepNone,Abs.MNonempty) -> True _ -> False optEmpty = case sep of Abs.SepSepar _ -> size == Abs.MEmpty _ -> False separationRules :: Abs.Cat -> Abs.Separation -> Abs.MinimumSize -> [Rule] separationRules c Abs.SepNone size = terminatorRules size c "" separationRules c (Abs.SepTerm t) size = terminatorRules size c t separationRules c (Abs.SepSepar t) size = separatorRules size c t separatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule] separatorRules size c s = if null s then terminatorRules size c s else ifEmpty [ Rule "(:[])" cs [Left c'], Rule "(:)" cs [Left c', Right s, Left cs] ] where c' = transCat c cs = listCat c' ifEmpty rs = if (size == Abs.MNonempty) then rs else Rule "[]" cs [] : rs terminatorRules :: Abs.MinimumSize -> Abs.Cat -> String -> [Rule] terminatorRules size c s = [ ifEmpty, Rule "(:)" cs (Left c' : s' [Left cs]) ] where c' = transCat c cs = listCat c' s' its = if null s then its else (Right s : its) ifEmpty = if (size == Abs.MNonempty) then Rule "(:[])" cs ([Left c'] ++ if null s then [] else [Right s]) else Rule "[]" cs [] coercionRules :: Abs.Ident -> Integer -> [Rule] coercionRules (Abs.Ident c) n = Rule "_" c [Left (c ++ "1")] : [(Rule "_" (c ++ show (i-1)) [Left (c ++ show i)]) | i <- [2..n]] ++ [(Rule "_" (c ++ show n) [Right "(", Left c, Right ")"])] ebnfRules :: Abs.Ident -> [Abs.RHS] -> [Rule] ebnfRules (Abs.Ident c) rhss = [Rule (mkFun k c its) c (map transItem its) | (k, Abs.RHS its) <- zip [1 :: Int ..] rhss] where mkFun k c i = case i of [Abs.Terminal s] -> c' ++ "_" ++ mkName k s [Abs.NTerminal n] -> c' ++ identCat (transCat n) _ -> c' ++ "_" ++ show k c' = c --- normCat c mkName k s = if all (\c -> isAlphaNum c || elem c "_'") s then s else show k transItem :: Abs.Item -> Either Cat String transItem x = case x of Abs.Terminal str -> Right str Abs.NTerminal cat -> Left (transCat cat) transCat :: Abs.Cat -> Cat transCat x = case x of Abs.ListCat cat -> "[" ++ (transCat cat) ++ "]" Abs.IdCat id -> transIdent id transLabel :: Abs.Label -> (Fun,Prof) transLabel y = case y of Abs.LabNoP f -> let g = transLabelId f in (g,(g,[])) ---- should be Nothing Abs.LabP f p -> let g = transLabelId f in (g,(g, map transProf p)) Abs.LabPF f g p -> (transLabelId f,(transLabelId g, map transProf p)) Abs.LabF f g -> (transLabelId f,(transLabelId g, [])) where transLabelId x = case x of Abs.Id id -> transIdent id Abs.Wild -> "_" Abs.ListE -> "[]" Abs.ListCons -> "(:)" Abs.ListOne -> "(:[])" transProf (Abs.ProfIt bss as) = ([map fromInteger bs | Abs.Ints bs <- bss], map fromInteger as) transIdent :: Abs.Ident -> String transIdent x = case x of Abs.Ident str -> str transArg :: Abs.Arg -> String transArg (Abs.Arg x) = transIdent x transExp :: Abs.Exp -> Exp transExp e = case e of Abs.App x es -> App (transIdent x) (map transExp es) Abs.Var x -> App (transIdent x) [] Abs.Cons e1 e2 -> cons e1 (transExp e2) Abs.List es -> foldr cons nil es Abs.LitInt x -> LitInt x Abs.LitDouble x -> LitDouble x Abs.LitChar x -> LitChar x Abs.LitString x -> LitString x where cons e1 e2 = App "(:)" [transExp e1, e2] nil = App "[]" [] -------------------------------------------------------------------------------- --checkTokens :: CFG f -> [String] checkTokens cf = if null ns then [] else ["Warning : ", -- change to error in a future version " The following tokens accept the empty string: ", " "++unwords ns, " This is error-prone and will not be supported in the future."] where ns = map fst . filter (nullable.snd) $ tokenPragmas cf -- | Check if a regular expression is nullable (accepts the empty string) nullable :: Abs.Reg -> Bool nullable r = case r of Abs.RSeq r1 r2 -> nullable r1 && nullable r2 Abs.RAlt r1 r2 -> nullable r1 || nullable r2 Abs.RMinus r1 r2 -> nullable r1 && not (nullable r2) Abs.RStar _ -> True Abs.RPlus r1 -> nullable r1 Abs.ROpt _ -> True Abs.REps -> True Abs.RChar _ -> False Abs.RAlts _ -> False Abs.RSeqs s -> null s Abs.RDigit -> False Abs.RLetter -> False Abs.RUpper -> False Abs.RLower -> False Abs.RAny -> False -- we should actually check that -- (1) coercions are always between variants -- (2) no other digits are used checkRule :: CF -> RuleP -> Maybe String checkRule cf (Rule _ ('@':_) rhs) = Nothing -- Generated by a pragma; it's a trusted category checkRule cf (Rule (f,_) cat rhs) | badCoercion = Just $ "Bad coercion in rule" +++ s | badNil = Just $ "Bad empty list rule" +++ s | badOne = Just $ "Bad one-element list rule" +++ s | badCons = Just $ "Bad list construction rule" +++ s | badList = Just $ "Bad list formation rule" +++ s | badSpecial = Just $ "Bad special category rule" +++ s | badTypeName = Just $ "Bad type name" +++ unwords badtypes +++ "in" +++ s | badFunName = Just $ "Bad constructor name" +++ f +++ "in" +++ s | badMissing = Just $ "No production for" +++ unwords missing ++ ", appearing in rule" +++ s | otherwise = Nothing where s = f ++ "." +++ cat +++ "::=" +++ unwords (map (either id show) rhs) -- Todo: consider using the show instance of Rule c = normCat cat cs = [normCat c | Left c <- rhs] badCoercion = isCoercion f && not ([c] == cs) badNil = isNilFun f && not (isList c && null cs) badOne = isOneFun f && not (isList c && cs == [catOfList c]) badCons = isConsFun f && not (isList c && cs == [catOfList c, c]) badList = isList c && not (isCoercion f || isNilCons f) badSpecial = elem c specialCatsP && not (isCoercion f) badMissing = not (null missing) missing = filter nodef [c | Left c <- rhs] nodef t = notElem t defineds defineds = internalCat : tokenNames cf ++ specialCatsP ++ map valCat (rulesOfCF cf) badTypeName = not (null badtypes) badtypes = filter isBadType $ cat : [c | Left c <- rhs] isBadType c = not (isUpper (head c) || isList c || c == internalCat || (head c == '@') ) badFunName = not (all (\c -> isAlphaNum c || c == '_') f {-isUpper (head f)-} || isCoercion f || isNilCons f)