module GF.Grammar.CFG where
import GF.Data.Utilities
import PGF
import GF.Data.Relation
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List
import Data.Set (Set)
import qualified Data.Set as Set
type Cat = String
data Symbol c t = NonTerminal c | Terminal t
deriving (Eq, Ord, Show)
type CFSymbol = Symbol Cat Token
data CFRule = CFRule {
lhsCat :: Cat,
ruleRhs :: [CFSymbol],
ruleName :: CFTerm
}
deriving (Eq, Ord, Show)
data CFTerm
= CFObj CId [CFTerm]
| CFAbs Int CFTerm
| CFApp CFTerm CFTerm
| CFRes Int
| CFVar Int
| CFMeta CId
deriving (Eq, Ord, Show)
data CFG = CFG { cfgStartCat :: Cat,
cfgExternalCats :: Set Cat,
cfgRules :: Map Cat (Set CFRule) }
deriving (Eq, Ord, Show)
removeCycles :: CFG -> CFG
removeCycles = onRules f
where f rs = filter (not . isCycle) rs
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
isCycle _ = False
bottomUpFilter :: CFG -> CFG
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
okSym g = symbol (`elem` allCats g) (const True)
topDownFilter :: CFG -> CFG
topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
where
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
mergeIdentical :: CFG -> CFG
mergeIdentical g = onRules (map subst) g
where
m = Map.fromList [(y,concat (intersperse "+" xs))
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
purgeExternalCats :: CFG -> CFG
purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
removeLeftRecursion :: CFG -> CFG
removeLeftRecursion gr
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
where
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
let a_x = mkCat (NonTerminal a) x,
a_x `Set.member` newCats,
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
(\_ -> CFRes 0) x]
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
a <- retainedLeftRecursive,
b@(NonTerminal b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (NonTerminal a) x,
let a_b = mkCat (NonTerminal a) b,
let i = length $ filterCats beta,
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
(\_ -> CFApp (CFRes i) n) x]
scheme3 = [CFRule a_x beta n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (NonTerminal a) x,
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
(\_ -> n) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
shiftTerm :: CFTerm -> CFTerm
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
shiftTerm (CFRes 0) = CFVar 1
shiftTerm (CFRes n) = CFRes (n1)
shiftTerm t = t
cats = allCats gr
rules = allRules gr
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
leftRecursive = reflexiveElements properLeftCorner
isLeftRecursive = (`Set.member` leftRecursive)
retained = cfgStartCat gr `Set.insert`
Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
NonTerminal a <- ruleRhs r]
isRetained = (`Set.member` retained)
retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
mkCat :: CFSymbol -> CFSymbol -> Cat
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
where showSymbol = symbol id show
mutRecCats :: Bool
-> CFG -> [Set Cat]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
makeSimpleRegular :: CFG -> CFG
makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
makeRegular :: CFG -> CFG
makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat (Set.toList cs)
where rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))]
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
newRule c rhs n | rhs == [NonTerminal c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
groupProds :: [CFRule] -> Map Cat (Set CFRule)
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
uniqueFuns :: CFG -> CFG
uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg
,cfgExternalCats = cfgExternalCats cfg
,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg))))
}
where
uniqueFunSet funs (cat,rules) =
let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules)
in (funs',(cat,Set.fromList rules'))
uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args))
where
fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
let fun'=mkCId (showCId fun++suffix),
not (fun' `Set.member` funs)]
allRules :: CFG -> [CFRule]
allRules = concat . map Set.toList . Map.elems . cfgRules
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
allCats :: CFG -> [Cat]
allCats = Map.keys . cfgRules
allCats' :: CFG -> [Cat]
allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
Set.fromList [c | rs <- Map.elems (cfgRules cfg),
r <- Set.toList rs,
NonTerminal c <- ruleRhs r])
catRules :: CFG -> Cat -> [CFRule]
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
catSetRules :: CFG -> Set Cat -> [CFRule]
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
(Set.map f (cfgExternalCats cfg))
[CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
cleanCFG :: CFG -> CFG
cleanCFG = onCFG (Map.filter (not . Set.null))
unionCFG :: CFG -> CFG -> CFG
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
countCats :: CFG -> Int
countCats = Map.size . cfgRules . cleanCFG
countRules :: CFG -> Int
countRules = length . allRules
prCFG :: CFG -> String
prCFG = prProductions . map prRule . allRules
where
prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
prSym = symbol id (\t -> "\""++ t ++"\"")
prProductions :: [(Cat,String)] -> String
prProductions prods =
unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods]
where
maxLHSWidth = maximum $ 0:(map (length . fst) prods)
rpad n s = s ++ replicate (n length s) ' '
prCFTerm :: CFTerm -> String
prCFTerm = pr 0
where
pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")")
pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t)
pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")")
pr _ (CFRes i) = "$" ++ show i
pr _ (CFVar i) = "x" ++ show i
pr _ (CFMeta c) = "?" ++ showCId c
paren 0 x = x
paren 1 x = "(" ++ x ++ ")"
ruleFun :: CFRule -> CId
ruleFun (CFRule _ _ t) = f t
where f (CFObj n _) = n
f (CFApp _ x) = f x
f (CFAbs _ x) = f x
f _ = mkCId ""
anyUsedBy :: [Cat] -> CFRule -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (mkCId n) []
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
allXLinear :: Set Cat -> [CFRule] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
isRightLinear :: Set Cat
-> CFRule
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
isLeftLinear :: Set Cat
-> CFRule
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
symbol fc ft (NonTerminal cat) = fc cat
symbol fc ft (Terminal tok) = ft tok
mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
filterCats :: [Symbol c t] -> [c]
filterCats syms = [ cat | NonTerminal cat <- syms ]
filterToks :: [Symbol c t] -> [t]
filterToks syms = [ tok | Terminal tok <- syms ]
catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)