{-# LANGUAGE CPP #-}
{-| Analysis and design of this module
If there are SCCs then additional files are inevitable.
Separate files to define the keys are usually avoidable.
Separating Key definitions are needed only if the key imports form a SCC,
and even these may be sometimes avoided by declaring keys in boot files.
Choose to minimize the create of separate Key-desf definitions by
locating and breaking the key SCCs first. Use the "score" to pick a
single vertex at a time to greedily minimize the number of separate
Key-defs. After this initial step the set of separate Key-defs is
fixed.
With this understanding, there are FOUR renderings of a descriptor.
Two renderings have non-separate Key-defs (or no Key-defs at all).
Two renderings have separate Key-defs.
The two non-separate Key-defs renderings are:
"simple" :
* normal file with the type-def and any Key-defs
"type-boot" :
* normal file with the type-def and any Key-defs
- hs-boot file declares the type-def
The other two have separate Key-defs, in ".hs-boot" or "'Key.hs" files:
"key-type-boot" :
+ normal file with the type-def and the Key-defs
* hs-boot file declares the type-def and the Key-defs
"split-key,type-boot" :
+ normal file with the type-def and maybe imports keyfile
* keyfile file with the the Key-defs
- hs-boot file declares the type-def
In general, all nodes without keys could be rendered as "type-boot"
and all nodes with keys as "split-key,type-boot", which would break
all SCCs. But that is wasteful: 2 or 3 files each (2 for root
ProtoInfo file). And this waste will also lead to warnings from ghc
nagging about unneeded {-# SOURCE #-} pragmas.
Only the files marked with * have incoming and outgoing edges and NEED
to be considered. With enough {-# SOURCE #-} pragmas, the + are
just sources and - are just sinks.
Initially all renderings are optimistically Simple. Some are quickly
changed into TypeBoot by observing the modules which import foreign
keys and marking the reciprocal type imports as TypeBoot.
The next task is to break the SCCs which arise just from the foreign
key imports. The algorithm makes a graph of these and breaks all of
them by changing the one with the best score from a TypeBoot node into
a KeyTypeBoot node.
Note: The top protoInfo node will be rendered like "simple" as
TopProtoInfo and never change. The most that happens to the top
protoInfo node is that its targets get changed and some imports get
SOURCE pragmas.
Now considering both type and key imports as links, more SCCs
might arise. These are also scored. The thing to grasp is how
changing a message's rending is allowed to happen:
TopProtoInfo will never change
Simple may become TypeBoot
TypeBoot will never change
KeyTypeBoot may become SplitKeyTypeBoot
SplitKeyTypeBoot will never change
It always possible to choose a vertex in any SCC that can change,
which is not obvious. The deduction is that if all vertices in the
SCC are unchanging then there are no internal type import links; thus
the only loops being created are with foreign key imports. The
initial setup broke all SCCs made only of foreign key imports; thus
this stuck SCC is a contradiction.
The best score is the choice that reduces the size of the scc in the
next round (and secondarily increases the number of sub-SCCs).
The final Result is a Map of names to the non-Simple/TopProtoInfo
renderings and a list of "pairs" (a,p,b) where part 'p' of module 'a'
should import the type defined in 'b' using a SOURCE pragma. Keys
from messages rendered as KeyTypeBoot should be imported using SOURCE
pragmas. Keys from messages rendered as SplitKeyTypeBoot should be
imported from the auxiliary 'Key files.
The code below is more complicated in order to reduce the SOURCE
pragmas and avoid ghc's warnings. All files are tracked and SOURCE
pragmas are added in steps. This is unlikely to be perfect — some
extra SOURCE pragmas might be left over, but I do not have an example
of this happening. This also means a DescriptorInfo may have several
file parts and these may end up in different SCCs. To simplify
processing these different SCCs which share a DescriptorInfo are
merged by 'rejoinVertices'.
-}
module Text.ProtocolBuffers.ProtoCompile.BreakRecursion
( makeResult,displayResult,Result(..),VertexKind(..),Part(..),pKey,pfKey,getKind ) where
import Prelude hiding (pi)
import Control.Monad(guard,mplus)
import qualified Data.Foldable as F
import Data.Function(on)
import Data.Graph
import Data.List
import qualified Data.Map as Map
import Data.Map(Map)
import Data.Maybe(mapMaybe)
import qualified Data.Set as Set
import Data.Set(Set)
import Text.ProtocolBuffers.Basic
import Text.ProtocolBuffers.Identifiers
import Text.ProtocolBuffers.Reflections
import Debug.Trace(trace)
ecart :: String -> a -> a
ecart _ a = a
fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x
snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x
imp :: String -> a
imp s = error $ "Inconceivable! Text.ProtocolBuffers.ProtoCompile.BreakRecursion."++s
iguard :: Monad m => Bool -> String -> m ()
iguard True _ = return ()
iguard False s = imp s
-- The Gen.hs module will be working with these String types
type MKey = FMName String
pKey :: ProtoName -> MKey
pKey (ProtoName {haskellPrefix=a,parentModule=b,baseName=c}) = foldr1 dotFM . map promoteFM $ a++b++[c]
pfKey :: ProtoFName -> MKey
pfKey (ProtoFName {haskellPrefix'=a,parentModule'=b}) = foldr1 dotFM . map promoteFM $ a++b
-- Which reprensentation a message currently has
data VertexKind = TopProtoInfo
| Simple
| TypeBoot
| KeyTypeBoot
| SplitKeyTypeBoot
deriving (Show,Eq,Ord)
-- Which of the 3 sorts of files (maked with * in analysis) a vertex represents
data Part = Normal | Source | KeyFile deriving (Show,Eq,Ord)
-- Vertex data. A graph may have several nodes with the same value of
-- V and different values of Part.
data V = V { vMKey :: !MKey
, vNeedsKeys :: !(Set MKey)
, vKeysNeedsTypes :: !(Set MKey)
, vTypeNeedsTypes :: !(Set MKey) }
deriving Show
-- A link to a module's file
data Label = L !Part !MKey deriving (Show,Eq,Ord)
type E = (V,Label,[Label])
type G = [E]
type SCCs = [G]
-- The end product of this module is the Result value
data Result = Result { rKind :: Map MKey VertexKind
, rIBoot :: Set (MKey,Part,MKey)
, rIKey :: Set (MKey,MKey) }
deriving (Eq,Show)
displayResult :: Result -> String
displayResult (Result {rKind = kv, rIBoot = ab, rIKey=ab'keys }) = unlines $
[ "--- displayResult ----"
, "Modules which are not Simple"
] ++ map (\(k,v) -> indent . shows (fmName k) . (" has kind "++) $ show v) (Map.assocs kv) ++
[ "Module imports marked with SOURCE for Types"
] ++ map (indent . untriple) (Set.toAscList ab) ++
[ "Module imports marked with SOURCE or 'Key for keys"
] ++ map (indent . unpair) (Set.toAscList ab'keys)
where indent = (' ':).(' ':)
untriple (a,p,b) = fmName a ++ " " ++ show p ++ " : import {-# SOURCE #-} " ++ fmName b
unpair (a,b) = fmName a ++ " : import {-# SOURCE or 'Key #-} " ++ fmName b
showSCCs :: SCCs -> String
showSCCs gs = concatMap (\ g -> "\n>< SCC Graph ><\n"++concatMap showE g) gs
showG :: G -> String
showG g = '\n':concatMap showE g
showE :: E -> String
showE (v,n,ls) = unlines $ [ "( "++show n, " , "++show v, " , "++show ls, ")" ]
instance Monoid Result where
mempty = Result mempty mempty mempty
#if __GLASGOW_HASKELL__ >= 804
instance Semigroup Result where
r1 <> r2 =
#else
mappend r1 r2 =
#endif
Result { rKind = Map.unionWith max (rKind r1) (rKind r2)
, rIBoot = mappend (rIBoot r1) (rIBoot r2)
, rIKey = mappend (rIKey r1) (rIKey r2) }
getKind :: Result -> MKey -> VertexKind
getKind r = let m = rKind r in \n -> Map.findWithDefault Simple n m
getType :: VertexKind -> Part
getType TopProtoInfo = imp "getType: TopProtoInfo"
getType Simple = Normal
getType TypeBoot = Source
getType KeyTypeBoot = Source
getType SplitKeyTypeBoot = Source
getKey :: VertexKind -> Part
getKey TopProtoInfo = Normal
getKey Simple = Normal
getKey TypeBoot = Normal
getKey KeyTypeBoot = Source
getKey SplitKeyTypeBoot = KeyFile
-- 'makeResult' is the main function for this module
makeResult :: ProtoInfo -> Result
makeResult protoInfo =
let pvs@(p,vs) = makeVertices protoInfo
initResult = breakKeys pvs
sccs = cycles (makeG (p:vs) initResult)
answer = cull (p:vs) $ breakGraph initResult sccs
finalGraph = makeG (p:vs) answer
remainingProblems = cycles finalGraph
msg = unlines [ " KLAXON, RED SPINNING LIGHT, ETC."
, "! WARNING: hprotoc unexpectedly failed to disentangle all the mutually-recursive message definitions."
, "! PLEASE REPORT THIS FAILURE ALONG WITH THE PROTO FILE."
, "! The failed subset is:"
] ++ showSCCs remainingProblems ++ "\n"
in if null remainingProblems then ecart (showG finalGraph) answer
else trace msg answer
-- Build the graph using the vertices and the Result so far.
makeG :: [V] -> Result -> G
makeG vs r = concatMap (makeEdgesForV r) vs
-- Returns all as Simple and Normal. The fst V is from the ProtoInfo
-- the snd [V] is from the DescriptorInfo.
makeVertices :: ProtoInfo -> (V,[V])
makeVertices pi = answer where
answer = ( protoInfoV , map makeV (messages pi) ++ map makeVoneof (oneofs pi) )
protoInfoV = V { vMKey = pKey (protoMod pi)
, vNeedsKeys = mempty
, vKeysNeedsTypes = knt (extensionKeys pi)
, vTypeNeedsTypes = mempty }
makeV di = V { vMKey = pKey (descName di)
, vNeedsKeys = nk (knownKeys di)
, vKeysNeedsTypes = knt (keys di)
, vTypeNeedsTypes = Set.union (tnt (fields di)) (ont (descOneofs di)) }
makeVoneof oi = V { vMKey = pKey (oneofName oi)
, vNeedsKeys = mempty
, vKeysNeedsTypes = mempty
, vTypeNeedsTypes = (tnt . fmap snd . oneofFields) oi
}
allK = Set.fromList (pKey (protoMod pi) : map (pKey . descName) (messages pi))
allT = Set.fromList $ (map (pKey . descName) (messages pi)) ++ (map (pKey . oneofName) (oneofs pi))
tnt :: Seq FieldInfo -> Set MKey
tnt fs = Set.intersection allT $ Set.fromList $ map pKey . mapMaybe typeName . F.toList $ fs
ont :: Seq OneofInfo -> Set MKey
ont os = Set.intersection allT $ Set.fromList $ map (pKey . oneofName) . F.toList $ os
knt :: Seq KeyInfo -> Set MKey
knt ks =
let (pns, fsL) = unzip (F.toList ks)
fnt :: [FieldInfo] -> Set MKey
fnt fs = Set.fromList $ (map pKey . mapMaybe typeName $ fs) ++ (map (pfKey . fieldName) fs)
in Set.intersection allT $ Set.union (Set.fromList (map pKey pns)) (fnt fsL)
nk :: Seq FieldInfo -> Set MKey
nk fs = Set.intersection allK $ Set.fromList $ map (pfKey . fieldName) . F.toList $ fs
-- The only need for KeyTypeBoot (and SplitKeyTypeBoot) is to break
-- key-only import cycles. 'breakKeys' finds and breaks these SSCs by
-- marking files as KeyTypeBoot. Since foreign keys implies a
-- reciprocal type import, additional files can get changed to
-- TypeBoot and some incoming links marked to use Source.
breakKeys :: (V,[V]) -> Result
breakKeys (pv,vsOther) =
let vs = pv : vsOther
es = map makeInitialEdges vs where
makeInitialEdges v = (v,L Normal self,[ L Normal b | b <- Set.toList (vNeedsKeys v), b/=self ])
where self = vMKey v
-- For 'a'/='b': if 'a' needs key from 'b' then 'b' must need type from 'a'
-- this recursion means 'a' cannot be Simple so change to TypeBoot
startingResult = Result { rKind = needTypeBoot, rIBoot = mempty, rIKey = mempty }
needTypeBoot = Map.singleton (vMKey pv) TopProtoInfo `Map.union`
( Map.fromList . map (\(_,L _ a,_) -> (a,TypeBoot))
. filter (\(_,_,bLs) -> not (null bLs)) $ es )
-- break always moves things to KeyTypeBoot from TypeBoot (not
-- Simple) because they are in a Key-import SCC: this means they
-- are importing foreign keys and thus they are in needTypeBoot
breakSCCs :: Result -> SCCs -> Result
breakSCCs r sccs = r `mappend` mconcat (map breakSCC sccs)
breakSCC :: G -> Result
breakSCC [] = imp $ "breakKeys.breakSCC: The SCC cannot be empty!"
breakSCC es' = let (toBust,next'sccs) = snd $ maximumBy (compare `on` fst) (map f (pullEach es'))
where f ((v,_,_),es'') = let (s,sccs) = score es'' in (s,(v,sccs))
bk = vMKey toBust -- ZZZ
ik = Set.fromList [ (ek,bk) | ek <- map (vMKey . fst3) es', ek/=bk ] -- ZZZ
newResult = Result { rKind = Map.singleton bk KeyTypeBoot
, rIBoot = mempty
, rIKey = ik } -- ZZZ
in breakSCCs newResult next'sccs
-- Init boot marks some incoming links to use SOURCE
initBoot r = r { rIBoot = Set.fromList . concatMap withParts $ es }
where withParts (_,L _ a,bLs) = [ withPart a b | L _ b <- bLs ]
withPart a b = let p = getKey (getKind r b) in (b,p,a)
in initBoot $ breakSCCs startingResult (cycles es)
score :: G -> ( (Int,Int), SCCs )
score es = ((value,parts),sccs) where
sccs = cycles es
-- A length n SCC can be solved by changing at most (n-1) vertices
-- The value is the difference between the
-- old graph which required at most (pred . length) ed changes
-- and the new graphs which require at most (sum (map (pred . length) sccs)) changes
-- so a larger value is preferred
value = (pred . length) es - (sum (map (pred . length) sccs))
-- The number of parts is used as a potential tie breaker, prefering more parts
parts = length sccs -- # of pieces
-- select the non-trivial sccs from edges
cycles :: G -> SCCs
cycles = filter atLeastTwo . map flattenSCC . stronglyConnCompR
where atLeastTwo :: [a] -> Bool
atLeastTwo (_:_:_) = True
atLeastTwo _ = False
-- pull out each element as candidate and list without the element
pullEach :: [a] -> [(a,[a])]
pullEach = go id where go _ [] = []
go f (x:xs) = (x,f xs) : go (f . (x:)) xs
-- This builds an edge E from the vertex V and ensures that V has the
-- right vKind from the Result. This must make the same judgements as
-- Gen.hs does in importPN and import PFN
makeEdgesForV :: Result -> V -> [E]
makeEdgesForV r v =
let me = vMKey v; myKind = getK me
getK = getKind r; self p = L p me;
typeL p n = if Set.notMember (me,p,n) (rIBoot r) then L Normal n
else let checkSource = getType (getK n)
in if checkSource == Source then L Source n -- sanity check
else error "makeEdgesForV.typeL.getType.getK of n did not return Source!"
keyL n = if Set.notMember (me,n) (rIKey r) then L Normal n
else L (getKey (getK n)) n
sKNT (L p _) = Set.map (typeL p) (vKeysNeedsTypes v)
sTNT (L p _) = Set.map (typeL p) (vTypeNeedsTypes v)
sNK _ = Set.map keyL (vNeedsKeys v)
notMe set = [ e | e@(L _p o) <- Set.toList set, o/=me ]
standard = let s = self Normal in (v,s,notMe $ Set.unions [ sKNT s, sTNT s, sNK s])
source = let s = self Source in (v,s,[])
sourceKTB = let s = self Source in (v,s,notMe $ sKNT s)
standardSKTB = let s = self Normal in (v,s,notMe' $ Set.union (sNK s) (sTNT s))
where notMe' set = [ e | e@(L p o) <- Set.toList set, o/=me || p==KeyFile ]
keyfileSKTB = let s = self KeyFile in (v,s,Set.toList $ sKNT s)
in case myKind of -- commented out the purely SOURCE and SINK nodes:
TopProtoInfo -> [standard]
Simple -> [standard]
TypeBoot -> [standard,source]
KeyTypeBoot -> [standard,sourceKTB]
SplitKeyTypeBoot -> [standardSKTB,keyfileSKTB,source]
breakGraph :: Result -> SCCs -> Result
breakGraph r [] = ecart ("\nbreakGraph leaf answer\n"++displayResult r) $ r
breakGraph r sccs = ecart ("\nbreakGraph\n"++displayResult r) $
r `mappend` mconcat (map (breakCycle r) (rejoinVertices sccs))
-- I wonder if there is any input which leads to a module having
-- different parts in different SCCs. Rather than try and
-- over-analyze this wierd edge case this 'rejoinVertices' function
-- will detect it and join the SCCs.
rejoinVertices :: SCCs -> SCCs
rejoinVertices [] = []
rejoinVertices g@([_]) = g
rejoinVertices gs =
let vgs :: [(Set MKey,G)]
vgs = map (\ g -> (Set.fromList . map (vMKey . fst3) $ g,g)) gs
process [] = []
process ((_,g):[]) = [g]
process ((v,g):rest) = walk id rest where
walk p [] = g : process (p [])
walk p (x@(v',g'):rest') | Set.null (Set.intersection v v') = walk (p . (x:)) rest'
| otherwise = process ((Set.union v v',g++g') : p [])
in process vgs
{-
breakCycle is a work in progress. The ans' value tries to change
incoming type links to the Source file, then ans'R. The ans'R tries to
change outgoing links to point to Source files. The ans'TB changes
from Simple/Normal to TypeBoot (adding a source file). The ans'SKTB
changes from KeyTypeBoot/Source to SplitKeyTypeBoot.
The reason these changes are done in stages is to try and avoid ghc's
warnings that a {-# SOURCE #-} import is not not needed.
-}
breakCycle :: Result -> G -> Result
breakCycle oldR sccIn =
let bits = map snd3 sccIn -- trace
-- toCompare should never be null.
toCompare = mapMaybe f (pullEach sccIn) where
allV = Set.fromList (map (vMKey . fst3) sccIn)
f :: (E,[E]) -> Maybe ((Int, Int), (Result, SCCs))
f (e@(v,L p me,_bLs), es) = ecart (">< picking:\n"++showE e++
"\nfrom:"++show bits++
"\nscore: "++show observe++"\n") $
answer where
answer = case (getKind oldR me,p) of
(TopProtoInfo,Normal) -> ans'R
(Simple,Normal) -> ans'R `mplus` ans'TB -- ans' is part of ans'TB
(TypeBoot,Normal) -> ans' `mplus` ans'R
(KeyTypeBoot,Normal) -> ans' `mplus` ans'R
(KeyTypeBoot,Source) -> ans'RK `mplus` ans'SKTB -- ans' may be redundant
(SplitKeyTypeBoot,Normal) -> ans' `mplus` ans'R
(SplitKeyTypeBoot,KeyFile) -> ans'RK -- ans' may be redundant
(TypeBoot,Source) -> imp $
"breakCycle.toCompare.f cannot have (TypeBoot,Source) in SCC!" ++ eMsg
(SplitKeyTypeBoot,Source) -> imp $
"breakCycle.toCompare.f cannot have (SplitKeyTypeBoot,Source) in SCC!" ++ eMsg
_ -> imp $ "breakCycle.toCompare.f: impossible combination in SCC:"++ eMsg
observe = case answer of Nothing -> "Nothing"; Just (s,_) -> "Just "++show s -- trace
eMsg = '\n':unlines (map showE (e:es))
ans',ans'R,ans'TB,ans'SKTB :: Maybe ((Int, Int), (Result, SCCs))
ans' = if Set.null newIBoot then Nothing
else go $ oldR `mappend` Result { rKind = mempty
, rIBoot = newIBoot
, rIKey = mempty }
ans'R = if Set.null newIBootR then Nothing
else go $ oldR `mappend` Result { rKind = mempty
, rIBoot = newIBootR
, rIKey = mempty }
ans'RK = if Set.null newIBootRK then Nothing
else go $ oldR `mappend` Result { rKind = mempty
, rIBoot = newIBootRK
, rIKey = mempty }
ans'TB = go $ oldR `mappend` Result { rKind = Map.singleton me TypeBoot
, rIBoot = newIBoot -- do (TypeBoot,Normal) -> ans'
, rIKey = mempty }
ans'SKTB = go $ oldR `mappend` Result { rKind = Map.singleton me SplitKeyTypeBoot
, rIBoot = newIBootSKTB
, rIKey = Set.singleton (me,me) }
newIBoot,newIBootR,newIBootRK,newIBootSKTB :: Set (MKey,Part,MKey)
newIBoot = Set.fromList $ do
(va,L pa a,_) <- es
iguard (Set.member a allV) $
"breakCycle.toCompare.newIBoot sanity check 083425 failed:"++eMsg
guard (((pa == Normal) &&
(Set.member me (vTypeNeedsTypes va))) ||
((pa == getKey (getKind oldR a)) &&
(Set.member me (vKeysNeedsTypes va))))
let x=(a,pa,me)
guard (Set.notMember x (rIBoot oldR)) -- needed when used in newIBoot2
return x
newIBootR = Set.fromList $ do
b <- Set.toList (Set.union (vTypeNeedsTypes v) (vKeysNeedsTypes v))
guard (Set.member b allV)
guard (Source == getType (getKind oldR b))
guard (me /= b || p == KeyFile)
let x = (me,p,b)
guard (Set.notMember x (rIBoot oldR))
return x
newIBootRK = Set.fromList $ do
b <- Set.toList (vKeysNeedsTypes v)
guard (Set.member b allV)
guard (Source == getType (getKind oldR b))
guard (me /= b || p == KeyFile)
let x = (me,p,b)
guard (Set.notMember x (rIBoot oldR))
return x
newIBootSKTB = Set.union newIBoot . Set.fromList $
(if Set.member me (vKeysNeedsTypes v) then ((me,KeyFile,me):) else id) $ do
b <- Set.toList (vKeysNeedsTypes v)
guard (Set.member (me,Source,b) (rIBoot oldR)) -- copy from (me,Source,b)
let x = (me,KeyFile,b) -- copy to (me,KeyFile,b)
iguard (Set.notMember x (rIBoot oldR)) $
"breakCycle.toCompare.newIBoot2 KeyTypeBoot already had entries for KeyFile!:"++eMsg
return x
go :: Result -> Maybe ((Int, Int), (Result, SCCs))
go newR = let (s,sccs) = score (makeG (map fst3 (e:es)) newR)
in Just (s,(newR,sccs))
in ecart (">< breakCycle of "++show bits++"\n\n") $
if null toCompare
then imp $ "breakCycle: This SCC had no Simple or KeyTypeBoot nodes!\n"++ unlines (map show sccIn)
else let (newR,next'sccs) = snd $ maximumBy (compare `on` fst) toCompare
in breakGraph newR next'sccs
-- 'cull' tries to remove all the extra {-# SOURCE #-} pragmas. I am
-- not certain that repeating the 'cull' will make any difference.
cull :: [V] -> Result -> Result
cull vs rIn =
let trial :: Result -> (MKey,Part,MKey) -> Result
trial old x = let new = old { rIBoot = Set.delete x (rIBoot old) }
in if null (cycles (makeG vs new)) then new else old
rOut = foldl' trial rIn (Set.toList (rIBoot rIn))
in if rOut == rIn then rOut else cull vs rOut