-- | Control Dependence Graphs for the LLVM IR
--
-- This module follows the definition of control dependence of Cytron et al
-- (http://dl.acm.org/citation.cfm?doid=115372.115320):
--
-- Let X and Y be nodes in the CFG.  If X appears on every path from Y
-- to Exit, then X postdominates Y.  If X postdominates Y but X != Y,
-- then X strictly postdominates Y.
--
-- A CFG node Y is control dependent on a CFG node X if both:
--
--  * There is a non-null path p from X->Y such that Y postdominates
--    every node *after* X on p.
--
--  * The node Y does not strictly postdominate the node X.
--
-- This CDG formulation does not insert a dummy Start node to link
-- together all of the top-level nodes.  This just means that the set
-- of control dependencies can be empty if code will be executed
-- unconditionally.
module LLVM.Analysis.CDG (
  -- * Types
  CDG,
  HasCDG(..),
  -- * Constructor
  controlDependenceGraph,
  -- * Queries
  directControlDependencies,
  controlDependencies,
  ) where

import Control.Arrow ( (&&&) )
import qualified Data.Foldable as F
import Data.GraphViz
import Data.Map ( Map )
import qualified Data.Map as M
import Data.Monoid
import Data.Set ( Set )
import qualified Data.Set as S

import LLVM.Analysis
import LLVM.Analysis.CFG
import LLVM.Analysis.Dominance

class HasCDG a where
  getCDG :: a -> CDG

instance HasCDG CDG where
  getCDG = id

-- | Warning, this is an expensive instance to invoke as it constructs
-- the CDG.
instance HasCDG PostdominatorTree where
  getCDG = controlDependenceGraph

instance HasPostdomTree CDG where
  getPostdomTree (CDG pdt _) = pdt

instance HasCFG CDG where
  getCFG = getCFG . getPostdomTree

instance HasFunction CDG where
  getFunction = getFunction . getCFG

data CDG = CDG PostdominatorTree (Map BasicBlock [BasicBlock])

{- Note [CDG Format]

The CDG is a mapping BasicBlocks to the other BasicBlocks that they
are /directly/ control dependent on.

-}

-- | Construct the control dependence graph for a function (from its
-- CFG).  This follows the construction from chapter 9 of the
-- Munchnick Compiler Design and Implementation book.
--
-- For an input function F:
--
-- 1) Construct the CFG G for F
--
-- 2) Construct the postdominator tree PT for F
--
-- 3) Let S be the set of edges m->n in G such that n does not
--    postdominate m
--
-- 4) For each edge m->n in S, find the lowest common ancestor l of m
--    and n in the postdominator tree.  All nodes on the path from
--    l->n (not including l) in PT are control dependent on m.  If
--    there is no common ancestor (disconnected PDT because of
--    multiple exit nodes), the lowest common ancestor is then the
--    virtual exit node, so /all/ of the postdominators of n are
--    control dependent on m.
--
-- Note: the typical construction augments the CFG with a fake start
-- node.  Doing that here would be a bit complicated, so the graph
-- just isn't connected by a fake Start node.
controlDependenceGraph :: (HasCFG f, HasPostdomTree f) => f -> CDG
controlDependenceGraph flike =
  CDG pdt $ fmap S.toList $ foldr addPairs mempty (functionBody f)
  where
    cfg = getCFG flike
    f = getFunction cfg
    pdoms = M.fromList $ postdominators pdt
    pdt = getPostdomTree flike
    addPairs bM acc =
      foldr (addCDGEdge pdt pdoms bM) acc (basicBlockSuccessors cfg bM)


-- | Get the list of instructions that an instruction is control
-- dependent upon.  As noted above, the list will be empty if the
-- instruction is executed unconditionally.
controlDependencies :: (HasCDG cdg) => cdg -> Instruction -> [Instruction]
controlDependencies cdgLike i =
  go mempty (S.fromList directDeps) directDeps
  where
    cdg = getCDG cdgLike
    directDeps = directControlDependencies cdg i

    go _ acc [] = S.toList acc
    go visited acc (cdep:rest)
      | S.member cdep visited = go visited acc rest
      | otherwise =
        let newDeps = directControlDependencies cdg cdep
            rest' = rest ++ newDeps
        in go (S.insert cdep visited) (S.union acc (S.fromList newDeps)) rest'

-- | Get the list of instructions that an instruction is directly
-- control dependent upon (direct parents in the CDG).
directControlDependencies :: (HasCDG cdg) => cdg -> Instruction -> [Instruction]
directControlDependencies cdgLike i =
  maybe [] (map basicBlockTerminatorInstruction) (M.lookup bb m)
  where
    CDG _ m = getCDG cdgLike
    Just bb = instructionBasicBlock i

-- Implementation


-- | For each block M and each successor of M, N, add (M,N) if the
-- first instruction of N does not postdominate the terminator
-- instruction of M.
addCDGEdge :: PostdominatorTree -- ^ The postdominator tree
              -> Map Instruction [Instruction] -- ^ The entire postdom relation
              -> BasicBlock -- ^ M
              -> BasicBlock -- ^ N
              -> Map BasicBlock (Set BasicBlock)
              -> Map BasicBlock (Set BasicBlock)
addCDGEdge pdt pdoms bM bN acc
  -- If it is a postdominator, this is not an edge in S
  | postdominates pdt nEntry mTerm = acc
  -- Otherwise it is and we need to find a common ancestor in the
  -- PDT
  | otherwise = case commonAncestor mpdoms npdoms of
    Just l ->
      let cdepsOnM = bN : postdomBlocks (filter (/=l) npdoms)
      in foldr addControlDep acc cdepsOnM
    -- If there is no common ancestor, then all of the
    -- postdominators of n are control dependent on m.
    Nothing ->
      let deps = bN : postdomBlocks npdoms
      in foldr addControlDep acc deps
  where
    addControlDep b = M.insertWith S.union b (S.singleton bM)
    mTerm = basicBlockTerminatorInstruction bM
    nEntry : _ = basicBlockInstructions bN
    -- These lookups should never fail (unless the caller provided
    -- the postdominator tree for a different function).  the
    -- postdominators function just returns empty sets, and the
    -- function handles /every/ instruction in the input function.
    Just mpdoms = M.lookup mTerm pdoms
    Just npdoms = M.lookup nEntry pdoms

-- | Convert a list of Instructions into the list of their
-- BasicBlocks.  There are no repetitions in the result.
postdomBlocks :: [Instruction] -> [BasicBlock]
postdomBlocks = S.toList . foldr addInstBlock mempty
  where
    addInstBlock i acc =
      let Just bb = instructionBasicBlock i
      in S.insert bb acc

-- | Given two lists, find the first element they share in common (if
-- any).
commonAncestor :: [Instruction] -> [Instruction] -> Maybe Instruction
commonAncestor l1 = F.find (`elem` l1)

{- Note [CDG]

We can compute the CDG based on just the blocks in the graph.  All of
the instructions in a given basic block are always at the same level
in the CDG and depend on the same control decisions as the first
instruction in the block.

We also only need to store the blocks, since any instruction looked up
has a back-pointer to its block, which will let us look it up in the
CDG.

Start by finding the set S, where we just consider connected
BasicBlocks.

-}


-- Visualization

instance ToGraphviz CDG where
  toGraphviz = cdgGraphvizRepr

cdgGraphvizParams :: GraphvizParams n Instruction el BasicBlock Instruction
cdgGraphvizParams =
  defaultParams { fmtNode = \(_,l) -> [ toLabel (toValue l) ]
                , clusterID = Int . basicBlockUniqueId
                , clusterBy = nodeCluster
                , fmtCluster = formatCluster
                }
  where
    nodeCluster l@(_, i) =
      let Just bb = instructionBasicBlock i
      in C bb (N l)
    formatCluster bb = [GraphAttrs [toLabel (show (basicBlockName bb))]]

cdgGraphvizRepr :: CDG -> DotGraph Int
cdgGraphvizRepr cdg@(CDG _ bm) = graphElemsToDot cdgGraphvizParams ns es
  where
    f = getFunction cdg
    ns = map (instructionUniqueId &&& id) (functionInstructions f)
    es = concatMap blockEdges (functionBody f)

    blockEdges bb =
      case M.lookup bb bm of
        Nothing -> []
        Just deps ->
          -- Each instruction in BB gets an edge to the terminator
          -- of each dependency
          let depTerms = map basicBlockTerminatorInstruction deps
          in concatMap (addEdges depTerms) (basicBlockInstructions bb)
    addEdges depTerms i = map (addEdge i) depTerms
    addEdge i dterm =
      (instructionUniqueId i, instructionUniqueId dterm, ())