{-# LANGUAGE MagicHash, UnboxedTuples, CPP, ForeignFunctionInterface, GHCForeignImportPrim, UnliftedFFITypes, BangPatterns, RecordWildCards, DeriveFunctor, DeriveFoldable, DeriveTraversable, PatternGuards #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# OPTIONS_GHC -Wno-orphans #-}
{-|
Module      :  GHC.HeapView
Copyright   :  (c) 2012-2019 Joachim Breitner
License     :  BSD3
Maintainer  :  Joachim Breitner <mail@joachim-breitner.de>

With this module, you can investigate the heap representation of Haskell
values, i.e. to investigate sharing and lazy evaluation.
-}


module GHC.HeapView (
    -- * Heap data types
    GenClosure(..),
    Closure,
    allClosures,                            -- was allPtrs
    ClosureType(..),
    StgInfoTable(..),
    HalfWord,
    -- * Reading from the heap
    getClosureData,
    getBoxedClosureData,
    getClosureRaw,
    -- * Pretty printing
    ppClosure,
    -- * Heap maps
    -- $heapmap
    HeapTree(..),
    buildHeapTree,
    ppHeapTree,
    HeapGraphEntry(..),
    HeapGraphIndex,
    HeapGraph(..),
    lookupHeapGraph,
    heapGraphRoot,
    buildHeapGraph,
    multiBuildHeapGraph,
    addHeapGraph,
    annotateHeapGraph,
    updateHeapGraph,
    ppHeapGraph,
    -- * Boxes
    Box(..),
    asBox,
    areBoxesEqual,
    -- * Disassembler
    disassembleBCO,
    )
    where

import GHC.Exts         ( Any,
                          Ptr(..), Addr#, Int(..), Word(..),
                          ByteArray#, Array#, sizeofByteArray#, sizeofArray#, indexArray#, indexWordArray#,
                          unsafeCoerce# )

import GHC.Exts.Heap
import GHC.Exts.Heap.Constants

import GHC.Arr          (Array(..))

import Foreign          hiding ( void )
import Data.Char
import Data.List
import Data.Maybe       ( catMaybes )
import Data.Functor
import Data.Function
import qualified Data.Foldable as F
import qualified Data.Traversable as T
import qualified Data.IntMap as M
import Control.Monad
import Control.Monad.Trans.State
import Control.Monad.Trans.Class
import Control.Monad.IO.Class
import Control.Monad.Trans.Writer.Strict
import Control.Exception.Base (evaluate)

import GHC.Disassembler

#include "ghcautoconf.h"

#if __GLASGOW_HASKELL__ == 806
-- Deriving for Functor, Foldable and Traversable is missing in  GHC 8.6
-- will be available in GHC 8.8
deriving instance Functor GenClosure
deriving instance Foldable GenClosure
deriving instance Traversable GenClosure
#endif

instance Storable StgInfoTable where

   sizeOf itbl
      = sum
        [
         fieldSz ptrs itbl,
         fieldSz nptrs itbl,
         sizeOf (undefined :: HalfWord),
         fieldSz srtlen itbl
        ]

   alignment _
      = wORD_SIZE

   poke _a0 _itbl
      = error "Storable StgInfoTable is read-only"

   peek a0
      = flip (evalStateT) (castPtr a0)
      $ do
           ptrs'   <- load
           nptrs'  <- load
           tipe'   <- load
           srtlen' <- load
           return
              StgInfoTable {
                 entry  = Nothing,            -- Storable instance needed for EntryFunPtr!!
                 ptrs   = ptrs',
                 nptrs  = nptrs',
                 tipe   = toEnum (fromIntegral (tipe'::HalfWord)),
                 srtlen = srtlen',
                 code   = Nothing              -- Storable instance needed for ItblCodes
              }

fieldSz :: Storable b => (a -> b) -> a -> Int
fieldSz sel x = sizeOf (sel x)

load :: Storable a => PtrIO a
load = do addr <- advance
          lift (peek addr)

type PtrIO = StateT (Ptr Word8) IO

advance :: Storable a => PtrIO (Ptr a)
advance = StateT adv where
    adv addr = case castPtr addr of { addrCast -> return
        (addrCast, addr `plusPtr` sizeOfPointee addrCast) }

sizeOfPointee :: (Storable a) => Ptr a -> Int
sizeOfPointee addr = sizeOf (typeHack addr)
    where typeHack = undefined :: Ptr a -> a


foreign import prim "stg_unpackClosurezh" unpackClosurezh# :: Any -> (# Addr#, ByteArray#, Array# b #)

-- | This returns the raw representation of the given argument. The second
-- component of the triple are the words on the heap, and the third component
-- are those words that are actually pointers. Once back in Haskell word, the
-- 'Word'  may be outdated after a garbage collector run, but the corresponding
-- 'Box' will still point to the correct value.
getClosureRaw :: a -> IO (Ptr StgInfoTable, [Word], [Box])
getClosureRaw x =
    case unpackClosurezh# (unsafeCoerce# x) of
        (# iptr, dat, ptrs #) -> do
            let nelems = (I# (sizeofByteArray# dat)) `div` wORD_SIZE
                rawWords = [W# (indexWordArray# dat i) | I# i <- [0.. fromIntegral nelems -1] ]
                pelems = I# (sizeofArray# ptrs)
                ptrList = amap' Box $ Array 0 (pelems - 1) pelems ptrs
            -- This is just for good measure, and seems to be not important.
            mapM_ evaluate ptrList
            -- This seems to be required to avoid crashes as well
            void $ evaluate nelems
            -- The following deep evaluation is crucial to avoid crashes (but why)?
            mapM_ evaluate rawWords
            return (Ptr iptr, rawWords, ptrList)

-- From compiler/ghci/RtClosureInspect.hs
amap' :: (t -> b) -> Array Int t -> [b]
amap' f (Array i0 i _ arr#) = map g [0 .. i - i0]
    where g (I# i#) = case indexArray# arr# i# of
                          (# e #) -> f e

isChar :: GenClosure b -> Maybe Char
isChar (ConstrClosure { name = "C#", dataArgs = [ch], ptrArgs = []}) = Just (chr (fromIntegral ch))
isChar _ = Nothing

isCons :: GenClosure b -> Maybe (b, b)
isCons (ConstrClosure { name = ":", dataArgs = [], ptrArgs = [h,t]}) = Just (h,t)
isCons _ = Nothing

isTup :: GenClosure b -> Maybe [b]
isTup (ConstrClosure { dataArgs = [], ..}) =
    if length name >= 3 &&
       head name == '(' && last name == ')' &&
       all (==',') (tail (init name))
    then Just ptrArgs else Nothing
isTup _ = Nothing


isNil :: GenClosure b -> Bool
isNil (ConstrClosure { name = "[]", dataArgs = [], ptrArgs = []}) = True
isNil _ = False

-- | A pretty-printer that tries to generate valid Haskell for evalutated data.
-- It assumes that for the included boxes, you already replaced them by Strings
-- using 'Data.Foldable.map' or, if you need to do IO, 'Data.Foldable.mapM'.
--
-- The parameter gives the precedendence, to avoid avoidable parenthesises.
ppClosure :: (Int -> b -> String) -> Int -> GenClosure b -> String
ppClosure showBox prec c = case c of
    _ | Just ch <- isChar c -> app $
        ["C#", show ch]
    _ | Just (h,t) <- isCons c -> addBraces (5 <= prec) $
        showBox 5 h ++ " : " ++ showBox 4 t
    _ | Just vs <- isTup c ->
        "(" ++ intercalate "," (map (showBox 0) vs) ++ ")"
    ConstrClosure {..} -> app $
        name : map (showBox 10) ptrArgs ++ map show dataArgs
    ThunkClosure {..} -> app $
        "_thunk" : map (showBox 10) ptrArgs ++ map show dataArgs
    SelectorClosure {..} -> app
        ["_sel", showBox 10 selectee]
    IndClosure {..} -> app
        ["_ind", showBox 10 indirectee]
    BlackholeClosure {..} -> app
        ["_bh",  showBox 10 indirectee]
    APClosure {..} -> app $ map (showBox 10) $
        fun : payload
    PAPClosure {..} -> app $ map (showBox 10) $
        fun : payload
    APStackClosure {..} -> app $ map (showBox 10) $
        fun : payload
    BCOClosure {..} -> app
        ["_bco", showBox 10 bcoptrs]
    ArrWordsClosure {..} -> app
        ["toArray", "("++show (length arrWords) ++ " words)", intercalate "," (shorten (map show arrWords)) ]
    MutArrClosure {..} -> app
        --["toMutArray", "("++show (length mccPayload) ++ " ptrs)",  intercalate "," (shorten (map (showBox 10) mccPayload))]
        ["[", intercalate ", " (shorten (map (showBox 10) mccPayload)),"]"]
    MutVarClosure {..} -> app $
        ["_mutVar", (showBox 10) var]
    MVarClosure {..} -> app $
        ["MVar", (showBox 10) value]
    FunClosure {..} ->
        "_fun" ++ braceize (map (showBox 0) ptrArgs ++ map show dataArgs)
    BlockingQueueClosure {..} ->
        "_blockingQueue"
    IntClosure {..} -> app
        ["Int", show intVal]
    WordClosure {..} -> app
        ["Word", show wordVal]
    Int64Closure {..} -> app
        ["Int64", show int64Val]
    Word64Closure {..} -> app
        ["Word64", show word64Val]
    AddrClosure {..} -> app
        ["Addr", show addrVal]
    FloatClosure {..} -> app
        ["Float", show floatVal]
    DoubleClosure {..} -> app
        ["Double", show doubleVal]
    OtherClosure {..} ->
        "_other"
    UnsupportedClosure {..} ->
        "_unsupported"
#if MIN_VERSION_ghc_heap(8,10,1)
    -- copy-pasta'd from MutArrClosure:
    SmallMutArrClosure {..} -> app
        --["toMutArray", "("++show (length mccPayload) ++ " ptrs)",  intercalate "," (shorten (map (showBox 10) mccPayload))]
        ["[", intercalate ", " (shorten (map (showBox 10) mccPayload)),"]"]
    WeakClosure {..} ->
        "_weak"
#endif
  where
    app [a] = a  ++ "()"
    app xs = addBraces (10 <= prec) (intercalate " " xs)

    shorten xs = if length xs > 20 then take 20 xs ++ ["(and more)"] else xs

{- $heapmap

   For more global views of the heap, you can use heap maps. These come in
   variations, either a trees or as graphs, depending on
   whether you want to detect cycles and sharing or not.

   The entries of a 'HeapGraph' can be annotated with arbitrary values. Most
   operations expect this to be in the 'Monoid' class: They use 'mempty' to
   annotate closures added because the passed values reference them, and they
   use 'mappend' to combine the annotations when two values conincide, e.g.
   during 'updateHeapGraph'.
-}

-- | Heap maps as tree, i.e. no sharing, no cycles.
data HeapTree = HeapTree Box (GenClosure HeapTree) | EndOfHeapTree

heapTreeClosure :: HeapTree -> Maybe (GenClosure HeapTree)
heapTreeClosure (HeapTree _ c) = Just c
heapTreeClosure EndOfHeapTree = Nothing

-- | Constructing an 'HeapTree' from a boxed value. It takes a depth parameter
-- that prevents it from running ad infinitum for cyclic or infinite
-- structures.
buildHeapTree :: Int -> Box -> IO HeapTree
buildHeapTree 0 _ = do
    return $ EndOfHeapTree
buildHeapTree n b = do
    c <- getBoxedClosureData b
    c' <- T.mapM (buildHeapTree (n-1)) c
    return $ HeapTree b c'

-- | Pretty-Printing a heap Tree
--
-- Example output for @[Just 4, Nothing, *something*]@, where *something* is an
-- unevaluated expression depending on the command line argument.
--
-- >[Just (I# 4),Nothing,Just (_thunk ["arg1","arg2"])]
ppHeapTree :: HeapTree -> String
ppHeapTree = go 0
  where
    go _ EndOfHeapTree = "..."
    go prec t@(HeapTree _ c')
        | Just s <- isHeapTreeString t = show s
        | Just l <- isHeapTreeList t   = "[" ++ intercalate "," (map ppHeapTree l) ++ "]"
        | Just bc <- disassembleBCO heapTreeClosure c'
                                       = app ("_bco" : map (go 10) (concatMap F.toList bc))
        | otherwise                    = ppClosure go prec c'
      where
        app [a] = a ++ "()"
        app xs = addBraces (10 <= prec) (intercalate " " xs)

isHeapTreeList :: HeapTree -> Maybe ([HeapTree])
isHeapTreeList tree = do
    c <- heapTreeClosure tree
    if isNil c
      then return []
      else do
        (h,t) <- isCons c
        t' <- isHeapTreeList t
        return $ (:) h t'

isHeapTreeString :: HeapTree -> Maybe String
isHeapTreeString t = do
    list <- isHeapTreeList t
    -- We do not want to print empty lists as "" as we do not know that they
    -- are really strings.
    if (null list)
        then Nothing
        else mapM (isChar <=< heapTreeClosure) list

-- | For heap graphs, i.e. data structures that also represent sharing and
-- cyclic structures, these are the entries. If the referenced value is
-- @Nothing@, then we do not have that value in the map, most likely due to
-- exceeding the recursion bound passed to 'buildHeapGraph'.
--
-- Besides a pointer to the stored value and the closure representation we
-- also keep track of whether the value was still alive at the last update of the
-- heap graph. In addition we have a slot for arbitrary data, for the user's convenience.
data HeapGraphEntry a = HeapGraphEntry {
        hgeBox :: Box,
        hgeClosure :: GenClosure (Maybe HeapGraphIndex),
        hgeLive :: Bool,
        hgeData :: a}
    deriving (Show, Functor)
type HeapGraphIndex = Int

-- | The whole graph. The suggested interface is to only use 'lookupHeapGraph',
-- as the internal representation may change. Nevertheless, we export it here:
-- Sometimes the user knows better what he needs than we do.
newtype HeapGraph a = HeapGraph (M.IntMap (HeapGraphEntry a))
    deriving (Show)

lookupHeapGraph :: HeapGraphIndex -> (HeapGraph a) -> Maybe (HeapGraphEntry a)
lookupHeapGraph i (HeapGraph m) = M.lookup i m

heapGraphRoot :: HeapGraphIndex
heapGraphRoot = 0

-- | Creates a 'HeapGraph' for the value in the box, but not recursing further
-- than the given limit. The initial value has index 'heapGraphRoot'.
buildHeapGraph
   :: Monoid a
   => Int -- ^ Search limit
   -> a -- ^ Data value for the root
   -> Box -- ^ The value to start with
   -> IO (HeapGraph a)
buildHeapGraph limit rootD initialBox =
    fst <$> multiBuildHeapGraph limit [(rootD, initialBox)]

-- | Creates a 'HeapGraph' for the values in multiple boxes, but not recursing
--   further than the given limit.
--
--   Returns the 'HeapGraph' and the indices of initial values. The arbitrary
--   type @a@ can be used to make the connection between the input and the
--   resulting list of indices, and to store additional data.
multiBuildHeapGraph
    :: Monoid a
    => Int -- ^ Search limit
    -> [(a, Box)] -- ^ Starting values with associated data entry
    -> IO (HeapGraph a, [(a, HeapGraphIndex)])
multiBuildHeapGraph limit = generalBuildHeapGraph limit (HeapGraph M.empty)

-- | Adds an entry to an existing 'HeapGraph'.
--
--   Returns the updated 'HeapGraph' and the index of the added value.
addHeapGraph
    :: Monoid a
    => Int -- ^ Search limit
    -> a -- ^ Data to be stored with the added value
    -> Box -- ^ Value to add to the graph
    -> HeapGraph a -- ^ Graph to extend
    -> IO (HeapGraphIndex, HeapGraph a)
addHeapGraph limit d box hg = do
    (hg', [(_,i)]) <- generalBuildHeapGraph limit hg [(d,box)]
    return (i, hg')

-- | Adds the given annotation to the entry at the given index, using the
-- 'mappend' operation of its 'Monoid' instance.
annotateHeapGraph :: Monoid a => a -> HeapGraphIndex -> HeapGraph a -> HeapGraph a
annotateHeapGraph d i (HeapGraph hg) = HeapGraph $ M.update go i hg
  where
    go hge = Just $ hge { hgeData = hgeData hge <> d }

generalBuildHeapGraph
    :: Monoid a
    => Int
    -> HeapGraph a
    -> [(a,Box)]
    -> IO (HeapGraph a, [(a, HeapGraphIndex)])
generalBuildHeapGraph limit _ _ | limit <= 0 = error "buildHeapGraph: limit has to be positive"
generalBuildHeapGraph limit (HeapGraph hg) addBoxes = do
    -- First collect all boxes from the existing heap graph
    let boxList = [ (hgeBox hge, i) | (i, hge) <- M.toList hg ]
        indices | M.null hg = [0..]
                | otherwise = [1 + fst (M.findMax hg)..]

        initialState = (boxList, indices, [])
    -- It is ok to use the Monoid (IntMap a) instance here, because
    -- we will, besides the first time, use 'tell' only to add singletons not
    -- already there
    (is, hg') <- runWriterT (evalStateT run initialState)
    -- Now add the annotations of the root values
    let hg'' = foldl' (flip (uncurry annotateHeapGraph)) (HeapGraph hg') is
    return (hg'', is)
  where
    run = do
        lift $ tell hg -- Start with the initial map
        forM addBoxes $ \(d, b) -> do
            -- Cannot fail, as limit is not zero here
            Just i <- add limit b
            return (d, i)

    add 0  _ = return Nothing
    add n b = do
        -- If the box is in the map, return the index
        (existing,_,_) <- get
        mbI <- liftIO $ findM (areBoxesEqual b . fst) existing
        case mbI of
            Just (_,i) -> return $ Just i
            Nothing -> do
                -- Otherwise, allocate a new index
                i <- nextI
                -- And register it
                modify (\(x,y,z) -> ((b,i):x, y, z))
                -- Look up the closure
                c <- liftIO $ getBoxedClosureData b
                -- Find indicies for all boxes contained in the map
                c' <- T.mapM (add (n-1)) c
                -- Add add the resulting closure to the map
                lift $ tell (M.singleton i (HeapGraphEntry b c' True mempty))
                return $ Just i
    nextI = do
        i <- gets (head . (\(_,b,_) -> b))
        modify (\(a,b,c) -> (a, tail b, c))
        return i

-- | This function updates a heap graph to reflect the current state of
-- closures on the heap, conforming to the following specification.
--
--  * Every entry whose value has been garbage collected by now is marked as
--    dead by setting 'hgeLive' to @False@
--  * Every entry whose value is still live gets the 'hgeClosure' field updated
--    and newly referenced closures are, up to the given depth, added to the graph.
--  * A map mapping previous indicies to the corresponding new indicies is returned as well.
--  * The closure at 'heapGraphRoot' stays at 'heapGraphRoot'
updateHeapGraph :: Monoid a => Int -> HeapGraph a -> IO (HeapGraph a, HeapGraphIndex -> HeapGraphIndex)
updateHeapGraph limit (HeapGraph startHG) = do
    (hg', indexMap) <- runWriterT $ foldM go (HeapGraph M.empty) (M.toList startHG)
    return (hg', (M.!) indexMap)
  where
    go hg (i, hge) = do
        (j, hg') <- liftIO $ addHeapGraph limit (hgeData hge) (hgeBox hge) hg
        tell (M.singleton i j)
        return hg'

-- | Pretty-prints a HeapGraph. The resulting string contains newlines. Example
-- for @let s = \"Ki\" in (s, s, cycle \"Ho\")@:
--
-- >let x1 = "Ki"
-- >    x6 = C# 'H' : C# 'o' : x6
-- >in (x1,x1,x6)
ppHeapGraph :: HeapGraph a -> String
ppHeapGraph (HeapGraph m) = letWrapper ++ ppRef 0 (Just heapGraphRoot)
  where
    -- All variables occuring more than once
    bindings = boundMultipleTimes (HeapGraph m) [heapGraphRoot]

    letWrapper =
        if null bindings
        then ""
        else "let " ++ intercalate "\n    " (map ppBinding bindings) ++ "\nin "

    bindingLetter i = case hgeClosure (iToE i) of
        ThunkClosure {..} -> 't'
        SelectorClosure {..} -> 't'
        APClosure {..} -> 't'
        PAPClosure {..} -> 'f'
        BCOClosure {..} -> 't'
        FunClosure {..} -> 'f'
        _ -> 'x'

    ppBindingMap = M.fromList $
        concat $
        map (zipWith (\j (i,c) -> (i, [c] ++ show j)) [(1::Int)..]) $
        groupBy ((==) `on` snd) $
        sortBy (compare `on` snd)
        [ (i, bindingLetter i) | i <- bindings ]

    ppVar i = ppBindingMap M.! i
    ppBinding i = ppVar i ++ " = " ++ ppEntry 0 (iToE i)

    ppEntry prec hge
        | Just s <- isString hge = show s
        | Just l <- isList hge   = "[" ++ intercalate "," (map (ppRef 0) l) ++ "]"
        | Just bc <- disassembleBCO (fmap (hgeClosure . iToE)) (hgeClosure hge)
                                       = app ("_bco" : map (ppRef 10) (concatMap F.toList bc))
        | otherwise = ppClosure ppRef prec (hgeClosure hge)
      where
        app [a] = a  ++ "()"
        app xs = addBraces (10 <= prec) (intercalate " " xs)

    ppRef _ Nothing = "..."
    ppRef prec (Just i) | i `elem` bindings = ppVar i
                        | otherwise = ppEntry prec (iToE i)
    iToE i = m M.! i

    iToUnboundE i = if i `elem` bindings then Nothing else M.lookup i m

    isList :: HeapGraphEntry a -> Maybe ([Maybe HeapGraphIndex])
    isList hge =
        if isNil (hgeClosure hge)
          then return []
          else do
            (h,t) <- isCons (hgeClosure hge)
            ti <- t
            e <- iToUnboundE ti
            t' <- isList e
            return $ (:) h t'

    isString :: HeapGraphEntry a -> Maybe String
    isString e = do
        list <- isList e
        -- We do not want to print empty lists as "" as we do not know that they
        -- are really strings.
        if (null list)
            then Nothing
            else mapM (isChar . hgeClosure <=< iToUnboundE <=< id) list


-- | In the given HeapMap, list all indices that are used more than once. The
-- second parameter adds external references, commonly @[heapGraphRoot]@.
boundMultipleTimes :: HeapGraph a -> [HeapGraphIndex] -> [HeapGraphIndex]
boundMultipleTimes (HeapGraph m) roots = map head $ filter (not.null) $ map tail $ group $ sort $
     roots ++ concatMap (catMaybes . allClosures . hgeClosure) (M.elems m)

-- | This function integrates the disassembler in "GHC.Disassembler". The first
-- argument should a function that dereferences the pointer in the closure to a
-- closure.
--
-- If any of these return 'Nothing', then 'disassembleBCO' returns Nothing
disassembleBCO :: (a -> Maybe (GenClosure b)) -> GenClosure a -> Maybe [BCI b]
-- Disable the assembler
disassembleBCO _ _ | id True = Nothing
disassembleBCO deref (BCOClosure {..}) = do
    opsC <- deref instrs
    litsC <- deref literals
    ptrsC  <- deref bcoptrs
    return $ disassemble (mccPayload ptrsC) (arrWords litsC) (toBytes (bytes opsC) (arrWords opsC))
disassembleBCO _ _ = Nothing

-- Utilities

findM :: (a -> IO Bool) -> [a] -> IO (Maybe a)
findM _p [] = return Nothing
findM p (x:xs) = do
    b <- p x
    if b then return (Just x) else findM p xs

addBraces :: Bool -> String -> String
addBraces True t = "(" ++ t ++ ")"
addBraces False t = t

braceize :: [String] -> String
braceize [] = ""
braceize xs = "{" ++ intercalate "," xs ++ "}"