{-| This module defines the commonly used data structures and basic
types of the heap profiling framework.

Profiling information is a sequence of time-stamped samples, therefore
the ideal data structure should have an efficient snoc operation.
Also, it should make it easy to extract an interval given by a start
and an end time.  On top of the raw data, we also want to access some
statistics as efficiently as possible.

We can separate two phases: looking at the profile during execution
and later.  In the first case we might not want statistics, just live
monitoring, while we probably want to analyse archived profiles more
deeply.  Therefore, it makes sense to define two separate data
structures for these two purposes, and give them a common interface
for extracting the necessary data.  The simple case is covered by the
'Profile' type defined here, while a more complex structure providing
fast off-line queries is defined in the "Profiling.Heap.Stats" module.

-}
module Profiling.Heap.Types
    ( CostCentreId
    , CostCentreName
    , Time
    , Cost
    , ProfileSample
    -- * Profile data structure
    , Profile(..)
    , emptyProfile
    -- * Query interface
    , ProfileQuery(..)
    -- * Streaming interface
    , ProfileSink
    , SinkInput(..)
    ) where

import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.Int
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
import Data.List
import Data.Trie (Trie)
import qualified Data.Trie as T

{-| The 'ProfileQuery' class contains all kinds of reading operations.
The minimal definition consists of 'job', 'date', 'ccNames' and
'samples'.  All the statistics have default implementations, which are
mostly okay for a single query, but they are generally highly
inefficient. -}
class ProfileQuery p where
    -- | Job information (command line).
    job :: p -> String

    -- | Job start time.
    date :: p -> String

    -- | Cost centre id to name mapping.
    ccNames :: p -> IntMap CostCentreName

    -- | Find cost centre name by id.  Unknown ids yield the empty
    -- name.
    ccName :: p -> Int -> CostCentreName
    ccName p ccId = IM.findWithDefault S.empty ccId (ccNames p)

    -- | The measurements in a list ordered by time.
    samples :: p -> [(Time,ProfileSample)]

    -- | The samples between two given times: those whose time stamp
    -- @t@ satisfies @t1 <= t < t2@.  Relies on 'samples' being
    -- ordered by increasing time.
    --
    -- NOTE(review): the body of this default was damaged in the
    -- source text and has been reconstructed; confirm that the
    -- half-open interval (strict upper bound) is the intended one.
    samplesIvl :: p -> Time -> Time -> [(Time,ProfileSample)]
    samplesIvl p t1 t2 =
        takeWhile ((< t2) . fst) . dropWhile ((< t1) . fst) $ samples p

    -- | The time of the first sample, or 0 if there are no samples.
    minTime :: p -> Time
    minTime p = case samples p of
        []        -> 0
        (t,_) : _ -> t

    -- | The time of the last sample, or 0 if there are no samples.
    maxTime :: p -> Time
    maxTime p = case samples p of
        []  -> 0
        smp -> fst (last smp)  -- safe: smp is non-empty in this branch

    -- | The highest individual cost at any time (0 if there is none).
    maxCost :: p -> Cost
    maxCost p = maximum $ 0 : [c | (_,s) <- samples p, (_,c) <- s]

    -- | The highest total cost at any time (0 if there is none).
    maxCostTotal :: p -> Cost
    maxCostTotal p = maximum $ 0 : [sum (map snd s) | (_,s) <- samples p]

    -- | The highest individual cost in the interval.
    maxCostIvl :: p -> Time -> Time -> Cost
    maxCostIvl p t1 t2 =
        maximum $ 0 : [c | (_,s) <- samplesIvl p t1 t2, (_,c) <- s]

    -- | The highest total cost in the interval.
    maxCostTotalIvl :: p -> Time -> Time -> Cost
    maxCostTotalIvl p t1 t2 =
        maximum $ 0 : [sum (map snd s) | (_,s) <- samplesIvl p t1 t2]

    -- | The total cost of each cost centre.  Not a time integral;
    -- samples are simply summed.
    integral :: p -> ProfileSample
    integral = integral' . samples

    -- | The total cost of each cost centre in the interval.
    integralIvl :: p -> Time -> Time -> ProfileSample
    integralIvl p t1 t2 = integral' (samplesIvl p t1 t2)

-- | Sum the costs of every cost centre over a list of samples.  The
-- time stamps are ignored.  The result holds one entry per cost
-- centre id, in increasing id order.
integral' :: [(Time,ProfileSample)] -> ProfileSample
integral' = IM.assocs . foldl' accumSample IM.empty
  where
    -- Fold all the costs of a single sample into the running totals.
    accumSample acc = foldl' accumCost acc . snd
    -- Add one cost to its cost centre, creating the entry if needed.
    accumCost acc (ccid,cost) = IM.insertWith (+) ccid cost acc

{-| A raw heap profile that's easy to grow further, therefore it is
used during loading. -}
data Profile = Profile
    { prSamples :: ![(Time,ProfileSample)]
      -- ^ Samples in decreasing time order (latest first).
    , prNames :: !(IntMap CostCentreName)
      -- ^ A map from cost centre ids to names.
    , prNamesInv :: !(Trie CostCentreId)
      -- ^ A map from cost centre names to ids.
    , prJob :: !String
      -- ^ Information about the job (command line).
    , prDate :: !String
      -- ^ Job start time and date.
    } deriving Eq

-- | Multi-line dump: job, date, the id-name pairs and then the
-- samples in their stored order (latest first).
instance Show Profile where
    show p = unlines $
             ["Job: " ++ prJob p
             ,"Date: " ++ prDate p
             ,"Name mappings:"] ++
             (map show . IM.assocs . prNames) p ++
             ["Measurements:"] ++
             (map show . prSamples) p

-- | Only the minimal methods are given; every statistic uses the
-- class defaults.
instance ProfileQuery Profile where
    job = prJob
    date = prDate
    ccNames = prNames
    -- The samples are stored latest first, the query interface wants
    -- them ordered by time.
    samples = reverse . prSamples

{-| An initial 'Profile' structure that can be used in
accumulations. -}
emptyProfile :: Profile
emptyProfile = Profile
    { prSamples = []
    , prNames = IM.empty
    , prNamesInv = T.empty
    , prJob = ""
    , prDate = ""
    }

{-| Cost centres are identified by integers for simplicity (so we can
use IntMap). -}
type CostCentreId = Int

{-| At this level cost centre names have no internal structure that we
would care about.  While in some cases they reflect the call
hierarchy, we are not splitting them at this point, because all kinds
of names can appear here. -}
type CostCentreName = ByteString

{-| Time is measured in seconds. -}
type Time = Double

{-| Costs are measured in bytes. -}
type Cost = Int64

{-| A sampling point is simply a list of cost centres with the
associated cost.  There is no need for a fancy data structure here,
since we normally process every value in this collection, and it's
usually not big either, only holding a few dozen entries at most. -}
type ProfileSample = [(CostCentreId,Cost)]

{-| We might not want to hold on to all the past output, just do some
stream processing.  We can achieve this using a callback function
that's invoked whenever a new profile sample is available.  The type
of this function can be 'ProfileSink'.  Besides the actual costs, it
is also necessary to send over the names that belong to the short cost
centre identifiers as well as the fact that no more data will come.
The 'SinkInput' type expresses these possibilities. -}
type ProfileSink = SinkInput -> IO ()

-- | The messages a 'ProfileSink' can receive.
data SinkInput
    -- | A snapshot of costs at a given time.
    = SinkSample !Time !ProfileSample
    -- | The name behind a cost centre id used in the samples.
    | SinkId !CostCentreId !CostCentreName
    -- | Indication that no more data will come.
    | SinkStop
    deriving (Eq, Show)