-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | low-level performance statistics -- -- A set of tools to measure time performance. @package perf @version 0.3.0 -- | tick uses the rdtsc chipset to measure time performance of a -- computation. -- -- The measurement unit - a Cycle - is one oscillation of the chip -- crystal as measured by the rdtsc instruction which inspects the -- TSC register. -- -- For reference, a computer with a frequency of 2 GHz means that one -- cycle is equivalent to 0.5 nanoseconds. module Perf.Cycle -- | an unwrapped Word64 type Cycle = Word64 -- | tick_ measures the number of cycles it takes to read the rdtsc chip -- twice: the difference is then how long it took to read the clock the -- second time. -- -- Below are indicative measurements using tick_: -- --
--   >>> onetick <- tick_
--   
--   >>> ticks' <- replicateM 10 tick_
--   
--   >>> manyticks <- replicateM 1000000 tick_
--   
--   >>> let average = L.fold ((/) <$> L.sum <*> L.genericLength)
--   
--   >>> let avticks = average (fromIntegral <$> manyticks)
--   
--   >>> let qticks = deciles 10 manyticks
--   
--   >>> let tick999 = percentile 0.999 manyticks
--   
-- --
--   one tick_: 78 cycles
--   next 10: [20,18,20,20,20,20,18,16,20,20]
--   average over 1m: 20.08 cycles
--   99.999% perc: 7,986
--   99.9% perc: 50.97
--   99th perc:  24.99
--   40th perc:  18.37
--   [min, 10th, 20th, .. 90th, max]:
--   12.00 16.60 17.39 17.88 18.37 18.86 19.46 20.11 20.75 23.04 5.447e5
--   
-- -- The distribution of tick_ measurements is highly skewed, with the -- maximum being around 50k cycles, which is of the order of a GC. The -- important point on the distribution is around the 30th to 50th -- percentile, where you get a clean measure, usually free of GC activity -- and cache miss-fires tick_ :: IO Cycle -- | Warm up the register, to avoid a high first measurement. Without a -- warmup, one or more larger values can occur at the start of a -- measurement spree, and often are in the zone of an L2 miss. -- --
--   >>> t <- tick_ -- first measure can be very high
--   
--   >>> _ <- warmup 100
--   
--   >>> t <- tick_ -- should be around 20 (3k for ghci)
--   
warmup :: Int -> IO Double -- | `tick f a` strictly evaluates f and a, then deeply evaluates f a, -- returning a (Cycle, f a) -- --
--   >>> _ <- warmup 100
--   
--   >>> (cs, _) <- tick f a
--   
-- --
--   sum to 1000
--   first measure: 1202 cycles
--   second measure: 18 cycles
--   
-- -- Note that feeding the same computation through tick twice will tend to -- kick off sharing (aka memoization aka let floating). Given the -- importance of sharing to GHC optimisations this is the intended -- behaviour. If you want to turn this off then see -fno-full-laziness -- (and maybe -fno-cse). tick :: (NFData b) => (a -> b) -> a -> IO (Cycle, b) -- | tick where the arguments are lazy, so measurement may include -- evaluation of thunks that may constitute f and/or a tick' :: (NFData b) => (a -> b) -> a -> IO (Cycle, b) -- | measures and deeply evaluates an `IO a` -- --
--   >>> (cs, _) <- tickIO (pure (f a))
--   
tickIO :: (NFData a) => IO a -> IO (Cycle, a) -- | n measurements of a tick -- -- returns a list of Cycles and the last evaluated f a -- -- GHC is very good at finding ways to share computation, and anything -- measuring a computation multiple times is a prime candidate for -- aggressive GHC treatment. Internally, ticks uses a noinline pragma and -- a noinline on tick to help reduce the chances of memoization, but this -- is an inexact science in the hands of the author, at least, so -- interpret with caution. -- --
--   >>> let n = 1000
--   
--   >>> (cs, fa) <- ticks n f a
--   
-- -- Baseline speed can be highly sensitive to the nature of the function -- trimmings. Polymorphic functions can tend to be slightly slower, and -- functions with lambda expressions can experience dramatic slowdowns. -- --
--   fMono :: Int -> Int
--   fMono x = foldl' (+) 0 [1 .. x]
--   fPoly :: (Enum b, Num b, Additive b) => b -> b
--   fPoly x = foldl' (+) 0 [1 .. x]
--   fLambda :: Int -> Int
--   fLambda = \x -> foldl' (+) 0 [1 .. x]
--   
-- --
--   sum to 1000 n = 1000 prime run: 1.13e3
--   run                       first     2nd     3rd     4th     5th  40th %
--   ticks                    1.06e3     712     702     704     676    682 cycles
--   ticks (lambda)           1.19e3     718     682     684     678    682 cycles
--   ticks (poly)             1.64e3  1.34e3  1.32e3  1.32e3  1.32e3 1.31e3 cycles
--   
ticks :: (NFData b) => Int -> (a -> b) -> a -> IO ([Cycle], b) -- | n measurements of a tickIO -- -- returns an IO tuple; list of Cycles and the last evaluated f a -- --
--   >>> (cs, fa) <- ticksIO n (pure $ f a)
--   
-- --
--   ticksIO                     834     752     688     714     690    709 cycles
--   ticksIO (lambda)            822     690     720     686     688    683 cycles
--   ticksIO (poly)           1.01e3     688     684     682     712    686 cycles
--   
ticksIO :: (NFData a) => Int -> IO a -> IO ([Cycle], a) -- | make a series of measurements on a list of a's to be applied to f, for -- a tick function. -- -- Tends to be fragile to sharing issues, but very useful to determine -- computation Order -- --
--   ns (ticks n f) [1,10,100,1000]
--   
-- --
--   sum to's [1,10,100,1000]
--   tickns n fMono:  17.8 23.5 100 678
--   
ns :: (NFData b) => (a -> IO ([Cycle], b)) -> [a] -> IO ([[Cycle]], [b]) -- | WHNF version tickWHNF :: (a -> b) -> a -> IO (Cycle, b) -- | WHNF version tickWHNF' :: (a -> b) -> a -> IO (Cycle, b) -- | WHNF version tickWHNFIO :: IO a -> IO (Cycle, a) -- | WHNF version ticksWHNF :: Int -> (a -> b) -> a -> IO ([Cycle], b) -- | WHNF version ticksWHNFIO :: Int -> IO a -> IO ([Cycle], a) -- | average of a Cycle foldable -- --
--   cAv <- average <$> ticks n f a
--   
average :: (Foldable f) => f Cycle -> Double -- | compute deciles -- --
--   c5 <- deciles 5 <$> ticks n f a
--   
deciles :: (Functor f, Foldable f) => Int -> f Cycle -> [Double] -- | compute a percentile -- --
--   c <- percentile 0.4 <$> ticks n f a
--   
percentile :: (Functor f, Foldable f) => Double -> f Cycle -> Double instance NumHask.Algebra.Additive.AdditiveMagma Perf.Cycle.Cycle instance NumHask.Algebra.Additive.AdditiveUnital Perf.Cycle.Cycle instance NumHask.Algebra.Additive.AdditiveAssociative Perf.Cycle.Cycle instance NumHask.Algebra.Additive.AdditiveCommutative Perf.Cycle.Cycle instance NumHask.Algebra.Additive.Additive Perf.Cycle.Cycle instance NumHask.Algebra.Additive.AdditiveInvertible Perf.Cycle.Cycle instance NumHask.Algebra.Additive.AdditiveGroup Perf.Cycle.Cycle instance NumHask.Algebra.Integral.ToInteger Perf.Cycle.Cycle -- | Specification of a performance measurement type suitable for the -- PerfT monad transformer. module Perf.Measure -- | A Measure consists of a monadic effect prior to measuring, a monadic -- effect to finalise the measurement, and the value measured -- -- For example, the measure specified below will return 1 every time -- measurement is requested, thus forming the base of a simple counter -- for loopy code. -- --
--   >>> let count = Measure 0 (pure ()) (pure 1)
--   
data Measure m b Measure :: b -> m a -> (a -> m b) -> Measure m b [measure] :: Measure m b -> b [prestep] :: Measure m b -> m a [poststep] :: Measure m b -> a -> m b -- | Measure a single effect. -- --
--   >>> r <- runMeasure count (pure "joy")
--   
--   >>> r
--   (1,"joy")
--   
runMeasure :: (MonadIO m) => Measure m b -> m a -> m (b, a) -- | Measure once, but run an effect multiple times. -- --
--   >>> r <- runMeasureN 1000 count (pure "joys")
--   
--   >>> r
--   (1,"joys")
--   
runMeasureN :: (MonadIO m) => Int -> Measure m b -> m a -> m (b, a) -- | cost of a measurement in terms of the Measure's own units -- --
--   >>> r <- cost count
--   
--   >>> r
--   1
--   
cost :: (MonadIO m) => Measure m b -> m b -- | a measure using getCPUTime from System.CPUTime (unit is -- picoseconds) -- --
--   >>> r <- runMeasure cputime (pure $ foldl' (+) 0 [0..1000])
--   
-- --
--   (34000000,500500)
--   
cputime :: Measure IO Integer -- | a measure using getCurrentTime (unit is NominalDiffTime -- which prints as seconds) -- --
--   >>> r <- runMeasure realtime (pure $ foldl' (+) 0 [0..1000])
--   
-- --
--   (0.000046s,500500)
--   
realtime :: Measure IO NominalDiffTime -- | a measure used to count iterations -- --
--   >>> r <- runMeasure count (pure ())
--   
--   >>> r
--   (1,())
--   
count :: Measure IO Int -- | a Measure using the rdtsc chip set (units are in cycles) -- --
--   >>> r <- runMeasureN 1000 cycles (pure ())
--   
-- --
--   (120540,()) -- ghci-level
--   (18673,())  -- compiled with -O2
--   
cycles :: Measure IO Cycle instance NumHask.Algebra.Additive.AdditiveMagma Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.AdditiveUnital Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.AdditiveAssociative Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.AdditiveCommutative Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.Additive Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.AdditiveInvertible Data.Time.Clock.UTC.NominalDiffTime instance NumHask.Algebra.Additive.AdditiveGroup Data.Time.Clock.UTC.NominalDiffTime -- | PerfT is a monad transformer designed to collect performance -- information. The transformer can be used to add performance measurement -- to an existing code base using Measures. -- -- For example, here's some code doing some cheesy stuff: -- --
--   -- prior to Perfification
--   result <- do
--       txt <- readFile "examples/examples.hs"
--       let n = Text.length txt
--       let x = foldl' (+) 0 [1..n]
--       putStrLn $ "sum of one to number of characters is: " <>
--           (show x :: Text)
--       pure (n, x)
--   
-- -- And here's the code after Perfification, measuring performance -- of the components. -- --
--   (result', ms) <- runPerfT $ do
--           txt <- perf "file read" cycles $ readFile "examples/examples.hs"
--           n <- perf "length" cycles $ pure (Text.length txt)
--           x <- perf "sum" cycles $ pure (foldl' (+) 0 [1..n])
--           perf "print to screen" cycles $
--               putStrLn $ "sum of one to number of characters is: " <>
--               (show x :: Text)
--           pure (n, x)
--   
-- -- Running the code produces a tuple of the original computation results, -- and a Map of performance measurements that were specified. Indicative -- results: -- --
--   file read                               4.92e5 cycles
--   length                                  1.60e6 cycles
--   print to screen                         1.06e5 cycles
--   sum                                     8.12e3 cycles
--   
module Perf -- | PerfT is polymorphic in the type of measurement being performed. The -- monad stores and produces a Map of labelled measurement values data PerfT m b a -- | The obligatory transformer over Identity type Perf b a = PerfT Identity b a -- | Lift a monadic computation to a PerfT m, providing a label and a -- Measure. perf :: (MonadIO m, Additive b) => Text -> Measure m b -> m a -> PerfT m b a -- | Lift a monadic computation to a PerfT m, and carry out the computation -- multiple times. perfN :: (MonadIO m, Semigroup b, Monoid b) => Int -> Text -> Measure m b -> m a -> PerfT m b a -- | Consume the PerfT layer and return a (result, measurement). -- --
--   >>> :set -XOverloadedStrings
--   
--   >>> (result, cs) <- runPerfT $ perf "sum" cycles (pure $ foldl' (+) 0 [0..10000])
--   
-- --
--   (50005000,fromList [("sum",562028)])
--   
runPerfT :: PerfT m b a -> m (a, Map Text b) -- | Consume the PerfT layer and return the original monadic result. -- Fingers crossed, PerfT structure should be completely compiled away. -- --
--   >>> result <- evalPerfT $ perf "sum" cycles (pure $ foldl' (+) 0 [0..10000])
--   
-- --
--   50005000
--   
evalPerfT :: (Monad m) => PerfT m b a -> m a -- | Consume a PerfT layer and return the measurement. -- --
--   >>> cs <- execPerfT $ perf "sum" cycles (pure $ foldl' (+) 0 [0..10000])
--   
-- --
--   fromList [("sum",562028)]
--   
execPerfT :: (Monad m) => PerfT m b a -> m (Map Text b) instance GHC.Base.Monad m => GHC.Base.Monad (Perf.PerfT m b) instance GHC.Base.Monad m => GHC.Base.Applicative (Perf.PerfT m b) instance GHC.Base.Functor m => GHC.Base.Functor (Perf.PerfT m b) instance Control.Monad.IO.Class.MonadIO m => Control.Monad.IO.Class.MonadIO (Perf.PerfT m b)