module ParkBench.Driver ( benchmark1, Pull1 (..), benchmark, Pull, Pulls, pulls, pull, ) where import Data.IORef import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NonEmpty import ParkBench.Prelude import ParkBench.RtsStats (RtsStats) import ParkBench.Statistics newtype Pull1 a = Pull1 (IO (Estimate a, Pull1 a)) -- | Like 'benchmark', but optimized for only running one benchmark. benchmark1 :: forall a. Roll a => (Word64 -> IO (Timed a)) -> Pull1 a benchmark1 run = Pull1 do t <- run 1 let another :: Estimate a -> Pull1 a another e0 = Pull1 do t2 <- run n pure (andAnother (updateEstimate n t2 e0)) where n = next e0 andAnother :: Estimate a -> (Estimate a, Pull1 a) andAnother e = (e, another e) pure (andAnother (initialEstimate t)) {-# SPECIALIZE benchmark1 :: (Word64 -> IO (Timed RtsStats)) -> Pull1 RtsStats #-} benchmark :: forall a. Roll a => (Word64 -> IO (Timed a)) -> IO (IO (Estimate a), Pull a) benchmark run = do t <- run 1 let e = initialEstimate t ref <- newIORef e let another :: Estimate a -> IO (Pull a) another e0 = do t2 <- run n let !e1 = updateEstimate n t2 e0 writeIORef ref e1 pure (andAnother e1) where n = next e0 andAnother :: Estimate a -> Pull a andAnother e0 = Pull (w2r (samples e0) * nanoseconds (mean e0)) (another e0) pure (readIORef ref, andAnother e) {-# SPECIALIZE benchmark :: (Word64 -> IO (Timed RtsStats)) -> IO (IO (Estimate RtsStats), Pull RtsStats) #-} -- target runs that take 0.1 seconds (e.g. 500_000_000 would be 0.5 seconds) next :: Estimate a -> Word64 next Estimate {mean = Timed nanoseconds _, samples} = max 1 (min samples (floor (100_000_000 / nanoseconds))) data Pull a = Pull -- amount of time this pull has gotten {-# UNPACK #-} !Rational !(IO (Pull a)) isMoreUrgentThan :: Pull a -> Pull a -> Bool Pull t0 _ `isMoreUrgentThan` Pull t1 _ = t0 < t1 -- | A @Pulls@ represents the suspended state of a collection of 1+ benchmarks. data Pulls a = -- Most benchmark runs are probably only comparing 1-3 things, so we optimize those cases. P1 !(Pull a) | P2 !(Pull a) !(Pull a) | P3 !(Pull a) !(Pull a) !(Pull a) | -- invariant: 4+ elements Pn_ ![Pull a] pattern Pn :: Pull a -> [Pull a] -> Pulls a pattern Pn p ps <- Pn_ (p : ps) {-# COMPLETE P1, P2, P3, Pn #-} -- | Construct a 'Pulls' from a non-empty list of 'Pull'. pulls :: NonEmpty (Pull a) -> Pulls a pulls = pulls' . NonEmpty.sortWith \(Pull t _) -> t pulls' :: NonEmpty (Pull a) -> Pulls a pulls' = \case a :| [] -> P1 a a :| [b] -> P2 a b a :| [b, c] -> P3 a b c a :| as -> Pn_ (a : as) -- | Pull on a 'Pulls', which blocks until the benchmark that has heretofore accumulated the smallest amount of runtime -- runs once more. -- -- Returns the 'Pulls' to use next time, which reflects the latest benchmark run that just completed. pull :: Pulls a -> IO (Pulls a) pull = \case P1 (Pull _ p0) -> do p <- p0 pure (P1 p) P2 (Pull _ p0) q -> do p <- p0 pure if q `isMoreUrgentThan` p then P2 q p else P2 p q P3 (Pull _ p0) q r -> do p <- p0 pure if q `isMoreUrgentThan` p then if r `isMoreUrgentThan` p then P3 q r p else P3 q p r else P3 p q r Pn (Pull _ p0) ps -> do p <- p0 pure (Pn_ (insertPull p ps)) insertPull :: Pull a -> [Pull a] -> [Pull a] insertPull p0 = \case [] -> [p0] p1 : ps -> if p0 `isMoreUrgentThan` p1 then p0 : p1 : ps else p1 : insertPull p0 ps