{-# LANGUAGE FlexibleInstances #-} -- A benchmark of shift/reset: Filinski's representing non-determinism monads -- -- The benchmark is taken from Sec 6.1 of -- Martin Gasbichler, Michael Sperber: Final Shift for Call/cc: Direct -- Implementation of Shift and Reset, ICFP'02, pp. 271-282. -- http://www-pu.informatik.uni-tuebingen.de/users/sperber/papers/shift-reset-direct.pdf -- This code is a straightforward translation of bench_nondet.ml -- -- This is a micro-benchmark: it is very non-determinism-intensive. It is -- *not* representative: the benchmark does nothing else but -- concatenates lists. The List monad does this directly; whereas -- continuation monads do the concatenation with more overhead (e.g., -- building the closures representing continuations). Therefore, -- the List monad here outperforms all other implementations of -- non-determinism. -- It should be stressed that the delimited control is optimized -- for the case where control operations are infrequent, so we pay -- as we go. The use of the delimited control operators is more -- expensive, but the code that does not use delimited control does not -- have to pay anything for delimited control. -- Again, in the present micro-benchmark, there is hardly any code that -- does not use non-determinism, so the overhead of delimited control -- is very noticeable. That is why this benchmark is good at estimating -- the overhead of different implementations of delimited control. -- To compile this code -- ghc --make -O2 -main-is Bench_nondet.main_list5 Bench_nondet.hs -- To run this code -- GHCRTS="-tstderr" /usr/bin/time ./Bench_nondet module Bench_nondet where import Control.Monad.CC.CCExc -- import Control.Monad.CC.CCCxe -- import Control.Monad.CC.CCRef import Data.List (sort) import Control.Monad.Identity import Control.Monad (MonadPlus(..), liftM2, msum) -- import System.CPUTime -- Small language with non-determinism: just like the one in our DSL-WC paper int :: MonadPlus repr => Int -> repr Int int x = return x add :: MonadPlus repr => repr Int -> repr Int -> repr Int add xs ys = liftM2 (+) xs ys lam :: MonadPlus repr => (repr a -> repr b) -> repr (a -> repr b) lam f = return $ f . return app :: MonadPlus repr => repr (a -> repr b) -> (repr a -> repr b) app xs ys = do {x <- xs; y <- ys; x y} amb :: MonadPlus repr => [repr Int] -> repr Int amb = msum -- Benchmark cases test_ww :: MonadPlus repr => repr Int test_ww = let f = lam (\x -> add (add x (amb [int 6, int 4, int 2, int 8])) (amb [int 2, int 4, int 5, int 4, int 1])) in f `app` amb [int 0, int 2, int 3, int 4, int 5, int 32] ww_answer = sort [8, 10, 11, 10, 7, 6, 8, 9, 8, 5, 4, 6, 7, 6, 3, 10, 12, 13, 12, 9, 10, 12, 13, 12, 9, 8, 10, 11, 10, 7, 6, 8, 9, 8, 5, 12, 14, 15, 14, 11, 11, 13, 14, 13, 10, 9, 11, 12, 11, 8, 7, 9, 10, 9, 6, 13, 15, 16, 15, 12, 12, 14, 15, 14, 11, 10, 12, 13, 12, 9, 8, 10, 11, 10, 7, 14, 16, 17, 16, 13, 13, 15, 16, 15, 12, 11, 13, 14, 13, 10, 9, 11, 12, 11, 8, 15, 17, 18, 17, 14, 40, 42, 43, 42, 39, 38, 40, 41, 40, 37, 36, 38, 39, 38, 35, 42, 44, 45, 44, 41] -- Real benchmark cases test_www :: MonadPlus repr => repr Int test_www = let f = lam (\x -> add (add x (amb [int 6, int 4, int 2, int 8])) (amb [int 2, int 4, int 5, int 4, int 1])) in f `app` (f `app` amb [int 0, int 2, int 3, int 4, int 5, int 32]) test_wwww :: MonadPlus repr => repr Int test_wwww = let f = lam (\x -> add (add x (amb [int 6, int 4, int 2, int 8])) (amb [int 2, int 4, int 5, int 4, int 1])) in f `app` (f `app` (f `app` amb [int 0, int 2, int 3, int 4, int 5, int 32])) test_w5 :: MonadPlus repr => repr Int test_w5 = let f = lam (\x -> add (add x (amb [int 6, int 4, int 2, int 8])) (amb [int 2, int 4, int 5, int 4, int 1])) in f `app` (f `app` (f `app` (f `app` amb [int 0, int 2, int 3, int 4, int 5, int 32]))) -- Different implementations of our language (MonadPlus) -- The List monad: Non-determinism monad as a list of successes run_list :: [Int] -> [Int] run_list = id testl1 = (==) [101, 201, 102, 202] . run_list $ add (amb [int 1, int 2]) (amb [int 100, int 200]) testl2 = ww_answer == sort (run_list test_ww) -- CPS-monad, implemented by hand; it must be quite efficient therefore newtype CPS a = CPS{unCPS:: (a -> [Int]) -> [Int]} instance Monad CPS where return x = CPS $ \k -> k x m >>= f = CPS $ \k -> unCPS m (\a -> unCPS (f a) k) instance MonadPlus CPS where mzero = CPS $ \_ -> [] mplus m1 m2 = CPS $ \k -> unCPS m1 k ++ unCPS m2 k run_cps :: CPS Int -> [Int] run_cps m = unCPS m (\x -> [x]) testc1 = (==) [101, 201, 102, 202] . run_cps $ add (amb [int 1, int 2]) (amb [int 100, int 200]) testc2 = ww_answer == sort (run_cps test_ww) -- CCEx monad -- Not a very optimal implementation of mplus (a tree would be better) -- But is suffices as a benchmark of different implementations of CC instance Monad m => MonadPlus (CC (PS [Int]) m) where mzero = abortP ps (return []) mplus m1 m2 = takeSubCont ps (\k -> liftM2 (++) (pushPrompt ps (pushSubCont k m1)) (pushPrompt ps (pushSubCont k m2))) run_dir :: CC (PS [Int]) Identity Int -> [Int] run_dir m = runIdentity . runCC $ pushPrompt ps (m >>= return . (:[])) testd1 = (==) [101, 201, 102, 202] . run_dir $ add (amb [int 1, int 2]) (amb [int 100, int 200]) testd2 = ww_answer == sort (run_dir test_ww) {- -- CCRef monad -- Need a reader-monad layer to propagate the prompt newtype CCR m a = CCR{unCCR :: Prompt m [Int] -> CC m a} instance Monad m => Monad (CCR m) where return x = CCR $ \_ -> return x m >>= f = CCR $ \p -> unCCR m p >>= \v -> unCCR (f v) p -- Not a very optimal implementation of mplus (a tree would be better) -- But is suffices as a benchmark of different implementations of CC instance (Monad m, Mutation m) => MonadPlus (CCR m) where mzero = CCR $ \p -> abortP p (return []) mplus m1 m2 = CCR $ \p -> takeSubCont p (\k -> liftM2 (++) (pushDelimSubCont k (unCCR m1 p)) (pushDelimSubCont k (unCCR m2 p))) run_ref :: CCR IO Int -> IO [Int] run_ref m = runCC $ do p <- newPrompt pushPrompt p (unCCR m p >>= return . (:[])) testr1 = ((return . ((==) [101, 201, 102, 202])) =<<) . run_ref $ add (amb [int 1, int 2]) (amb [int 100, int 200]) testr2 = do r <- run_ref test_ww return $ ww_answer == sort r main_ref5io = do l <- run_ref test_w5 print $ length l == 960000 -} -- Benchmarks themselves main_list3 = print $ 2400 == (length . run_list $ test_www) main_list4 = print $ 48000 == (length . run_list $ test_wwww) main_list5 = print $ 960000 == (length . run_list $ test_w5) main_cps3 = print $ 2400 == (length . run_cps $ test_www) main_cps4 = print $ 48000 == (length . run_cps $ test_wwww) main_cps5 = print $ 960000 == (length . run_cps $ test_w5) -- We expect the direct implementation to be slower since CC is the transformer, -- whereas CPS is not. The latter is hand-written for a specific answer-type. main_dir3 = print $ 2400 == (length . run_dir $ test_www) main_dir4 = print $ 48000 == (length . run_dir $ test_wwww) main_dir5 = print $ 960000 == (length . run_dir $ test_w5) -- Instantiate CC to the IO as the base monad, attempting to quantify the -- effect of the Identity transformer main_dir5io = do l <- runCC $ pushPrompt ps (test_w5 >>= return . (:[])) print $ length l == 960000 {- Median of 5 runs main_list5 <> 0.30 real 0.30 user 0.00 sys main_cps5 <> 0.60 real 0.58 user 0.01 sys main_dir5 (CCExc implementation) <> 4.48 real 4.22 user 0.24 sys main_dir5io (CCExc implementation) <> 5.45 real 5.18 user 0.21 sys main_dir5 (CCCxe implementation) ./Bench_nondet +RTS -tstderr True <> 4.70 real 4.44 user 0.23 sys main_dir5io (CCCxe implementation) ./Bench_nondet +RTS -tstderr True <> 4.72 real 4.44 user 0.23 sys main_ref5io (without pushDelimSubCont) ./Bench_nondet +RTS -tstderr True <> 68.94 real 67.51 user 1.03 sys main_ref5io (with pushDelimSubCont) ./Bench_nondet +RTS -tstderr True <> 20.50 real 19.92 user 0.46 sys -}