{-# LANGUAGE BangPatterns #-} module Main where import qualified Data.TLS.GHC as GHC import qualified Data.TLS.GCC as GCC import qualified Data.TLS.PThread as PT import Criterion import Criterion.Types import Criterion.Main import Data.Atomics.Counter import Control.Monad import Control.Concurrent.MVar import Data.IORef import GHC.Conc import System.Environment -------------------------------------------------------------------------------- main :: IO () main = do numProc <- getNumProcessors n <- getNumCapabilities when (n == 1) $ do putStrLn "HACK: using setNumCapabilities to bump it up... should set this in the .cabal" setNumCapabilities numProc numCap <- getNumCapabilities putStrLn $ "Benchmarking platform with "++show numProc++ " processors, while currently using "++show numCap++" threads." -- Substitute in default command line args: args <- getArgs let args' = if null args then words $ " --regress=allocated:iters --regress=bytesCopied:iters --regress=cycles:iters "++ " --regress=numGcs:iters --regress=mutatorWallSeconds:iters --regress=gcWallSeconds:iters "++ " --regress=cpuTime:iters " ++ " -o tls_report.html " -- ++ " --raw tls_report.criterion " else args threadify fn = bgroup "" [ fn (threads, suff) | threads <- [1..numCap*4], let suff = "_" ++ show threads ++"io_"++ show numCap++"os" ] mkTests name mkTLS getTLS freeTLS = bgroup name [ -- bench ("counter/getTLS/incrCntr"++suff) $ -- benchPar0 threads (GHC.mkTLS (newCounter 0)) -- (\t -> incrCounter_ 1 =<< GHC.getTLS t) threadify $ \ (threads,suff) -> bench ("counter/getTLS/readIORef"++suff) $ benchPar0 threads (mkTLS (newIORef ())) (\t -> readIORef =<< getTLS t) freeTLS ] withArgs args' $ defaultMain $ [ mkTests "PThread" PT.mkTLS PT.getTLS PT.freeTLS , mkTests "GHC" GHC.mkTLS GHC.getTLS GHC.freeTLS ] {- bgroup "infrastructure" [ bench ("benchPar1"++suff) $ benchPar1 threads (return ()) , bench ("benchPar0"++suff) $ benchPar0 threads (return ()) (\_ -> return ()) -- , bench ("benchPar2"++suff) $ benchPar2 threads (return ()) ], -} -- | ] where ---------------------------------------------------------------------------------------------------- benchPar0 :: Int -> IO a -> (a -> IO ()) -> (a -> IO ()) -> Benchmarkable benchPar0 numT new fn shutd = Benchmarkable $ \ iters -> do x <- new numCap <- getNumCapabilities -- We compute the number of iterations such that the time would be -- flat IFF parallelism works perfectly up to numCapabilities, and -- then load balancing works perfectly when # threads exceeds -- numCapabilities. let totalIters = (fromIntegral iters) * (max numCap numT) perThread = totalIters `quot` numT mvs <- forM [0..numT-1] $ \ n -> do v <- newEmptyMVar _ <- forkOn n $ do rep perThread (fn x) putMVar v () return v forM_ mvs takeMVar -- Shut down only when all threads are finished with it: shutd x {-# INLINE benchPar0 #-} -- | Benchmarking the same action on ALL of N threads. -- This version uses MVar synchronization. benchPar1 :: Int -> IO () -> Benchmarkable benchPar1 num act = Benchmarkable $ \ iters -> do mvs <- forM [0..num-1] $ \ n -> do v <- newEmptyMVar _ <- forkOn n $ do rep (fromIntegral iters) act putMVar v () return v forM_ mvs takeMVar {-# INLINE benchPar1 #-} -- | This version never blocks on an MVar. benchPar2 :: Int -> IO () -> Benchmarkable benchPar2 num act = Benchmarkable $ \ iters -> do done <- newCounter 0 let waitCounter = do x <- readCounter done unless (num == x) waitCounter go = do rep (fromIntegral iters) act incrCounter_ 1 done waitCounter forM_ [1..num-1] $ \ n -> forkOn n go go {-# INLINE benchPar2 #-} -- | My own forM for inclusive numeric ranges (not requiring deforestation optimizations). for_ :: Monad m => Int -> Int -> (Int -> m ()) -> m () for_ start end _fn | start > end = error "for_: start is greater than end" for_ start end fn = loop start where loop !i | i > end = return () | otherwise = do fn i; loop (i+1) {-# INLINE for_ #-} rep :: Monad m => Int -> (m ()) -> m () rep n m = for_ 1 n (\_ -> m) {-# INLINE rep #-}