module Benchmark where import Shellish hiding ( run ) import Control.Applicative import Control.Arrow ( first, second ) import Data.Array.Vector import Data.Char import Data.Function ( on ) import Data.List import Data.Either ( rights ) import Data.Maybe import Statistics.Sample import System.Directory import System.Environment import System.FilePath( (), (<.>), splitDirectories, joinPath ) import System.IO import qualified Text.Tabular as Tab import TabularRST as TR import System.Exit import Text.Printf import Text.Regex.Posix( (=~) ) import Data.Time.Clock import Control.Monad.Error import Control.Monad.State( liftIO ) import Control.Exception( throw ) import Control.Concurrent( forkIO ) import Control.Concurrent.Chan( newChan, writeChan, readChan, Chan ) import System.Console.CmdArgs (isLoud) import System.Process( runInteractiveProcess, runInteractiveCommand, waitForProcess ) import Data.IORef import System.IO.Unsafe import Text.JSON import qualified System.IO.UTF8 as UTF8 precision :: Int precision = 1 maybeVMFlush :: IORef (x -> IO ()) maybeVMFlush = unsafePerformIO $ newIORef $ const $ return () type Darcs = [String] -> Command String type BenchmarkCmd a = Darcs -> TestRepo -> Command a data MemTime = MemTime Rational Double deriving (Read, Show, Ord, Eq) data MemTimeOutput = MemTimeOutput { mtTimeMean :: Double , mtTimeDev :: Double , mtSampleSize :: Int , mtUnit :: TimeUnit , mtMemMean :: Rational } deriving (Show) mkMemTimeOutput :: [MemTime] -> TimeUnit -> MemTimeOutput mkMemTimeOutput xs u = MemTimeOutput { mtTimeMean = mean tv , mtTimeDev = stdDev tv , mtMemMean = toRational (mean mv) , mtSampleSize = lengthU tv , mtUnit = u } where tv = toU [ t | MemTime _ t <- xs ] mv = toU [ fromRational m | MemTime m _ <- xs ] data TestRepo = TestRepo { trName :: String , trCoreName :: String -- ^ (variants only) name of the orig repo , trPath :: FilePath -- ^ relative to the config file , trAnnotate :: Maybe FilePath -- ^ relative to repo, eg. @Just "README"@ , trVariants :: [Variant] } deriving (Read, Show, Eq, Ord) instance JSON TestRepo where readJSON (JSObject o) = TestRepo <$> jlookup "name" <*> jlookup "name" -- 2nd time for trCoreName <*> jlookup "path" <*> jlookupMaybe "annotate" <*> (map toVariant . (DefaultVariant :) <$> jlookupMaybeList "variants") where jlookup a = case lookup a (fromJSObject o) of Nothing -> fail "Unable to read TestRepo" Just v -> readJSON v jlookupMaybe a = case lookup a (fromJSObject o) of Nothing -> return Nothing Just JSNull -> return Nothing Just v -> Just <$> readJSON v jlookupMaybeList a = case lookup a (fromJSObject o) of Nothing -> return [] Just v -> readJSONs v readJSON _ = fail "Unable to read TestRepo" showJSON = error "showJSON not defined for TestRepo yet" data TestBinary = TestBinary String deriving (Show, Read) data TimeUnit = MilliSeconds | Seconds instance Show TimeUnit where show MilliSeconds = "ms" show Seconds = "s" multiplier :: TimeUnit -> Double multiplier MilliSeconds = 1000 multiplier Seconds = 1 data Benchmark a = Idempotent TimeUnit String (BenchmarkCmd a) | Destructive TimeUnit String (BenchmarkCmd a) | Description String instance Show (Benchmark a) where show = description -- FIXME: is this right? -- note that the order of the variants is reflected in the tables data VariantName = DefaultVariant | OptimizePristineVariant deriving (Eq, Ord, Read, Show) instance JSON VariantName where readJSON (JSString s) = case fromJSString s of "optimize-pristine" -> return OptimizePristineVariant x -> fail $ "Unknown variant: " ++ x readJSON _ = fail "Unable to VariantName" showJSON = error "showJSON not defined for VariantName yet" data Variant = Variant { vId :: VariantName , vShortName :: String , vDescription :: String , vSuffix :: String } deriving (Eq, Ord, Show, Read) toVariant :: VariantName -> Variant toVariant n@DefaultVariant = Variant n "default" "default (hashed)" "" toVariant n@OptimizePristineVariant = Variant n "opt pris" "optimize --pristine" "op" data Test a = Test (Benchmark a) TestRepo TestBinary deriving (Show) copyTree :: FilePath -> FilePath -> IO () copyTree from to = do subs <- (\\ [".", ".."]) `fmap` getDirectoryContents from createDirectory to forM_ subs $ \item -> do is_dir <- doesDirectoryExist (from item) is_file <- doesFileExist (from item) when is_dir $ copyTree (from item) (to item) when is_file $ copyFile (from item) (to item) description :: Benchmark a -> String description (Idempotent _ d _) = d description (Destructive _ d _) = d description (Description d) = d timeUnit :: Benchmark a -> TimeUnit timeUnit (Idempotent u _ _) = u timeUnit (Destructive u _ _) = u timeUnit (Description _) = Seconds reset :: Command () reset = do resetMemoryUsed resetTimeUsed exec :: Benchmark a -> FilePath -> TestRepo -> Command a exec (Idempotent _ _ cmd) darcs_path tr = do cd "_playground" verbose "cd _playground" cmd (darcs darcs_path) tr exec (Destructive _ _ cmd) darcs_path tr = do cd "_playground" let cleanup = verbose "cd .. ; rm -rf _playground" >> cd ".." >> rm_rf "_playground" res <- cmd (darcs darcs_path) tr `catchError` \e -> (cleanup >> throw e) cleanup return res exec (Description _) _ _ = fail "Cannot run description-only benchmark." defaultrepo, sources :: FilePath -> FilePath defaultrepo path = path "_darcs" "prefs" "defaultrepo" sources path = path "_darcs" "prefs" "sources" prepare :: String -> Command () prepare origrepo = do progress "!" >> verbose "rm -rf _playground" rm_rf "_playground" liftIO $ createDirectory "_playground" let playrepo = "_playground" "repo" isrepo <- liftIO $ doesDirectoryExist (origrepo "_darcs") unless isrepo $ fail $ origrepo ++ ": Not a darcs repository!" progress "." >> verbose ("cp -a '" ++ origrepo ++ "' '" ++ playrepo ++ "'") liftIO $ copyTree origrepo playrepo progress "." >> verbose ("# sanitize " ++ playrepo) wd <- pwd liftIO $ do writeFile (defaultrepo playrepo) (wd origrepo) removeFile (sources playrepo) `catch` \_ -> return () prepareIfDifferent :: String -> Command () prepareIfDifferent origrepo = do let playrepo = "_playground" "repo" exist <- test_e "_playground" current' <- if exist then liftIO $ readFile (defaultrepo playrepo) else return "" let current = reverse (dropWhile (=='\n') $ reverse current') wd <- pwd if (exist && current == wd origrepo) then progress "..." >> verbose ("# leaving " ++ playrepo ++ " alone") else prepare origrepo -- | Run a benchmark as many times as it takes to pass a minimum threshold -- of time or iterations (whichever comes first) -- Useful for very small benchmarks adaptive :: Double -- ^ seconds -> (Int,Int) -- ^ min, max iterations -> Command MemTime -> Command [MemTime] adaptive thresh (iters_min,iters_max) cmd = cmd >> -- just once to warm up the disk cache (eliminate a source of variance) go thresh iters_max [] where go t i acc | i <= 0 || (t <= 0 && i <= iters_enough) = return acc go t i acc = do verbose $ "# adaptive: iterations remaining: " ++ show i ++ " time remaining: " ++ show t mt@(MemTime _ t2) <- cmd go (t - t2) (i - 1) (mt : acc) iters_enough = iters_max - iters_min run :: Test a -> Command (Maybe MemTimeOutput) run (Test benchmark tr (TestBinary bin)) = do (Just `fmap` run') `catchError` \e -> do echo_n_err $ " error: " ++ show e return Nothing where run' = do progress $ bin ++ " " ++ description benchmark ++ " [" ++ trName tr ++ "]: " verbose $ "\n# testing; binary = " ++ bin ++ ", benchmark = " ++ description benchmark ++ ", repository = " ++ trName tr exe <- which $ bin darcs_path <- case exe of Nothing -> canonize bin Just p -> return p times <- adaptive 10 (3,100) . sub $ do prepareIfDifferent (trPath tr) liftIO (readIORef maybeVMFlush >>= \go -> go darcs_path) timed (exec benchmark darcs_path tr) let result = mkMemTimeOutput times (timeUnit benchmark) spaces = 45 - (length bin + length (description benchmark) + length (trName tr)) result_str = unwords [ formatTimeResult result, formatMemoryResult result, formatSampleSize result ] progress $ (replicate spaces ' ') ++ result_str ++ "\n" verbose $ "# result: " ++ result_str return result formatNumber :: (PrintfArg a, Fractional a) => a -> String formatNumber = printf $ "%."++(show precision)++"f" formatSampleSize :: MemTimeOutput -> String formatSampleSize mt = show (mtSampleSize mt) ++ "x" formatTimeResult :: MemTimeOutput -> String formatTimeResult mt = formatNumber (adjust (mtTimeMean mt)) ++ show (mtUnit mt) ++ " d=" ++ formatNumber (mtTimeDev mt) where adjust = (*) (multiplier (mtUnit mt)) formatMemoryResult :: MemTimeOutput -> String formatMemoryResult mt = formatNumber ((realToFrac (mtMemMean mt / (1024*1024))) :: Float) ++ "M" tabulateRepo :: (MemTimeOutput -> String) -- ^ formatter -> String -> [(Test a, Maybe MemTimeOutput)] -> Tab.Table String String String tabulateRepo format repo results = Tab.Table rowhdrs colhdrs rows where rowhdrs = Tab.Group Tab.NoLine $ map Tab.Header rownames colhdrs = Tab.Group Tab.SingleLine $ map Tab.Header colnames colnames = nub [ label | (Test _ _ (TestBinary label), _) <- interesting ] rownames = nub [ description bench | (Test bench _ _, _) <- interesting ] interesting = [ test | test@(Test _ r _, _) <- results, trName r == repo ] rows = [ concat [ fmt $ find (match row column) interesting | column <- colnames ] | row <- rownames ] match bench binary (Test bench' _ (TestBinary binary'), _) = bench == description bench' && binary == binary' fmt (Just (_, Just x)) = [ format x ] fmt _ = [ "-" ] type BenchmarkTable = Tab.Table String String String tabulate :: [(Test a, Maybe MemTimeOutput)] -> [(String, (BenchmarkTable, BenchmarkTable))] tabulate results = map (second tables) repoGroups where tables [] = error "tabulate error - empty list" tables trs@(tr0:_) = ( mergeVariantTables (trVariants tr0) $ map (mkTimeTable . trName) trs , mergeVariantTables (trVariants tr0) $ map (mkMemTable . trName) trs ) repoGroups = buckets trCoreName $ nub [ r | (Test _ r _, _) <- results ] mkTimeTable r = tabulateRepo formatTimeResult r results mkMemTable r = tabulateRepo formatMemoryResult r results buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ] buckets f = map (first head . unzip) . groupBy ((==) `on` fst) . sortBy (compare `on` fst) . map (\x -> (f x, x)) -- For now we'll try just putting them side by side mergeVariantTables :: [Variant] -> [BenchmarkTable] -> BenchmarkTable mergeVariantTables _ [] = error "can't merge empty tables" mergeVariantTables _ [t] = t -- don't do anything weird if there's only one variant mergeVariantTables variants tbls@(t0:_) = Tab.Table rowhdrs colhdrs rows where rowhdrs = getRowhdrs t0 colhdrs = Tab.Group Tab.NoLine . map Tab.Header . appendVariants . rights . Tab.flattenHeader $ getColHdrs t0 rows = map concat . transpose . map getRows $ tbls -- getRowhdrs (Tab.Table rh _ _) = rh getColHdrs (Tab.Table _ ch _) = ch getRows (Tab.Table _ _ rs) = rs -- appendVariants hs = concatMap (\v -> map (\h -> appendVariant h v) hs) variants appendVariant h v = case vId v of DefaultVariant -> h _ -> vSuffix v ++ " " ++ h timed :: Command a -> Command MemTime timed a = do resetMemoryUsed t1 <- liftIO $ getCurrentTime a t2 <- liftIO $ getCurrentTime mem <- memoryUsed resetMemoryUsed return $ MemTime (fromIntegral mem) (realToFrac $ diffUTCTime t2 t1) check_darcs :: String -> IO () check_darcs cmd = do (_,outH,_,procH) <- runInteractiveCommand $ cmd ++ " --version" out <- strictGetContents outH waitForProcess procH case out of '2':'.':_ -> return () _ -> fail $ cmd ++ ": Not darcs 2.x binary." verbose :: String -> Command () verbose m = liftIO $ do loud <- isLoud when loud $ UTF8.hPutStrLn stderr m progress :: String -> Command () progress m = liftIO $ do loud <- isLoud unless loud $ UTF8.hPutStr stderr m drain :: Handle -> Bool -> IO (Chan String) drain h verb = do chan <- newChan let work acc = do line <- hGetLine h when verb $ putStrLn ("## " ++ line) work (acc ++ line) `catch` \_ -> writeChan chan acc forkIO $ work "" return chan darcs :: String -> [String] -> Command String darcs cmd args' = do stats_f <- liftIO $ do tmpdir <- getTemporaryDirectory (f, h) <- openTempFile tmpdir "darcs-stats-XXXX" hClose h return f let args = args' ++ ["+RTS", "-s" ++ stats_f, "-RTS"] loud <- liftIO isLoud verbose . unwords $ cmd:args (res, _, stats) <- liftIO $ do mPlayground <- seekPlayground `fmap` getCurrentDirectory mEnv <- case mPlayground of Nothing -> return Nothing Just p -> (Just . replace "HOME" p) `fmap` getEnvironment (_,outH,errH,procH) <- runInteractiveProcess cmd args Nothing mEnv res' <- drain outH loud errs' <- drain errH loud ex <- waitForProcess procH stats <- do c <- readFile stats_f removeFile stats_f `catch` \e -> hPutStrLn stderr (show e) return c `catch` \_ -> return "" errs <- readChan errs' case ex of ExitSuccess -> return () ExitFailure n -> fail $ "darcs failed with error code " ++ show n ++ "\nsaying: " ++ errs res <- readChan res' return (res, errs, stats) let bytes = (stats =~ "([0-9, ]+) M[bB] total memory in use") :: String mem = (read (filter (`elem` "0123456789") bytes) :: Int) recordMemoryUsed $ mem * 1024 * 1024 return res where replace k v xs = (k,v) : filter ((/= k) . fst) xs seekPlayground :: FilePath -> Maybe FilePath seekPlayground dir = if playground `elem` pieces then Just . joinPath . reverse . dropWhile (/= playground) . reverse $ pieces else Nothing where pieces = splitDirectories dir playground = "_playground" mkVariant :: String -> String -> Variant -> Command () mkVariant origrepo darcs_path v = case vId v of OptimizePristineVariant -> do isrepo <- liftIO $ doesDirectoryExist (origrepo "_darcs") unless isrepo $ fail $ origrepo ++ ": Not a darcs repository!" variant_isrepo <- liftIO $ doesDirectoryExist (variant_repo "_darcs") unless variant_isrepo $ do echo $ "Setting up " ++ vDescription v ++ " variant of " ++ origrepo verbose ("cp -a '" ++ origrepo ++ "' '" ++ variant_repo ++ "'") liftIO $ copyTree origrepo variant_repo verbose ("# sanitize " ++ variant_repo) liftIO $ removeFile (sources variant_repo) `catch` \_ -> return () darcs darcs_path [ "optimize", "--pristine", "--repodir", variant_repo ] return () DefaultVariant -> return () where variant_repo = variantRepoName v origrepo variantRepoName :: Variant -> String -> String variantRepoName (Variant { vId = DefaultVariant }) x = x variantRepoName v x = "variant" <.> stripped x ++ "-" ++ vSuffix v where stripped y | "repo." `isPrefixOf` y = stripped (drop 5 y) | "-hashed" `isSuffixOf` y = take (length y - 7) y | otherwise = y setupVariants :: [TestRepo] -> TestBinary -> Command () setupVariants repos (TestBinary bin) = sequence_ [ mkVariant (trPath repo) bin variant | repo <- repos, variant <- trVariants repo ] benchMany :: [TestRepo] -> [TestBinary] -> [Benchmark a] -> Command [(Test a, Maybe MemTimeOutput)] benchMany repos bins benches = do fmap concat $ forM (map repoAndVariants repos) $ \rs -> do res <- sequence [ do let test = Test bench repo bin memtime <- run test return (test, memtime) | repo <- rs, bin <- bins, bench <- benches ] case tabulate res of [] -> return () [(_,(t,_))] -> echo_n $ TR.render id id id t _ -> error "Not expecting more than one table for a repo and its variants" return res where repoAndVariants r = map (r `tweakVia`) (trVariants r) tweakVia tr v = case vId v of DefaultVariant -> tr _ -> tr { trPath = variantRepoName v (trPath tr) , trName = trName tr ++ " " ++ vShortName v } renderMany :: [(Test a, Maybe MemTimeOutput)] -> Command () renderMany t = do echo . unlines $ [ "Copy and paste below" , "=====================================================" , "" ] ++ map detail [ "Machine description", "Year", "CPU", "Memory", "Hard disk", "Notes" ] ++ [ "" , "NB:" , "" , def "d" "std deviation" ] ++ map (describe . toVariant) [ OptimizePristineVariant ] echo "" sequence_ [ do echo r echo $ replicate (length r) '-' ++ "\n" echo_n $ TR.render id id id t_tab echo_n $ TR.render id id id m_tab | (r, (t_tab,m_tab)) <- tabulate t ] where detail k = k ++ "\n ????" describe v = def (vSuffix v) (vDescription v ++ " variant") def k v = "* " ++ k ++ " = " ++ v