{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} -- | @futhark bench@ module Futhark.CLI.Bench (main) where import Control.Arrow (first) import Control.Exception import Control.Monad import Control.Monad.Except hiding (throwError) import qualified Data.ByteString.Char8 as SBS import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Either import Data.Function ((&)) import Data.IORef import Data.List (sortBy) import qualified Data.Map as M import Data.Maybe import Data.Ord import qualified Data.Text as T import qualified Data.Text.IO as T import Data.Time.Clock (NominalDiffTime, UTCTime, diffUTCTime, getCurrentTime) import qualified Data.Vector.Unboxed as U import Futhark.Bench import Futhark.Server import Futhark.Test import Futhark.Util (atMostChars, fancyTerminal, pmapIO) import Futhark.Util.Console import Futhark.Util.Options import Futhark.Util.Pretty (prettyText) import Futhark.Util.ProgressBar import Statistics.Resampling (Estimator (..), resample) import Statistics.Resampling.Bootstrap (bootstrapBCA) import Statistics.Types (cl95, confIntLDX, confIntUDX, estError, estPoint) import System.Console.ANSI (clearLine) import System.Directory import System.Environment import System.Exit import System.FilePath import System.IO import System.Random.MWC (create) import Text.Printf import Text.Regex.TDFA data BenchOptions = BenchOptions { optBackend :: String, optFuthark :: Maybe String, optRunner :: String, optMinRuns :: Int, optMinTime :: NominalDiffTime, optExtraOptions :: [String], optCompilerOptions :: [String], optJSON :: Maybe FilePath, optTimeout :: Int, optSkipCompilation :: Bool, optExcludeCase :: [String], optIgnoreFiles :: [Regex], optEntryPoint :: Maybe String, optTuning :: Maybe String, optCacheExt :: Maybe String, optConvergencePhase :: Bool, optConvergenceMaxTime :: NominalDiffTime, optConcurrency :: Maybe Int, optVerbose :: Int, optTestSpec :: Maybe FilePath } initialBenchOptions :: BenchOptions initialBenchOptions = BenchOptions { optBackend = "c", optFuthark = Nothing, optRunner = "", optMinRuns = 10, optMinTime = 0.5, optExtraOptions = [], optCompilerOptions = [], optJSON = Nothing, optTimeout = -1, optSkipCompilation = False, optExcludeCase = ["nobench", "disable"], optIgnoreFiles = [], optEntryPoint = Nothing, optTuning = Just "tuning", optCacheExt = Nothing, optConvergencePhase = True, optConvergenceMaxTime = 5 * 60, optConcurrency = Nothing, optVerbose = 0, optTestSpec = Nothing } runBenchmarks :: BenchOptions -> [FilePath] -> IO () runBenchmarks opts paths = do -- We force line buffering to ensure that we produce running output. -- Otherwise, CI tools and the like may believe we are hung and kill -- us. hSetBuffering stdout LineBuffering benchmarks <- filter (not . ignored . fst) <$> testSpecsFromPathsOrDie paths -- Try to avoid concurrency at both program and data set level. let opts' = if length paths /= 1 then opts {optConcurrency = Just 1} else opts (skipped_benchmarks, compiled_benchmarks) <- partitionEithers <$> pmapIO (optConcurrency opts) (compileBenchmark opts') benchmarks when (anyFailedToCompile skipped_benchmarks) exitFailure putStrLn $ "Reporting arithmetic mean runtime of at least " <> show (optMinRuns opts) <> " runs for each dataset (min " <> show (optMinTime opts) <> ")." when (optConvergencePhase opts) . putStrLn $ "More runs automatically performed for up to " <> show (optConvergenceMaxTime opts) <> " to ensure accurate measurement." futhark <- FutharkExe . compFuthark <$> compileOptions opts maybe_results <- mapM (runBenchmark opts futhark) (sortBy (comparing fst) compiled_benchmarks) let results = concat $ catMaybes maybe_results case optJSON opts of Nothing -> pure () Just file -> LBS.writeFile file $ encodeBenchResults results when (any isNothing maybe_results || anyFailed results) exitFailure where ignored f = any (`match` f) $ optIgnoreFiles opts anyFailed :: [BenchResult] -> Bool anyFailed = any failedBenchResult where failedBenchResult (BenchResult _ xs) = any failedResult xs failedResult (DataResult _ Left {}) = True failedResult _ = False anyFailedToCompile :: [SkipReason] -> Bool anyFailedToCompile = not . all (== Skipped) data SkipReason = Skipped | FailedToCompile deriving (Eq) compileOptions :: BenchOptions -> IO CompileOptions compileOptions opts = do futhark <- maybe getExecutablePath pure $ optFuthark opts pure $ CompileOptions { compFuthark = futhark, compBackend = optBackend opts, compOptions = optCompilerOptions opts } compileBenchmark :: BenchOptions -> (FilePath, ProgramTest) -> IO (Either SkipReason (FilePath, [InputOutputs])) compileBenchmark opts (program, program_spec) = do spec <- maybe (pure program_spec) testSpecFromFileOrDie $ optTestSpec opts case testAction spec of RunCases cases _ _ | "nobench" `notElem` testTags spec, "disable" `notElem` testTags spec, any hasRuns cases -> if optSkipCompilation opts then do exists <- doesFileExist $ binaryName program if exists then pure $ Right (program, cases) else do putStrLn $ binaryName program ++ " does not exist, but --skip-compilation passed." pure $ Left FailedToCompile else do putStr $ "Compiling " ++ program ++ "...\n" compile_opts <- compileOptions opts res <- prepareBenchmarkProgram (optConcurrency opts) compile_opts program cases case res of Left (err, errstr) -> do putStrLn $ inRed err maybe (pure ()) SBS.putStrLn errstr pure $ Left FailedToCompile Right () -> pure $ Right (program, cases) _ -> pure $ Left Skipped where hasRuns (InputOutputs _ runs) = not $ null runs withProgramServer :: FilePath -> FilePath -> [String] -> (Server -> IO a) -> IO (Maybe a) withProgramServer program runner extra_options f = do -- Explicitly prefixing the current directory is necessary for -- readProcessWithExitCode to find the binary when binOutputf has -- no path component. let binOutputf = dropExtension program binpath = "." binOutputf (to_run, to_run_args) | null runner = (binpath, extra_options) | otherwise = (runner, binpath : extra_options) liftIO $ (Just <$> withServer (futharkServerCfg to_run to_run_args) f) `catch` onError where onError :: SomeException -> IO (Maybe a) onError e = do putStrLn $ inBold $ inRed $ "\nFailed to run " ++ program putStrLn $ inRed $ show e pure Nothing -- Truncate dataset name display after this many characters. maxDatasetNameLength :: Int maxDatasetNameLength = 40 runBenchmark :: BenchOptions -> FutharkExe -> (FilePath, [InputOutputs]) -> IO (Maybe [BenchResult]) runBenchmark opts futhark (program, cases) = do (tuning_opts, tuning_desc) <- determineTuning (optTuning opts) program let runopts = optExtraOptions opts ++ tuning_opts ++ determineCache (optCacheExt opts) program withProgramServer program (optRunner opts) runopts $ \server -> mapM (forInputOutputs server tuning_desc) $ filter relevant cases where forInputOutputs server tuning_desc (InputOutputs entry_name runs) = do putStr $ inBold $ "\n" ++ program' ++ tuning_desc ++ ":\n" BenchResult program' . catMaybes <$> mapM (runBenchmarkCase server opts futhark program entry_name pad_to) runs where program' = if entry_name == "main" then program else program ++ ":" ++ T.unpack entry_name relevant = maybe (const True) (==) (optEntryPoint opts) . T.unpack . iosEntryPoint pad_to = foldl max 0 $ concatMap (map (length . atMostChars maxDatasetNameLength . runDescription) . iosTestRuns) cases runOptions :: ((Int, Maybe Double) -> IO ()) -> BenchOptions -> RunOptions runOptions f opts = RunOptions { runMinRuns = optMinRuns opts, runMinTime = optMinTime opts, runTimeout = optTimeout opts, runVerbose = optVerbose opts, runConvergencePhase = optConvergencePhase opts, runConvergenceMaxTime = optConvergenceMaxTime opts, runResultAction = f } descString :: String -> Int -> String descString desc pad_to = desc ++ ": " ++ replicate (pad_to - length desc) ' ' progress :: Double -> T.Text progress elapsed = progressBar ( ProgressBar { progressBarSteps = 10, progressBarBound = 1, progressBarElapsed = elapsed } ) interimResult :: Int -> Int -> Double -> T.Text interimResult us_sum runs elapsed = T.pack (printf "%10.0fμs " avg) <> progress elapsed <> (" " <> prettyText runs <> " runs") where avg :: Double avg = fromIntegral us_sum / fromIntegral runs convergenceBar :: (T.Text -> IO ()) -> IORef Int -> Int -> Int -> Double -> IO () convergenceBar p spin_count us_sum i rse' = do spin_idx <- readIORef spin_count let spin = progressSpinner spin_idx p $ T.pack $ printf "%10.0fμs %s (RSE of mean: %2.4f; %4d runs)" avg spin rse' i writeIORef spin_count (spin_idx + 1) where avg :: Double avg = fromIntegral us_sum / fromIntegral i data BenchPhase = Initial | Convergence mkProgressPrompt :: BenchOptions -> Int -> String -> UTCTime -> IO ((Maybe Int, Maybe Double) -> IO ()) mkProgressPrompt opts pad_to dataset_desc start_time | fancyTerminal = do count <- newIORef (0, 0) phase_var <- newIORef Initial spin_count <- newIORef 0 pure $ \(us, rse) -> do putStr "\r" -- Go to start of line. let p s = T.putStr $ T.pack (descString (atMostChars maxDatasetNameLength dataset_desc) pad_to) <> s (us_sum, i) <- readIORef count now <- liftIO getCurrentTime let determineProgress i' = let time_elapsed = toDouble (realToFrac (diffUTCTime now start_time) / optMinTime opts) runs_elapsed = fromIntegral i' / fromIntegral (optMinRuns opts) in -- The progress bar is the _shortest_ of the -- time-based or runs-based estimate. This is -- intended to avoid a situation where the progress -- bar is full but stuff is still happening. On the -- other hand, it means it can sometimes shrink. min time_elapsed runs_elapsed phase <- readIORef phase_var case (us, phase, rse) of (Nothing, _, _) -> let elapsed = determineProgress i in p $ T.pack (replicate 13 ' ') <> progress elapsed (Just us', Initial, Nothing) -> do let us_sum' = us_sum + us' i' = i + 1 writeIORef count (us_sum', i') let elapsed = determineProgress i' p $ interimResult us_sum' i' elapsed (Just us', Initial, Just rse') -> do -- Switched from phase 1 to convergence; discard all -- prior results. writeIORef count (us', 1) writeIORef phase_var Convergence convergenceBar p spin_count us' 1 rse' (Just us', Convergence, Just rse') -> do let us_sum' = us_sum + us' i' = i + 1 writeIORef count (us_sum', i') convergenceBar p spin_count us_sum' i' rse' (Just _, Convergence, Nothing) -> pure () -- Probably should not happen. putStr " " -- Just to move the cursor away from the progress bar. hFlush stdout | otherwise = do putStr $ descString dataset_desc pad_to hFlush stdout pure $ const $ pure () where toDouble = fromRational . toRational reportResult :: [RunResult] -> (Double, Double) -> IO () reportResult results (ci_lower, ci_upper) = do let runtimes = map (fromIntegral . runMicroseconds) results avg = sum runtimes / fromIntegral (length runtimes) :: Double putStrLn $ printf "%10.0fμs (95%% CI: [%10.1f, %10.1f])" avg ci_lower ci_upper runBenchmarkCase :: Server -> BenchOptions -> FutharkExe -> FilePath -> T.Text -> Int -> TestRun -> IO (Maybe DataResult) runBenchmarkCase _ _ _ _ _ _ (TestRun _ _ RunTimeFailure {} _ _) = pure Nothing -- Not our concern, we are not a testing tool. runBenchmarkCase _ opts _ _ _ _ (TestRun tags _ _ _ _) | any (`elem` tags) $ optExcludeCase opts = pure Nothing runBenchmarkCase server opts futhark program entry pad_to tr@(TestRun _ input_spec (Succeeds expected_spec) _ dataset_desc) = do start_time <- liftIO getCurrentTime prompt <- mkProgressPrompt opts pad_to dataset_desc start_time -- Report the dataset name before running the program, so that if an -- error occurs it's easier to see where. prompt (Nothing, Nothing) res <- benchmarkDataset server (runOptions (prompt . first Just) opts) futhark program entry input_spec expected_spec (testRunReferenceOutput program entry tr) when fancyTerminal $ do clearLine putStr "\r" putStr $ descString (atMostChars maxDatasetNameLength dataset_desc) pad_to case res of Left err -> liftIO $ do putStrLn "" putStrLn $ inRed $ T.unpack err pure $ Just $ DataResult dataset_desc $ Left err Right (runtimes, errout) -> do let vec_runtimes = U.fromList $ map (fromIntegral . runMicroseconds) runtimes g <- create resampled <- liftIO $ resample g [Mean] 70000 vec_runtimes let bootstrapCI = ( estPoint boot - confIntLDX (estError boot), estPoint boot + confIntUDX (estError boot) ) where boot = head $ bootstrapBCA cl95 vec_runtimes resampled reportResult runtimes bootstrapCI -- We throw away the 'errout' because it is almost always -- useless and adds too much to the .json file size. This -- behaviour could be moved into a command line option if we -- wish. Result runtimes (getMemoryUsage errout) Nothing & Right & DataResult dataset_desc & Just & pure getMemoryUsage :: T.Text -> M.Map T.Text Int getMemoryUsage t = foldMap matchMap $ T.lines t where mem_regex = "Peak memory usage for space '([^']+)': ([0-9]+) bytes." :: T.Text matchMap line = case (line =~ mem_regex :: (T.Text, T.Text, T.Text, [T.Text])) of (_, _, _, [device, bytes]) -> M.singleton device (read $ T.unpack bytes) _ -> mempty commandLineOptions :: [FunOptDescr BenchOptions] commandLineOptions = [ Option "r" ["runs"] ( ReqArg ( \n -> case reads n of [(n', "")] | n' > 0 -> Right $ \config -> config { optMinRuns = n' } _ -> Left . optionsError $ "'" ++ n ++ "' is not a positive integer." ) "RUNS" ) "Run each test case this many times.", Option [] ["backend"] ( ReqArg (\backend -> Right $ \config -> config {optBackend = backend}) "PROGRAM" ) "The compiler used (defaults to 'futhark-c').", Option [] ["futhark"] ( ReqArg (\prog -> Right $ \config -> config {optFuthark = Just prog}) "PROGRAM" ) "The binary used for operations (defaults to same binary as 'futhark bench').", Option [] ["runner"] (ReqArg (\prog -> Right $ \config -> config {optRunner = prog}) "PROGRAM") "The program used to run the Futhark-generated programs (defaults to nothing).", Option "p" ["pass-option"] ( ReqArg ( \opt -> Right $ \config -> config {optExtraOptions = opt : optExtraOptions config} ) "OPT" ) "Pass this option to programs being run.", Option [] ["pass-compiler-option"] ( ReqArg ( \opt -> Right $ \config -> config {optCompilerOptions = opt : optCompilerOptions config} ) "OPT" ) "Pass this option to the compiler (or typechecker if in -t mode).", Option [] ["json"] ( ReqArg ( \file -> Right $ \config -> config {optJSON = Just file} ) "FILE" ) "Scatter results in JSON format here.", Option [] ["timeout"] ( ReqArg ( \n -> case reads n of [(n', "")] | n' < max_timeout -> Right $ \config -> config {optTimeout = fromIntegral n'} _ -> Left . optionsError $ "'" ++ n ++ "' is not an integer smaller than" ++ show max_timeout ++ "." ) "SECONDS" ) "Number of seconds before a dataset is aborted.", Option [] ["skip-compilation"] (NoArg $ Right $ \config -> config {optSkipCompilation = True}) "Use already compiled program.", Option [] ["exclude-case"] ( ReqArg ( \s -> Right $ \config -> config {optExcludeCase = s : optExcludeCase config} ) "TAG" ) "Do not run test cases with this tag.", Option [] ["ignore-files"] ( ReqArg ( \s -> Right $ \config -> config {optIgnoreFiles = makeRegex s : optIgnoreFiles config} ) "REGEX" ) "Ignore files matching this regular expression.", Option "e" ["entry-point"] ( ReqArg ( \s -> Right $ \config -> config {optEntryPoint = Just s} ) "NAME" ) "Only run this entry point.", Option [] ["tuning"] ( ReqArg (\s -> Right $ \config -> config {optTuning = Just s}) "EXTENSION" ) "Look for tuning files with this extension (defaults to .tuning).", Option [] ["cache-extension"] ( ReqArg (\s -> Right $ \config -> config {optCacheExt = Just s}) "EXTENSION" ) "Use cache files with this extension (none by default).", Option [] ["no-tuning"] (NoArg $ Right $ \config -> config {optTuning = Nothing}) "Do not load tuning files.", Option [] ["no-convergence-phase"] (NoArg $ Right $ \config -> config {optConvergencePhase = False}) "Do not run convergence phase.", Option [] ["convergence-max-seconds"] ( ReqArg ( \n -> case reads n of [(n', "")] | n' > 0 -> Right $ \config -> config {optConvergenceMaxTime = fromInteger n'} _ -> Left . optionsError $ "'" ++ n ++ "' is not a positive integer." ) "NUM" ) "Limit convergence phase to this number of seconds.", Option [] ["concurrency"] ( ReqArg ( \n -> case reads n of [(n', "")] | n' > 0 -> Right $ \config -> config {optConcurrency = Just n'} _ -> Left . optionsError $ "'" ++ n ++ "' is not a positive integer." ) "NUM" ) "Number of benchmarks to prepare (not run) concurrently.", Option [] ["spec-file"] (ReqArg (\s -> Right $ \config -> config {optTestSpec = Just s}) "FILE") "Use test specification from this file.", Option "v" ["verbose"] (NoArg $ Right $ \config -> config {optVerbose = optVerbose config + 1}) "Enable logging. Pass multiple times for more." ] where max_timeout :: Int max_timeout = maxBound `div` 1000000 excludeBackend :: BenchOptions -> BenchOptions excludeBackend config = config {optExcludeCase = "no_" <> optBackend config : optExcludeCase config} -- | Run @futhark bench@. main :: String -> [String] -> IO () main = mainWithOptions initialBenchOptions commandLineOptions "options... programs..." $ \progs config -> case progs of [] -> Nothing _ -> Just $ runBenchmarks (excludeBackend config) progs