{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} module Futhark.CLI.Autotune (main) where import Control.Monad import qualified Data.ByteString.Char8 as SBS import Data.Time.Clock.POSIX import Data.Tree import Data.List import Data.Maybe import Text.Read (readMaybe) import Text.Regex.TDFA import qualified Data.Text as T import System.Environment (getExecutablePath) import System.Exit import System.Process import System.FilePath import System.Console.GetOpt import Futhark.Bench import Futhark.Test import Futhark.Util.Options data AutotuneOptions = AutotuneOptions { optBackend :: String , optFuthark :: Maybe String , optRuns :: Int , optTuning :: Maybe String , optExtraOptions :: [String] , optVerbose :: Int } initialAutotuneOptions :: AutotuneOptions initialAutotuneOptions = AutotuneOptions "opencl" Nothing 10 (Just "tuning") [] 0 compileOptions :: AutotuneOptions -> IO CompileOptions compileOptions opts = do futhark <- maybe getExecutablePath return $ optFuthark opts return $ CompileOptions { compFuthark = futhark , compBackend = optBackend opts } runOptions :: Path -> Int -> AutotuneOptions -> RunOptions runOptions path timeout_s opts = RunOptions { runRunner = "" , runRuns = optRuns opts , runExtraOptions = "-L" : map opt path ++ optExtraOptions opts , runTimeout = timeout_s } where opt (name, val) = "--size=" ++ name ++ "=" ++ show val type Path = [(String, Int)] regexGroups :: Regex -> String -> Maybe [String] regexGroups regex s = do (_, _, _, groups) <- matchM regex s :: Maybe (String, String, String, [String]) Just groups comparisons :: String -> [(String,Int)] comparisons = mapMaybe isComparison . lines where regex = makeRegex ("Compared ([^ ]+) <= (-?[0-9]+)" :: String) isComparison l = do [thresh, val] <- regexGroups regex l val' <- readMaybe val return (thresh, val') data RunPurpose = RunSample -- ^ Only a single run. | RunBenchmark -- ^ As many runs as needed. type RunDataset = Path -> RunPurpose -> IO (Either String ([(String, Int)], Double)) type DatasetName = String prepare :: AutotuneOptions -> FilePath -> IO [(DatasetName, RunDataset)] prepare opts prog = do spec <- testSpecFromFile prog copts <- compileOptions opts truns <- case testAction spec of RunCases ios _ _ | Just runs <- iosTestRuns <$> find ((=="main") . iosEntryPoint) ios -> do res <- prepareBenchmarkProgram copts prog ios case res of Left (err, errstr) -> do putStrLn err maybe (return ()) SBS.putStrLn errstr exitFailure Right () -> return runs _ -> fail "Program does not have a 'main' entry point with datasets." let runnableDataset trun = case runExpectedResult trun of Succeeds expected | null (runTags trun `intersect` ["notune", "disable"]) -> Just (runDescription trun, run trun expected) _ -> Nothing -- We wish to let datasets run for the untuned time + 20% + 1 second. let timeout elapsed = ceiling (elapsed * 1.2) + 1 forM (mapMaybe runnableDataset truns) $ \(dataset, do_run) -> do bef <- toRational <$> getPOSIXTime res <- do_run 60000 [] RunBenchmark aft <- toRational <$> getPOSIXTime case res of Left err -> do putStrLn $ "Error when running " ++ prog ++ ":" putStrLn err exitFailure Right _ -> do let t = timeout $ aft - bef putStrLn $ "Calculated timeout for " ++ dataset ++ " : " ++ show t ++ "s" return (dataset, do_run t) where run trun expected timeout path purpose = do let opts' = case purpose of RunSample -> opts { optRuns = 1 } RunBenchmark -> opts averageRuntime (runres, errout) = (comparisons (T.unpack errout), fromIntegral (sum (map runMicroseconds runres)) / fromIntegral (optRuns opts)) ropts = runOptions path timeout opts' when (optVerbose opts > 1) $ putStrLn $ "Running with options: " ++ unwords (runExtraOptions ropts) either (Left . T.unpack) (Right . averageRuntime) <$> benchmarkDataset ropts prog "main" (runInput trun) expected (testRunReferenceOutput prog "main" trun) --- Benchmarking a program data DatasetResult = DatasetResult [(String, Int)] Double deriving Show --- Finding initial comparisons. --- Extracting threshold hierarchy. type ThresholdForest = Forest (String, Bool) thresholdMin, thresholdMax :: Int thresholdMin = 1 thresholdMax = 2000000000 -- | Depth-first list of thresholds to tune in order, and a -- corresponding assignment of ancestor thresholds to ensure that they -- are used. tuningPaths :: ThresholdForest -> [(String, Path)] tuningPaths = concatMap (treePaths []) where treePaths ancestors (Node (v, _) children) = concatMap (onChild ancestors v) children ++ [(v, ancestors)] onChild ancestors v child@(Node (_, cmp) _) = treePaths (ancestors++[(v, t cmp)]) child t False = thresholdMax t True = thresholdMin thresholdForest :: FilePath -> IO ThresholdForest thresholdForest prog = do thresholds <- getThresholds <$> readProcess ("." dropExtension prog) ["--print-sizes"] "" let root (v, _) = ((v, False), []) return $ unfoldForest (unfold thresholds) $ map root $ filter (null . snd) thresholds where getThresholds = mapMaybe findThreshold . lines regex = makeRegex ("(.*)\\ \\(threshold\\ \\((.*)\\)\\)" :: String) findThreshold :: String -> Maybe (String, [(String, Bool)]) findThreshold l = do [grp1, grp2] <- regexGroups regex l return (grp1, filter (not . null . fst) $ map (\x -> if "!" `isPrefixOf` x then (drop 1 x, False) else (x, True)) $ words grp2) unfold thresholds ((parent, parent_cmp), ancestors) = let ancestors' = parent : ancestors isChild (v, v_ancestors) = do cmp <- lookup parent v_ancestors guard $ sort (map fst v_ancestors) == sort (parent : ancestors) return ((v, cmp), ancestors') in ((parent, parent_cmp), mapMaybe isChild thresholds) --- Doing the atual tuning intersectRanges :: [(Int, Int)] -> (Int, Int) intersectRanges = foldl' f (thresholdMin, thresholdMax) where f (xmin, xmax) (ymin, ymax) = -- XXX: what happens when the intersection is empty? (xmin `max` ymin, xmax `min` ymax) tuneThreshold :: AutotuneOptions -> [(DatasetName, RunDataset)] -> Path -> (String, Path) -> IO Path tuneThreshold opts datasets already_tuned (v, v_path) = do ranges <- forM datasets $ \(dataset_name, run) -> do putStrLn $ unwords ["Tuning", v, "on dataset", dataset_name] sample_run <- run path RunSample case sample_run of Left err -> do -- If the sampling run fails, we treat it as zero information. -- One of our ancestor thresholds will have be set such that -- this path is never taken. when (optVerbose opts > 0) $ putStrLn $ "Sampling run failed:\n" ++ err return (thresholdMin, thresholdMax) Right (cmps, _) -> case lookup v cmps of Nothing -> do -- A missing comparison is not necessarily a bug - it may -- simply mean that this comparison is inside a loop or -- branch that is never reached for this dataset. In such -- cases, the optimal range is universal. when (optVerbose opts > 0) $ putStrLn "Irrelevant for dataset.\n" return (thresholdMin, thresholdMax) Just e_par -> do t_run <- run path_t RunBenchmark f_run <- run path_f RunBenchmark let prefer_t = (thresholdMin, e_par) prefer_f = (e_par+1, thresholdMax) case (t_run, f_run) of (Left err, _) -> do when (optVerbose opts > 0) $ putStrLn $ "True comparison run failed:\n" ++ err return prefer_f (_, Left err) -> do when (optVerbose opts > 0) $ putStrLn $ "False comparison run failed:\n" ++ err return prefer_t (Right (_, runtime_t), Right (_, runtime_f)) -> if runtime_t < runtime_f then do when (optVerbose opts > 0) $ putStrLn "True branch is fastest." return prefer_t else do when (optVerbose opts > 0) $ putStrLn "False branch is fastest." return prefer_f let (_lower, upper) = intersectRanges ranges return $ (v,upper) : already_tuned where path = already_tuned ++ v_path path_t = (v, thresholdMin) : path path_f = (v, thresholdMax) : path --- CLI tune :: AutotuneOptions -> FilePath -> IO Path tune opts prog = do putStrLn $ "Compiling " ++ prog ++ "..." datasets <- prepare opts prog forest <- thresholdForest prog when (optVerbose opts > 0) $ putStrLn $ ("Threshold forest:\n"++) $ drawForest $ map (fmap show) forest foldM (tuneThreshold opts datasets) [] $ tuningPaths forest runAutotuner :: AutotuneOptions -> FilePath -> IO () runAutotuner opts prog = do best <- tune opts prog let tuning = unlines $ do (s, n) <- sortOn fst best return $ s ++ "=" ++ show n case optTuning opts of Nothing -> return () Just suffix -> do writeFile (prog <.> suffix) tuning putStrLn $ "Wrote " ++ prog <.> suffix putStrLn $ "Result of autotuning:\n" ++ tuning commandLineOptions :: [FunOptDescr AutotuneOptions] commandLineOptions = [ Option "r" ["runs"] (ReqArg (\n -> case reads n of [(n', "")] | n' >= 0 -> Right $ \config -> config { optRuns = n' } _ -> Left $ error $ "'" ++ n ++ "' is not a non-negative integer.") "RUNS") "Run each test case this many times." , Option [] ["backend"] (ReqArg (\backend -> Right $ \config -> config { optBackend = backend }) "BACKEND") "The compiler used (defaults to 'opencl')." , Option [] ["futhark"] (ReqArg (\prog -> Right $ \config -> config { optFuthark = Just prog }) "PROGRAM") "The binary used for operations (defaults to 'futhark')." , Option [] ["tuning"] (ReqArg (\s -> Right $ \config -> config { optTuning = Just s }) "EXTENSION") "Write tuning files with this extension (default: .tuning)." , Option "v" ["verbose"] (NoArg $ Right $ \config -> config { optVerbose = optVerbose config + 1 }) "Enable logging. Pass multiple times for more." ] main :: String -> [String] -> IO () main = mainWithOptions initialAutotuneOptions commandLineOptions "options... program" $ \progs config -> case progs of [prog] -> Just $ runAutotuner config prog _ -> Nothing