{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, NamedFieldPuns, CPP #-} {-# LANGUAGE DeriveGeneric, StandaloneDeriving #-} -- | All the core types used by the rest of the HSBencher codebase. module HSBencher.Types ( -- * Benchmark building -- | The basic types for describing a single benchmark. mkBenchmark, Benchmark(..), RunFlags, CompileFlags, -- * Build method interface and applicability -- | A build method is applicable to a subset of target files -- (`FilePredicate`) and has a particular interface that HSbencher relies -- upon. BuildMethod(..), BuildResult(..), FilePredicate(..), filePredCheck, -- * Benchmark configuration spaces -- | Describe how many different ways you want to run your -- benchmarks. BenchSpace(..), ParamSetting(..), enumerateBenchSpace, compileOptsOnly, isCompileTime, toCompileFlags, toRunFlags, toEnvVars, toCmdPaths, BuildID, makeBuildID, DefaultParamMeaning(..), -- * HSBench Driver Configuration Config(..), BenchM, #ifdef FUSION_TABLES FusionConfig(..), #endif -- * Subprocesses and system commands CommandDescr(..), RunResult(..), emptyRunResult, SubProcess(..), LineHarvester(..), orHarvest, -- * Benchmark outputs for upload BenchmarkResult(..), emptyBenchmarkResult, -- * For convenience -- large records demand pretty-printing doc ) where import Control.Monad.Reader import Data.Char import Data.Word import Data.List import Data.Monoid import qualified Data.Map as M import Data.Maybe (catMaybes) import System.FilePath import System.Directory import System.Process (CmdSpec(..)) import qualified Data.ByteString.Char8 as B import qualified System.IO.Streams as Strm import Text.PrettyPrint.GenericPretty (Out(doc,docPrec), Generic) #ifdef FUSION_TABLES import Network.Google.FusionTables (TableId) #endif ---------------------------------------------------------------------------------------------------- -- Benchmark Build Methods ---------------------------------------------------------------------------------------------------- type EnvVars = [(String,String)] -- | The arguments passed (in a build-method specific way) to the running benchmark. type RunFlags = [String] -- | The arguments passed (in a build-method specific way) into the compilation process. type CompileFlags = [String] -- | Maps canonical command names, e.g. 'ghc', to absolute system paths. type PathRegistry = M.Map String String -- | A description of a set of files. The description may take one of multiple -- forms. data FilePredicate = WithExtension String -- ^ E.g. ".hs", WITH the dot. | IsExactly String -- ^ E.g. "Makefile" -- | SatisfiesPredicate (String -> Bool) | InDirectoryWithExactlyOne FilePredicate -- ^ A common pattern. For example, we can build a file foo.c, if it lives in a -- directory with exactly one "Makefile". | PredOr FilePredicate FilePredicate -- ^ Logical or. | AnyFile -- TODO: Allow arbitrary function predicates also. deriving (Show, Generic, Ord, Eq) -- instance Show FilePredicate where -- show (WithExtension s) = "" -- | This function gives meaning to the `FilePred` type. -- It returns a filepath to signal "True" and Nothing otherwise. filePredCheck :: FilePredicate -> FilePath -> IO (Maybe FilePath) filePredCheck pred path = let filename = takeFileName path in case pred of AnyFile -> return (Just path) IsExactly str -> return$ if str == filename then Just path else Nothing WithExtension ext -> return$ if takeExtension filename == ext then Just path else Nothing PredOr p1 p2 -> do x <- filePredCheck p1 path case x of Just _ -> return x Nothing -> filePredCheck p2 path InDirectoryWithExactlyOne p2 -> do ls <- getDirectoryContents (takeDirectory path) ls' <- fmap catMaybes $ mapM (filePredCheck p2) ls case ls' of [x] -> return (Just$ takeDirectory path x) _ -> return Nothing -- instance Show FilePredicate where -- show (WithExtension s) = "" -- | The result of doing a build. Note that `compile` can will throw an exception if compilation fails. data BuildResult = StandAloneBinary FilePath -- ^ This binary can be copied and executed whenever. | RunInPlace (RunFlags -> EnvVars -> CommandDescr) -- ^ In this case the build return what you need to do the benchmark run, but the -- directory contents cannot be touched until after than run is finished. instance Show BuildResult where show (StandAloneBinary p) = "StandAloneBinary "++p -- show (RunInPlace fn) = "RunInPlace "++show (fn [] []) show (RunInPlace fn) = "RunInPlace " -- | A completely encapsulated method of building benchmarks. Cabal and Makefiles -- are two examples of this. The user may extend it with their own methods. data BuildMethod = BuildMethod { methodName :: String -- ^ Identifies this build method for humans. -- , buildsFiles :: FilePredicate -- , canBuild :: FilePath -> IO Bool , canBuild :: FilePredicate -- ^ Can this method build a given file/directory? , concurrentBuild :: Bool -- ^ More than one build can happen at once. This -- implies that compile always returns StandAloneBinary. , compile :: PathRegistry -> BuildID -> CompileFlags -> FilePath -> BenchM BuildResult , clean :: PathRegistry -> BuildID -> FilePath -> BenchM () -- ^ Clean any left-over build results. , setThreads :: Maybe (Int -> [ParamSetting]) -- ^ Synthesize a list of compile/runtime settings that -- will control the number of threads. } instance Show BuildMethod where show BuildMethod{methodName, canBuild} = "" ---------------------------------------------------------------------------------------------------- -- HSBench Configuration ---------------------------------------------------------------------------------------------------- -- | A monad for benchamrking. This provides access to configuration options, but -- really, its main purpose is enabling logging. type BenchM a = ReaderT Config IO a -- | The global configuration for benchmarking. WARNING! This is an internal data -- structure. You shouldn't really use it. data Config = Config { benchlist :: [Benchmark DefaultParamMeaning] , benchsetName :: Maybe String -- ^ What identifies this set of benchmarks? Used to create fusion table. , benchversion :: (String, Double) -- ^ benchlist file name and version number (e.g. X.Y) -- , threadsettings :: [Int] -- ^ A list of #threads to test. 0 signifies non-threaded mode. , runTimeOut :: Maybe Double -- ^ Timeout for running benchmarks (if not specified by the benchmark specifically) , maxthreads :: Int , trials :: Int -- ^ number of runs of each configuration , skipTo :: Maybe Int -- ^ Where to start in the config space. , runID :: Maybe String -- ^ An over-ride for the run ID. , ciBuildID :: Maybe String -- ^ The build ID from the continuous integration system. , shortrun :: Bool , doClean :: Bool , keepgoing :: Bool -- ^ keep going after error , pathRegistry :: PathRegistry -- ^ Paths to executables. , hostname :: String , startTime :: Integer -- ^ Seconds since Epoch. , resultsFile :: String -- ^ Where to put timing results. , logFile :: String -- ^ Where to put more verbose testing output. , gitInfo :: (String,String,Int) -- ^ Branch, revision hash, depth. , buildMethods :: [BuildMethod] -- ^ Starts with cabal/make/ghc, can be extended by user. -- These are all LINES-streams (implicit newlines). , logOut :: Strm.OutputStream B.ByteString , resultsOut :: Strm.OutputStream B.ByteString , stdOut :: Strm.OutputStream B.ByteString -- A set of environment variable configurations to test , envs :: [[(String, String)]] , argsBeforeFlags :: Bool -- ^ A global setting to control whether executables are given -- their 'flags/params' after their regular arguments. -- This is here because some executables don't use proper command line parsing. , harvesters :: LineHarvester -- ^ A stack of line harvesters that gather RunResult details. , doFusionUpload :: Bool #ifdef FUSION_TABLES , fusionConfig :: FusionConfig #endif } deriving Show #ifdef FUSION_TABLES data FusionConfig = FusionConfig { fusionTableID :: Maybe TableId -- ^ This must be Just whenever doFusionUpload is true. , fusionClientID :: Maybe String , fusionClientSecret :: Maybe String , serverColumns :: [String] -- ^ Record the ordering of columns server side. } deriving Show #endif instance Show (Strm.OutputStream a) where show _ = "" ---------------------------------------------------------------------------------------------------- -- Configuration Spaces ---------------------------------------------------------------------------------------------------- -- | The all-inclusive datatype for a single Benchmark. Do NOT construct values of -- this type directly. Rather, you should make your code robust against future -- addition of fields to this datatype. Use `mkBenchmark` followed by customizing -- only the fields you need. data Benchmark a = Benchmark { target :: FilePath -- ^ The target file or directory. , cmdargs :: [String] -- ^ Command line argument to feed the benchmark executable. , configs :: BenchSpace a -- ^ The configration space to iterate over. , progname :: Maybe String -- ^ Optional name to use INSTEAD of the basename from `target`. , benchTimeOut :: Maybe Double -- ^ Specific timeout for this benchmark. Overrides global setting. } deriving (Eq, Show, Ord, Generic) -- | Make a Benchmark data structure given the core, required set of fields, and uses -- defaults to fill in the rest. Takes target, cmdargs, configs. mkBenchmark :: FilePath -> [String] -> BenchSpace a -> Benchmark a mkBenchmark target cmdargs configs = Benchmark {target, cmdargs, configs, progname=Nothing, benchTimeOut=Nothing } -- | A datatype for describing (generating) benchmark configuration spaces. -- This is accomplished by nested conjunctions and disjunctions. -- For example, varying threads from 1-32 would be a 32-way Or. Combining that -- with profiling on/off (product) would create a 64-config space. -- -- While the ParamSetting provides an *implementation* of the behavior, this -- datatype can also be decorated with a (more easily machine readable) meaning of -- the corresponding setting. For example, indicating that the setting controls -- the number of threads. data BenchSpace meaning = And [BenchSpace meaning] | Or [BenchSpace meaning] | Set meaning ParamSetting deriving (Show,Eq,Ord,Read, Generic) data DefaultParamMeaning = Threads Int -- ^ Set the number of threads. | Variant String -- ^ Which scheduler/implementation/etc. | NoMeaning deriving (Show,Eq,Ord,Read, Generic) -- | Exhaustively compute all configurations described by a benchmark configuration space. enumerateBenchSpace :: BenchSpace a -> [ [(a,ParamSetting)] ] enumerateBenchSpace bs = case bs of Set m p -> [ [(m,p)] ] Or ls -> concatMap enumerateBenchSpace ls And ls -> loop ls where loop [] = [ [] ] -- And [] => one config loop [lst] = enumerateBenchSpace lst loop (hd:tl) = let confs = enumerateBenchSpace hd in [ c++r | c <- confs , r <- loop tl ] -- | Is it a setting that affects compile time? isCompileTime :: ParamSetting -> Bool isCompileTime CompileParam{} = True isCompileTime CmdPath {} = True isCompileTime RuntimeParam{} = False isCompileTime RuntimeEnv {} = False -- | Extract the parameters that affect the compile-time arguments. toCompileFlags :: [(a,ParamSetting)] -> CompileFlags toCompileFlags [] = [] toCompileFlags ((_,CompileParam s1) : tl) = s1 : toCompileFlags tl toCompileFlags (_ : tl) = toCompileFlags tl -- | Extract the parameters that affect the runtime arguments. toRunFlags :: [(a,ParamSetting)] -> RunFlags toRunFlags [] = [] toRunFlags ((_,RuntimeParam s1) : tl) = (s1) : toRunFlags tl toRunFlags (_ : tl) = toRunFlags tl toCmdPaths :: [(a,ParamSetting)] -> [(String,String)] toCmdPaths = catMaybes . map fn where fn (_,CmdPath c p) = Just (c,p) fn _ = Nothing toEnvVars :: [(a,ParamSetting)] -> [(String,String)] toEnvVars [] = [] toEnvVars ((_,RuntimeEnv s1 s2) : tl) = (s1,s2) : toEnvVars tl toEnvVars (_ : tl) = toEnvVars tl -- | A BuildID should uniquely identify a particular (compile-time) configuration, -- but consist only of characters that would be reasonable to put in a filename. -- This is used to keep build results from colliding. type BuildID = String -- | Performs a simple reformatting (stripping disallowed characters) to create a -- build ID corresponding to a set of compile flags. To make it unique we also -- append the target path. makeBuildID :: FilePath -> CompileFlags -> BuildID makeBuildID target strs = encodedTarget ++ (intercalate "_" $ map (filter charAllowed) strs) where charAllowed = isAlphaNum encodedTarget = map (\ c -> if charAllowed c then c else '_') target -- | Strip all runtime options, leaving only compile-time options. This is useful -- for figuring out how many separate compiles need to happen. compileOptsOnly :: BenchSpace a -> BenchSpace a compileOptsOnly x = case loop x of Nothing -> And [] Just b -> b where loop bs = case bs of And ls -> mayb$ And$ catMaybes$ map loop ls Or ls -> mayb$ Or $ catMaybes$ map loop ls Set m (CompileParam {}) -> Just bs Set m (CmdPath {}) -> Just bs -- These affect compilation also... Set _ _ -> Nothing mayb (And []) = Nothing mayb (Or []) = Nothing mayb x = Just x test1 = Or (map (Set () . RuntimeEnv "CILK_NPROCS" . show) [1..32]) test2 = Or$ map (Set () . RuntimeParam . ("-A"++)) ["1M", "2M"] test3 = And [test1, test2] -- | Different types of parameters that may be set or varied. data ParamSetting = RuntimeParam String -- ^ String contains runtime options, expanded and tokenized by the shell. | CompileParam String -- ^ String contains compile-time options, expanded and tokenized by the shell. | RuntimeEnv String String -- ^ The name of the env var and its value, respectively. -- For now Env Vars ONLY affect runtime. | CmdPath String String -- ^ Takes CMD PATH, and establishes a benchmark-private setting to use PATH for CMD. -- For example `CmdPath "ghc" "ghc-7.6.3"`. -- | Threads Int -- ^ Shorthand: builtin support for changing the number of -- threads across a number of separate build methods. -- | TimeOut Double -- ^ Set the timeout for this benchmark. deriving (Show, Eq, Read, Ord, Generic) ---------------------------------------------------------------------------------------------------- -- Subprocesses and system commands ---------------------------------------------------------------------------------------------------- -- | A self-contained description of a runnable command. Similar to -- System.Process.CreateProcess but slightly simpler. data CommandDescr = CommandDescr { command :: CmdSpec -- ^ Executable and arguments , envVars :: [(String, String)] -- ^ Environment variables to APPEND to current env. , timeout :: Maybe Double -- ^ Optional timeout in seconds. , workingDir :: Maybe FilePath -- ^ Optional working directory to switch to before -- running command. } deriving (Show,Eq,Ord,Read,Generic) -- Umm... these should be defined in base: deriving instance Eq CmdSpec deriving instance Show CmdSpec deriving instance Ord CmdSpec deriving instance Read CmdSpec -- | Measured results from running a subprocess (benchmark). data RunResult = RunCompleted { realtime :: Double -- ^ Benchmark time in seconds, may be different than total process time. , productivity :: Maybe Double -- ^ Seconds , allocRate :: Maybe Word64 -- ^ Bytes allocated per mutator-second , memFootprint :: Maybe Word64 -- ^ High water mark of allocated memory, in bytes. } | RunTimeOut | ExitError Int -- ^ Contains the returned error code. deriving (Eq,Show) emptyRunResult :: RunResult emptyRunResult = RunCompleted (-1.0) Nothing Nothing Nothing -- | A running subprocess. data SubProcess = SubProcess { wait :: IO RunResult , process_out :: Strm.InputStream B.ByteString -- ^ A stream of lines. , process_err :: Strm.InputStream B.ByteString -- ^ A stream of lines. } instance Out ParamSetting instance Out FilePredicate instance Out DefaultParamMeaning instance Out a => Out (BenchSpace a) instance Out a => Out (Benchmark a) instance (Out k, Out v) => Out (M.Map k v) where docPrec n m = docPrec n $ M.toList m doc = docPrec 0 -- | A line harvester takes a single line of input and possible extracts data from it -- which it can then add to a RunResult. -- -- The boolean result indicates whether the line was used or not. newtype LineHarvester = LineHarvester (B.ByteString -> (RunResult -> RunResult, Bool)) -- newtype LineHarvester = LineHarvester (B.ByteString -> Maybe (RunResult -> RunResult)) -- | We can stack up line harvesters. ALL of them get to run on each line. instance Monoid LineHarvester where mempty = LineHarvester (\ _ -> (id,False)) mappend (LineHarvester lh1) (LineHarvester lh2) = LineHarvester $ \ ln -> let (f,b1) = lh1 ln (g,b2) = lh2 ln in (f . g, b1 || b2) -- | Run the second harvester only if the first fails. orHarvest :: LineHarvester -> LineHarvester -> LineHarvester orHarvest (LineHarvester lh1) (LineHarvester lh2) = LineHarvester $ \ ln -> case lh1 ln of x@(_,True) -> x (_,False) -> lh2 ln instance Show LineHarvester where show _ = "" ---------------------------------------------------------------------------------------------------- -- Benchmark Results Upload ---------------------------------------------------------------------------------------------------- -- | This contains all the contextual information for a single benchmark run, which -- makes up a "row" in a table of benchmark results. -- Note that multiple "trials" (actual executions) go into a single BenchmarkResult data BenchmarkResult = BenchmarkResult { _PROGNAME :: String , _VARIANT :: String , _ARGS :: [String] , _HOSTNAME :: String , _RUNID :: String , _CI_BUILD_ID :: String , _THREADS :: Int , _DATETIME :: String -- Datetime , _MINTIME :: Double , _MEDIANTIME :: Double , _MAXTIME :: Double , _MINTIME_PRODUCTIVITY :: Maybe Double , _MEDIANTIME_PRODUCTIVITY :: Maybe Double , _MAXTIME_PRODUCTIVITY :: Maybe Double , _ALLTIMES :: String -- ^ Space separated list of numbers. , _TRIALS :: Int , _COMPILER :: String , _COMPILE_FLAGS :: String , _RUNTIME_FLAGS :: String , _ENV_VARS :: String , _BENCH_VERSION :: String , _BENCH_FILE :: String , _UNAME :: String , _PROCESSOR :: String , _TOPOLOGY :: String , _GIT_BRANCH :: String , _GIT_HASH :: String , _GIT_DEPTH :: Int , _WHO :: String , _ETC_ISSUE :: String , _LSPCI :: String , _FULL_LOG :: String , _MEDIANTIME_ALLOCRATE :: Maybe Word64 , _MEDIANTIME_MEMFOOTPRINT :: Maybe Word64 } -- | A default value, useful for filling in only the fields that are relevant to a particular benchmark. emptyBenchmarkResult :: BenchmarkResult emptyBenchmarkResult = BenchmarkResult { _PROGNAME = "" , _VARIANT = "" , _ARGS = [] , _HOSTNAME = "" , _RUNID = "" , _CI_BUILD_ID = "" , _THREADS = 0 , _DATETIME = "" , _MINTIME = 0.0 , _MEDIANTIME = 0.0 , _MAXTIME = 0.0 , _MINTIME_PRODUCTIVITY = Nothing , _MEDIANTIME_PRODUCTIVITY = Nothing , _MAXTIME_PRODUCTIVITY = Nothing , _ALLTIMES = "" , _TRIALS = 1 , _COMPILER = "" , _COMPILE_FLAGS = "" , _RUNTIME_FLAGS = "" , _ENV_VARS = "" , _BENCH_VERSION = "" , _BENCH_FILE = "" , _UNAME = "" , _PROCESSOR = "" , _TOPOLOGY = "" , _GIT_BRANCH = "" , _GIT_HASH = "" , _GIT_DEPTH = -1 , _WHO = "" , _ETC_ISSUE = "" , _LSPCI = "" , _FULL_LOG = "" , _MEDIANTIME_ALLOCRATE = Nothing , _MEDIANTIME_MEMFOOTPRINT = Nothing }