{- Copyright (C) 2010-2013 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] * Contains the entry-point of the application. * Processes the command-line arguments. * Delegates the task to 'Squeeze.findBestFit', potentially on multiple threads. -} module Main(main) where import Control.Applicative((<*>), (<$>)) import Control.Arrow((&&&)) import qualified Control.Concurrent import qualified Control.Monad import qualified Control.Monad.Writer import qualified Data.List import qualified Data.Maybe import qualified Data.Ord import qualified Data.Version import qualified Distribution.Package import qualified Distribution.Text import qualified Distribution.Verbosity import qualified Distribution.Version import qualified Factory.Math.Probability import qualified Paths_squeeze as Paths --Either local stub, or package-instance autogenerated by 'Setup.hs build'. import qualified Squeeze.Control.Concurrent.DivideAndConquer as Control.Concurrent.DivideAndConquer import qualified Squeeze.Data.CommandOptions as Data.CommandOptions import qualified Squeeze.Data.File as Data.File import qualified Squeeze.Data.FileCombination as Data.FileCombination import qualified Squeeze.Squeeze as Squeeze import qualified Squeeze.Test.Performance as Test.Performance import qualified Squeeze.Test.QuickChecks as Test.QuickChecks import qualified System.Console.GetOpt as G import qualified System.Environment import qualified System.Exit import qualified System.FilePath import qualified System.IO import qualified System.IO.Error import qualified Text.Printf import qualified ToolShed.Defaultable import qualified ToolShed.SelfValidate import qualified ToolShed.System.TimeAction -- | Coerce the polymorphic data-type to concrete instance, in order that it's fields may be read from the command-line. type CommandOptions' = Data.CommandOptions.CommandOptions Rational --'Double' would also be a suitable type-parameter. -- | Used to thread user-defined command-line options, though the list of functions which implement them. type CommandLineAction = CommandOptions' -> IO CommandOptions' --Supplied as the type-argument to 'G.OptDescr'. -- | On failure to parse the specified string, returns an explanatory error. read' :: Read a => String -> String -> a read' errorMessage s = case reads s of [(x, "")] -> x _ -> error $ errorMessage ++ show s -- | On failure to parse a command-line argument, returns an explanatory error. readCommandArg :: Read a => String -> a readCommandArg = read' "failed to parse command-line argument " -- | Reads a bounded integral from the command-line, guarding against overflow. readBoundedIntegral :: Integral i => String -> i readBoundedIntegral s | fromIntegral bounded /= unbounded = error $ "integral value exceeds permissible bounds; " ++ show unbounded ++ "." | otherwise = bounded where unbounded = readCommandArg s bounded = fromInteger unbounded {- | * Recursively bisects the task, distributing the sub-tasks to 'Squeeze.findBestFit', to utilise the available CPU-cores. * Recombines the part solutions to finds the single monotonically increasing list of file-combinations matching the original criteria. * CAVEAT: whilst the ultimate solution is similar, regardless of the specified number of CPU-cores available, the path leading to it typically differs. -} distribute :: RealFrac ratio => Int -- ^ The number of CPU-cores available. -> Data.CommandOptions.CommandOptions ratio -> [Data.File.FileSizeAndPath] -- ^ The unordered list of files & sizes. -> IO [Data.FileCombination.FileCombination] distribute _ _ [] = return [] distribute 1 commandOptions fileSizeAndPathList = let solutionSizeBounds = Data.CommandOptions.solutionSizeBounds commandOptions in do Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions == maxBound) . System.IO.hPutStrLn System.IO.stderr $ "Solution-size bounds " ++ show solutionSizeBounds return $ Squeeze.findBestFit solutionSizeBounds fileSizeAndPathList --Single-threaded. distribute numCapabilities commandOptions fileSizeAndPathList = let distribute' = distribute $ numCapabilities `div` 2 --Partially apply. (selectedFileSizeAndPath, remainingFileSizeAndPaths) = Data.List.minimumBy (Data.Ord.comparing Data.File.getSize) &&& filter (/= selectedFileSizeAndPath) $ fileSizeAndPathList --Balance the load on the two threads, by selecting the smallest file rather than the largest; this makes a big difference ! in do Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions == maxBound) . System.IO.hPutStrLn System.IO.stderr $ show numCapabilities ++ " CPU-cores => bisecting task into those, with & without, " ++ show selectedFileSizeAndPath fileCombinationsExcludingSelected <- distribute' commandOptions remainingFileSizeAndPaths --Recurse using all the original space, but a reduced list of files. fileCombinationsIncludingSelected <- map ( Data.FileCombination.prepend selectedFileSizeAndPath --Prepend to all file-combinations, the file we previously removed. ) <$> distribute' ( commandOptions { Data.CommandOptions.getMaximumBytes = Data.CommandOptions.getMaximumBytes commandOptions - Data.File.getSize selectedFileSizeAndPath } --Reduce the maximum size; there's no need to account for the minimum, since this is defined as a ratio of the maximum. ) remainingFileSizeAndPaths --Recurse. Control.Concurrent.DivideAndConquer.divideAndConquer Data.FileCombination.risingMergeByAggregateFileSize fileCombinationsExcludingSelected fileCombinationsIncludingSelected --Merge the part-solutions. {- | * Parses the command-line arguments, to determine 'Data.CommandOptions.CommandOptions', which over-ride the default value. * Any arguments which follow known 'Data.CommandOptions.CommandOptions', are interpreted as file-names to consider when attempting to find a suitable fit for the specified space-constraints. * If the specified file-name is /-/, then the actual file-names are read from /standard input/, to augment any other non-options specified. * Delegates the donkey-work to 'distribute'. Because this may take a long time, it prints the results in real time, rather than batching until the optimum has been determined. -} main :: IO () main = do progName <- System.Environment.getProgName let defaultCommandOptions :: CommandOptions' defaultCommandOptions = ToolShed.Defaultable.defaultValue defaultRandomSeed :: Int defaultRandomSeed = 0 optDescrList :: [G.OptDescr CommandLineAction] optDescrList = [ -- String [String] (G.ArgDescr CommandLineAction) String G.Option "?" ["help"] (G.NoArg $ const printUsage) "Display this help, & then exit.", G.Option "" ["verbosity"] ( setVerbosity `G.ReqArg` show [minBound :: Distribution.Verbosity.Verbosity .. maxBound] ) ("Define the log-level; default '" ++ show (Data.CommandOptions.getVerbosity defaultCommandOptions) ++ "'. CAVEAT: to be effective, it must precede other options." ), G.Option "v" ["version"] (G.NoArg $ const printVersion) "Print version-information, & then exit.", G.Option "z" ["includeEmpty"] (setIncludeEmpty `G.OptArg` "") ("Whether empty files & directories may be included in any solution; default '" ++ show (Data.CommandOptions.getIncludeEmpty defaultCommandOptions) ++ "'."), G.Option "M" ["maximumBytes"] (setMaximumBytes `G.ReqArg` "") ("The maximum bytes of available space; default '" ++ show (Data.CommandOptions.getMaximumBytes defaultCommandOptions) ++ "'."), G.Option "m" ["minimumUsageRatio"] (setMinimumUsageRatio `G.ReqArg` "") ("The minimum acceptable space usage-ratio; default '" ++ show (realToFrac $ Data.CommandOptions.getMinimumUsageRatio defaultCommandOptions :: Double) ++ "'."), G.Option "q" ["runQuickChecks"] (G.NoArg runQuickChecks) "Run Quick-checks using arbitrary data, & then exit.", G.Option "r" ["randomSeed"] (G.OptArg setRandomSeed "") ("Seed the random number-generator with the specified integer, to produce a repeatable pseudo-random sequence as required for performance-testing. If this option is unspecified then the seed is unpredictable, but if only its argument is unspecified then the seed defaults to '" ++ show defaultRandomSeed ++ "'. CAVEAT: to be effective, it must precede either 'testPerformanceContinuous' or 'testPerformanceDiscrete'."), G.Option "" ["testPerformanceContinuous"] ( testPerformanceContinuous `G.ReqArg` "(, )" ) "Measure the CPU-seconds required for the specified number of randomly generated virtual files, the size of which conform to the specified continuous probability-distribution; & then exit.", G.Option "" ["testPerformanceDiscrete"] ( testPerformanceDiscrete `G.ReqArg` "(, )" ) "Measure the CPU-seconds required for the specified number of randomly generated virtual files, the size of which conform to the specified discrete probability-distribution; & then exit." ] where setMaximumBytes, setMinimumUsageRatio, setVerbosity, testPerformanceContinuous, testPerformanceDiscrete :: String -> CommandLineAction setMaximumBytes arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMaximumBytes = readCommandArg arg } setMinimumUsageRatio arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMinimumUsageRatio = realToFrac (readCommandArg arg :: Double) } setVerbosity arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getVerbosity = readCommandArg arg } testPerformanceContinuous arg commandOptions | not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions | otherwise = do ToolShed.System.TimeAction.printCPUSeconds $ Test.Performance.run commandOptions fileCount probabilityDistribution >>= mapM_ print {-force evaluation-} System.Exit.exitWith System.Exit.ExitSuccess where fileCount :: Int probabilityDistribution :: Factory.Math.Probability.ContinuousDistribution Double (fileCount, probabilityDistribution) = readCommandArg arg testPerformanceDiscrete arg commandOptions | not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions | otherwise = do ToolShed.System.TimeAction.printCPUSeconds $ Test.Performance.run commandOptions fileCount probabilityDistribution >>= mapM_ print {-force evaluation-} System.Exit.exitWith System.Exit.ExitSuccess where fileCount :: Int probabilityDistribution :: Factory.Math.Probability.DiscreteDistribution Double (fileCount, probabilityDistribution) = readCommandArg arg setIncludeEmpty, setRandomSeed :: Maybe String -> CommandLineAction setIncludeEmpty arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getIncludeEmpty = Data.Maybe.maybe True readCommandArg arg } setRandomSeed arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMaybeRandomSeed = Just $ Data.Maybe.maybe defaultRandomSeed readBoundedIntegral arg } runQuickChecks :: (Num f, Ord f, Show f) => Data.CommandOptions.CommandOptions f -> IO (Data.CommandOptions.CommandOptions f) runQuickChecks commandOptions | not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions | otherwise = Test.QuickChecks.run >> System.Exit.exitWith System.Exit.ExitSuccess printVersion, printUsage :: IO (Data.CommandOptions.CommandOptions f) printVersion = Text.Printf.printf "%s\n\n%s %s.\n%s.\n%s.\n%s %s.\n" packageName "Copyright (C) 2010-2013" author "This program comes with ABSOLUTELY NO WARRANTY" "This is free software, & you are welcome to redistribute it under certain conditions" "Written by" author >> System.Exit.exitWith System.Exit.ExitSuccess where packageIdentifier :: Distribution.Package.PackageIdentifier packageIdentifier = Distribution.Package.PackageIdentifier { Distribution.Package.pkgName = Distribution.Package.PackageName progName, --CAVEAT: coincidentally. Distribution.Package.pkgVersion = Distribution.Version.Version (Data.Version.versionBranch Paths.version) [] } packageName, author :: String packageName = Distribution.Text.display packageIdentifier author = "Dr. Alistair Ward" printUsage = Text.Printf.hPrintf System.IO.stderr "Usage:\t%s %s\n\nEBNF argument-format:\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\nE.g.\n\t%s\n\t%s\n\t%s\n" ( G.usageInfo progName optDescrList ) "[ ...]" "Bool" "\"True\" | \"False\"\t(* case-sensitive *)" "ContinuousDistribution" "LogNormalDistribution location scale^2" "DiscreteDistribution" "PoissonDistribution lambda" "File-path" ( "File-name ('" ++ [System.FilePath.pathSeparator] ++ "' File-name)*" ) "Float" "Int ('.' Int)?" "Int" "[0-9]+" "lambda" "Float\t(* the mean & variance of the distribution *)" "location" "Float\t(* the mean of the log of the distribution *)" "scale^2" "Float\t(* the variance of the log of the distribution *)" ( progName ++ " --verbosity=Verbose -M 700000000 *.ogg +RTS -N\t#Find the best-fit for the globbed file-names, into the space available on a CD, using multiple CPU-cores where available." ) ( progName ++ " -r --testPerformanceContinuous='(100, LogNormalDistribution 19.4 0.6)'\t#Test performance." ) ( progName ++ " -r --testPerformanceDiscrete='(100, PoissonDistribution 4e8)'\t#Test performance." ) >> System.Exit.exitWith System.Exit.ExitSuccess args <- System.Environment.getArgs -- G.getOpt :: G.ArgOrder CommandLineAction -> [G.OptDescr CommandLineAction] -> [String] -> ([CommandLineAction], [String], [String]) case G.getOpt G.RequireOrder optDescrList args of (commandLineActions, nonOptions, [{-errors-}]) -> do commandOptions <- Data.List.foldl' (>>=) ( return {-to IO-monad-} ToolShed.Defaultable.defaultValue ) {-transform using CommandLineAction-mutators-} commandLineActions --ie: do o1 <- CommandLineAction[0] commandOptions[0]; o2 <- CommandLineAction[1] o1; ... if not $ ToolShed.SelfValidate.isValid commandOptions then fail $ ToolShed.SelfValidate.getFirstError commandOptions else if null nonOptions then fail "zero file-paths specified" else let standardInputProxy = "-" in do filePaths <- if standardInputProxy `elem` nonOptions then let getFilePaths :: IO Data.File.FilePathList getFilePaths = do eof <- System.IO.isEOF if eof then return {-to IO-monad-} [] else {-more to read-} (:) <$> getLine <*> getFilePaths {-recurse-} in do filePaths <- (filter (/= standardInputProxy) nonOptions ++) <$> getFilePaths if null filePaths then fail "zero file-paths" else return filePaths else {-real fileNames-} return {-to IO-monad-} nonOptions (acceptedFileSizeAndPathList, logFile) <- Control.Monad.Writer.runWriter . Data.File.selectSuitableFileSizes (Data.CommandOptions.isWithinSizeBounds commandOptions) <$> Data.File.findSizes (Data.List.nub filePaths) Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions > minBound) . System.IO.hPutStrLn System.IO.stderr $ Data.List.intercalate "\n" logFile if null acceptedFileSizeAndPathList then fail "there are zero suitable files" else let aggregateSize = Data.File.aggregateSize acceptedFileSizeAndPathList minimumBytes = Data.CommandOptions.deriveMinimumBytes commandOptions in if aggregateSize < minimumBytes then fail $ "the aggregate size of all suitable files, is insufficient; " ++ show (aggregateSize, minimumBytes) else Control.Concurrent.getNumCapabilities >>= ( \numCapabilities -> distribute numCapabilities commandOptions acceptedFileSizeAndPathList ) >>= mapM_ print {-lazy evaluation-} (_, _, errors) -> System.IO.Error.ioError . System.IO.Error.userError $ concatMap init {-chop-} errors