{-
Copyright (C) 2010-2013 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@]
* Contains the entry-point of the application.
* Processes the command-line arguments.
* Delegates the task to 'Squeeze.findBestFit', potentially on multiple threads.
-}
module Main(main) where
import Control.Applicative((<*>), (<$>))
import Control.Arrow((&&&))
import qualified Control.Concurrent
import qualified Control.Monad
import qualified Control.Monad.Writer
import qualified Data.List
import qualified Data.Maybe
import qualified Data.Ord
import qualified Data.Version
import qualified Distribution.Package
import qualified Distribution.Text
import qualified Distribution.Verbosity
import qualified Distribution.Version
import qualified Factory.Math.Probability
import qualified Paths_squeeze as Paths --Either local stub, or package-instance autogenerated by 'Setup.hs build'.
import qualified Squeeze.Control.Concurrent.DivideAndConquer as Control.Concurrent.DivideAndConquer
import qualified Squeeze.Data.CommandOptions as Data.CommandOptions
import qualified Squeeze.Data.File as Data.File
import qualified Squeeze.Data.FileCombination as Data.FileCombination
import qualified Squeeze.Squeeze as Squeeze
import qualified Squeeze.Test.Performance as Test.Performance
import qualified Squeeze.Test.QuickChecks as Test.QuickChecks
import qualified System.Console.GetOpt as G
import qualified System.Environment
import qualified System.Exit
import qualified System.FilePath
import qualified System.IO
import qualified System.IO.Error
import qualified Text.Printf
import qualified ToolShed.Defaultable
import qualified ToolShed.SelfValidate
import qualified ToolShed.System.TimeAction
-- | Coerce the polymorphic data-type to concrete instance, in order that it's fields may be read from the command-line.
type CommandOptions' = Data.CommandOptions.CommandOptions Rational --'Double' would also be a suitable type-parameter.
-- | Used to thread user-defined command-line options, though the list of functions which implement them.
type CommandLineAction = CommandOptions' -> IO CommandOptions' --Supplied as the type-argument to 'G.OptDescr'.
-- | On failure to parse the specified string, returns an explanatory error.
read' :: Read a => String -> String -> a
read' errorMessage s = case reads s of
[(x, "")] -> x
_ -> error $ errorMessage ++ show s
-- | On failure to parse a command-line argument, returns an explanatory error.
readCommandArg :: Read a => String -> a
readCommandArg = read' "failed to parse command-line argument "
-- | Reads a bounded integral from the command-line, guarding against overflow.
readBoundedIntegral :: Integral i => String -> i
readBoundedIntegral s
| fromIntegral bounded /= unbounded = error $ "integral value exceeds permissible bounds; " ++ show unbounded ++ "."
| otherwise = bounded
where
unbounded = readCommandArg s
bounded = fromInteger unbounded
{- |
* Recursively bisects the task, distributing the sub-tasks to 'Squeeze.findBestFit', to utilise the available CPU-cores.
* Recombines the part solutions to finds the single monotonically increasing list of file-combinations matching the original criteria.
* CAVEAT: whilst the ultimate solution is similar, regardless of the specified number of CPU-cores available, the path leading to it typically differs.
-}
distribute
:: RealFrac ratio
=> Int -- ^ The number of CPU-cores available.
-> Data.CommandOptions.CommandOptions ratio
-> [Data.File.FileSizeAndPath] -- ^ The unordered list of files & sizes.
-> IO [Data.FileCombination.FileCombination]
distribute _ _ [] = return []
distribute 1 commandOptions fileSizeAndPathList = let
solutionSizeBounds = Data.CommandOptions.solutionSizeBounds commandOptions
in do
Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions == maxBound) . System.IO.hPutStrLn System.IO.stderr $ "Solution-size bounds " ++ show solutionSizeBounds
return $ Squeeze.findBestFit solutionSizeBounds fileSizeAndPathList --Single-threaded.
distribute numCapabilities commandOptions fileSizeAndPathList = let
distribute' = distribute $ numCapabilities `div` 2 --Partially apply.
(selectedFileSizeAndPath, remainingFileSizeAndPaths) = Data.List.minimumBy (Data.Ord.comparing Data.File.getSize) &&& filter (/= selectedFileSizeAndPath) $ fileSizeAndPathList --Balance the load on the two threads, by selecting the smallest file rather than the largest; this makes a big difference !
in do
Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions == maxBound) . System.IO.hPutStrLn System.IO.stderr $ show numCapabilities ++ " CPU-cores => bisecting task into those, with & without, " ++ show selectedFileSizeAndPath
fileCombinationsExcludingSelected <- distribute' commandOptions remainingFileSizeAndPaths --Recurse using all the original space, but a reduced list of files.
fileCombinationsIncludingSelected <- map (
Data.FileCombination.prepend selectedFileSizeAndPath --Prepend to all file-combinations, the file we previously removed.
) <$> distribute' (
commandOptions {
Data.CommandOptions.getMaximumBytes = Data.CommandOptions.getMaximumBytes commandOptions - Data.File.getSize selectedFileSizeAndPath
} --Reduce the maximum size; there's no need to account for the minimum, since this is defined as a ratio of the maximum.
) remainingFileSizeAndPaths --Recurse.
Control.Concurrent.DivideAndConquer.divideAndConquer Data.FileCombination.risingMergeByAggregateFileSize fileCombinationsExcludingSelected fileCombinationsIncludingSelected --Merge the part-solutions.
{- |
* Parses the command-line arguments, to determine 'Data.CommandOptions.CommandOptions', which over-ride the default value.
* Any arguments which follow known 'Data.CommandOptions.CommandOptions',
are interpreted as file-names to consider when attempting to find a suitable fit for the specified space-constraints.
* If the specified file-name is /-/, then the actual file-names are read from /standard input/, to augment any other non-options specified.
* Delegates the donkey-work to 'distribute'.
Because this may take a long time, it prints the results in real time, rather than batching until the optimum has been determined.
-}
main :: IO ()
main = do
progName <- System.Environment.getProgName
let
defaultCommandOptions :: CommandOptions'
defaultCommandOptions = ToolShed.Defaultable.defaultValue
defaultRandomSeed :: Int
defaultRandomSeed = 0
optDescrList :: [G.OptDescr CommandLineAction]
optDescrList = [
-- String [String] (G.ArgDescr CommandLineAction) String
G.Option "?" ["help"] (G.NoArg $ const printUsage) "Display this help, & then exit.",
G.Option "" ["verbosity"] (
setVerbosity `G.ReqArg` show [minBound :: Distribution.Verbosity.Verbosity .. maxBound]
) ("Define the log-level; default '" ++ show (Data.CommandOptions.getVerbosity defaultCommandOptions) ++ "'. CAVEAT: to be effective, it must precede other options."
),
G.Option "v" ["version"] (G.NoArg $ const printVersion) "Print version-information, & then exit.",
G.Option "z" ["includeEmpty"] (setIncludeEmpty `G.OptArg` "") ("Whether empty files & directories may be included in any solution; default '" ++ show (Data.CommandOptions.getIncludeEmpty defaultCommandOptions) ++ "'."),
G.Option "M" ["maximumBytes"] (setMaximumBytes `G.ReqArg` "") ("The maximum bytes of available space; default '" ++ show (Data.CommandOptions.getMaximumBytes defaultCommandOptions) ++ "'."),
G.Option "m" ["minimumUsageRatio"] (setMinimumUsageRatio `G.ReqArg` "") ("The minimum acceptable space usage-ratio; default '" ++ show (realToFrac $ Data.CommandOptions.getMinimumUsageRatio defaultCommandOptions :: Double) ++ "'."),
G.Option "q" ["runQuickChecks"] (G.NoArg runQuickChecks) "Run Quick-checks using arbitrary data, & then exit.",
G.Option "r" ["randomSeed"] (G.OptArg setRandomSeed "") ("Seed the random number-generator with the specified integer, to produce a repeatable pseudo-random sequence as required for performance-testing. If this option is unspecified then the seed is unpredictable, but if only its argument is unspecified then the seed defaults to '" ++ show defaultRandomSeed ++ "'. CAVEAT: to be effective, it must precede either 'testPerformanceContinuous' or 'testPerformanceDiscrete'."),
G.Option "" ["testPerformanceContinuous"] (
testPerformanceContinuous `G.ReqArg` "(, )"
) "Measure the CPU-seconds required for the specified number of randomly generated virtual files, the size of which conform to the specified continuous probability-distribution; & then exit.",
G.Option "" ["testPerformanceDiscrete"] (
testPerformanceDiscrete `G.ReqArg` "(, )"
) "Measure the CPU-seconds required for the specified number of randomly generated virtual files, the size of which conform to the specified discrete probability-distribution; & then exit."
] where
setMaximumBytes, setMinimumUsageRatio, setVerbosity, testPerformanceContinuous, testPerformanceDiscrete :: String -> CommandLineAction
setMaximumBytes arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMaximumBytes = readCommandArg arg }
setMinimumUsageRatio arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMinimumUsageRatio = realToFrac (readCommandArg arg :: Double) }
setVerbosity arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getVerbosity = readCommandArg arg }
testPerformanceContinuous arg commandOptions
| not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions
| otherwise = do
ToolShed.System.TimeAction.printCPUSeconds $ Test.Performance.run commandOptions fileCount probabilityDistribution >>= mapM_ print {-force evaluation-}
System.Exit.exitWith System.Exit.ExitSuccess
where
fileCount :: Int
probabilityDistribution :: Factory.Math.Probability.ContinuousDistribution Double
(fileCount, probabilityDistribution) = readCommandArg arg
testPerformanceDiscrete arg commandOptions
| not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions
| otherwise = do
ToolShed.System.TimeAction.printCPUSeconds $ Test.Performance.run commandOptions fileCount probabilityDistribution >>= mapM_ print {-force evaluation-}
System.Exit.exitWith System.Exit.ExitSuccess
where
fileCount :: Int
probabilityDistribution :: Factory.Math.Probability.DiscreteDistribution Double
(fileCount, probabilityDistribution) = readCommandArg arg
setIncludeEmpty, setRandomSeed :: Maybe String -> CommandLineAction
setIncludeEmpty arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getIncludeEmpty = Data.Maybe.maybe True readCommandArg arg }
setRandomSeed arg commandOptions = return {-to IO-monad-} commandOptions { Data.CommandOptions.getMaybeRandomSeed = Just $ Data.Maybe.maybe defaultRandomSeed readBoundedIntegral arg }
runQuickChecks :: (Num f, Ord f, Show f) => Data.CommandOptions.CommandOptions f -> IO (Data.CommandOptions.CommandOptions f)
runQuickChecks commandOptions
| not $ ToolShed.SelfValidate.isValid commandOptions = fail $ ToolShed.SelfValidate.getFirstError commandOptions
| otherwise = Test.QuickChecks.run >> System.Exit.exitWith System.Exit.ExitSuccess
printVersion, printUsage :: IO (Data.CommandOptions.CommandOptions f)
printVersion = Text.Printf.printf "%s\n\n%s %s.\n%s.\n%s.\n%s %s.\n" packageName "Copyright (C) 2010-2013" author "This program comes with ABSOLUTELY NO WARRANTY" "This is free software, & you are welcome to redistribute it under certain conditions" "Written by" author >> System.Exit.exitWith System.Exit.ExitSuccess where
packageIdentifier :: Distribution.Package.PackageIdentifier
packageIdentifier = Distribution.Package.PackageIdentifier {
Distribution.Package.pkgName = Distribution.Package.PackageName progName, --CAVEAT: coincidentally.
Distribution.Package.pkgVersion = Distribution.Version.Version (Data.Version.versionBranch Paths.version) []
}
packageName, author :: String
packageName = Distribution.Text.display packageIdentifier
author = "Dr. Alistair Ward"
printUsage = Text.Printf.hPrintf System.IO.stderr "Usage:\t%s %s\n\nEBNF argument-format:\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\t%-22s = %s;\n\nE.g.\n\t%s\n\t%s\n\t%s\n" (
G.usageInfo progName optDescrList
) "[ ...]" "Bool" "\"True\" | \"False\"\t(* case-sensitive *)" "ContinuousDistribution" "LogNormalDistribution location scale^2" "DiscreteDistribution" "PoissonDistribution lambda" "File-path" (
"File-name ('" ++ [System.FilePath.pathSeparator] ++ "' File-name)*"
) "Float" "Int ('.' Int)?" "Int" "[0-9]+" "lambda" "Float\t(* the mean & variance of the distribution *)" "location" "Float\t(* the mean of the log of the distribution *)" "scale^2" "Float\t(* the variance of the log of the distribution *)" (
progName ++ " --verbosity=Verbose -M 700000000 *.ogg +RTS -N\t#Find the best-fit for the globbed file-names, into the space available on a CD, using multiple CPU-cores where available."
) (
progName ++ " -r --testPerformanceContinuous='(100, LogNormalDistribution 19.4 0.6)'\t#Test performance."
) (
progName ++ " -r --testPerformanceDiscrete='(100, PoissonDistribution 4e8)'\t#Test performance."
) >> System.Exit.exitWith System.Exit.ExitSuccess
args <- System.Environment.getArgs
-- G.getOpt :: G.ArgOrder CommandLineAction -> [G.OptDescr CommandLineAction] -> [String] -> ([CommandLineAction], [String], [String])
case G.getOpt G.RequireOrder optDescrList args of
(commandLineActions, nonOptions, [{-errors-}]) -> do
commandOptions <- Data.List.foldl' (>>=) (
return {-to IO-monad-} ToolShed.Defaultable.defaultValue
) {-transform using CommandLineAction-mutators-} commandLineActions --ie: do o1 <- CommandLineAction[0] commandOptions[0]; o2 <- CommandLineAction[1] o1; ...
if not $ ToolShed.SelfValidate.isValid commandOptions
then fail $ ToolShed.SelfValidate.getFirstError commandOptions
else if null nonOptions
then fail "zero file-paths specified"
else let
standardInputProxy = "-"
in do
filePaths <- if standardInputProxy `elem` nonOptions
then let
getFilePaths :: IO Data.File.FilePathList
getFilePaths = do
eof <- System.IO.isEOF
if eof
then return {-to IO-monad-} []
else {-more to read-} (:) <$> getLine <*> getFilePaths {-recurse-}
in do
filePaths <- (filter (/= standardInputProxy) nonOptions ++) <$> getFilePaths
if null filePaths
then fail "zero file-paths"
else return filePaths
else {-real fileNames-} return {-to IO-monad-} nonOptions
(acceptedFileSizeAndPathList, logFile) <- Control.Monad.Writer.runWriter . Data.File.selectSuitableFileSizes (Data.CommandOptions.isWithinSizeBounds commandOptions) <$> Data.File.findSizes (Data.List.nub filePaths)
Control.Monad.when (Data.CommandOptions.getVerbosity commandOptions > minBound) . System.IO.hPutStrLn System.IO.stderr $ Data.List.intercalate "\n" logFile
if null acceptedFileSizeAndPathList
then fail "there are zero suitable files"
else let
aggregateSize = Data.File.aggregateSize acceptedFileSizeAndPathList
minimumBytes = Data.CommandOptions.deriveMinimumBytes commandOptions
in if aggregateSize < minimumBytes
then fail $ "the aggregate size of all suitable files, is insufficient; " ++ show (aggregateSize, minimumBytes)
else Control.Concurrent.getNumCapabilities >>= (
\numCapabilities -> distribute numCapabilities commandOptions acceptedFileSizeAndPathList
) >>= mapM_ print {-lazy evaluation-}
(_, _, errors) -> System.IO.Error.ioError . System.IO.Error.userError $ concatMap init {-chop-} errors