-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | This library is useful for running a large number of parallel tasks
-- that run on top of the IO monad, executing them in batches from a work
-- queue.
--
-- It has several features aimed at monitoring the progress of the tasks
-- and tries to be reasonably efficient (in space and time) for large
-- numbers (millions) of tasks. There is also caching support available
-- so that the results of running the task can be preserved between runs
-- of the same program, which is useful for doing scientific analysis.
@package parallel-tasks
@version 4.0.1.0
-- | Module with the internal workhorse for the library,
-- parallelTasks. You only need to use this module if you want to
-- alter ExtendedParTaskOpts, which allows you to redirect the
-- logging output or store information about task timing.
module Control.Concurrent.ParallelTasks.Base
-- | Advanced options controlling the behaviour of parallel tasks. The
-- m parameter is the monad that the tasks execute in, and the
-- a parameter is the output value of the tasks, which is also the
-- type that is stored in the results vector.
data ExtendedParTaskOpts m a
ExtendedParTaskOpts :: ParTaskOpts m a -> (forall r. (Handle -> IO r) -> IO r) -> (Double -> Int -> TaskOutcome -> IO (Maybe String)) -> ExtendedParTaskOpts m a
-- | Core options
coreOpts :: ExtendedParTaskOpts m a -> ParTaskOpts m a
-- | Function that supplies a handle to an inner block to write messages
-- to. To use stdout or stderr, you can just supply ($ stdout).
-- To write to a file, use withFile "blah" WriteMode.
printTo :: ExtendedParTaskOpts m a -> forall r. (Handle -> IO r) -> IO r
-- | Function used to store the outcome of the task. Arguments are (in
-- order):
--
--
-- - Time that the task took to complete (in seconds)
-- - Index at which to store the result (same as index of the task in
-- the original tasks list)
-- - The outcome of the task
--
--
-- If a String is returned, it is logged
afterFinish :: ExtendedParTaskOpts m a -> Double -> Int -> TaskOutcome -> IO (Maybe String)
-- | Options controlling the general running of parallel tasks. The
-- m parameter is the monad (which must be an instance of
-- MonadIO) in which the tasks will be run, and the a
-- parameter is the return value of the tasks.
data ParTaskOpts m a
ParTaskOpts :: SimpleParTaskOpts -> (forall r. m (m r -> IO r)) -> Maybe (Integer, a) -> ParTaskOpts m a
-- | The simple options.
simpleOpts :: ParTaskOpts m a -> SimpleParTaskOpts
-- | Function to use to run the m monad on top of IO. The returned
-- function is run at least once per worker, so should support being run
-- multiple times in parallel, and should clean up after itself. Suitable
-- instance for IO is simply return id.
wrapWorker :: ParTaskOpts m a -> forall r. m (m r -> IO r)
-- | When Just, the number of microseconds to let each task run for, before
-- assuming it will not complete, and killing it off. In the case that
-- the task is killed off, the second part of the pair is the value that
-- will be stored in the vector.
timeLimit :: ParTaskOpts m a -> Maybe (Integer, a)
data SimpleParTaskOpts
SimpleParTaskOpts :: Maybe Int -> Maybe Int -> Maybe Int -> SimpleParTaskOpts
-- | Number of worker threads to use. When this is Nothing, defaults to
-- number of capabilities (see numCapabilities)
numberWorkers :: SimpleParTaskOpts -> Maybe Int
-- | How often to print the progress of the tasks. E.g. when Just 100,
-- print a message roughly after the completion of every 100 tasks.
printProgress :: SimpleParTaskOpts -> Maybe Int
-- | How often to print an estimate of the completion time. E.g.
-- when Just 100, print an estimate after the completion of every 100
-- tasks.
printEstimate :: SimpleParTaskOpts -> Maybe Int
-- | Value indicating whether a task successfully completed, or was killed
-- off for taking too long
data TaskOutcome
Success :: TaskOutcome
TookTooLong :: TaskOutcome
-- | Default extended options. Prints messages to stderr, and writes a
-- message when a task is killed
defaultExtendedParTaskOpts :: MonadIO m => ParTaskOpts m a -> ExtendedParTaskOpts m a
-- | Default parallel task options. The number of workers defaults to the
-- number of capabilities, with no time limit, and printing progress
-- every 50 tasks and an estimated time every 200
defaultParTaskOpts :: ParTaskOpts IO a
-- | Runs the given set of computations in parallel, and once they are all
-- finished, returns their results. Note that they won't all be run in
-- parallel from the start; rather, a set of workers will be spawned that
-- work their way through the (potentially large) set of jobs.
parallelTasks :: MonadIO m => ExtendedParTaskOpts m a -> [m a] -> m (IOVector a)
instance Typeable TookTooLongException
instance Show TookTooLongException
instance Exception TookTooLongException
-- | A module with a function to support caching the output of your
-- parallel tasks.
module Control.Concurrent.ParallelTasks.Cache
-- | A function that performs caching (between runs of the same tasks) to
-- help when running the same analysis task many times.
--
-- Imagine that you have a program where you want to do some map-reduce
-- work. The mapping takes a long time, but you are working on the reduce
-- part. You don't want to have to redo the mapping every time you run
-- your program; you can use this cache functionality to save the results
-- of the mapping between program runs. Alternatively, you may want to
-- analyse only part of your data at first (for speed) then slowly expand
-- to the rest of the data set. Caching allows you to re-use the results
-- you have already calculated.
--
-- There are three main concepts in the type signature. input is
-- a type containing all the information needed to perform the task and
-- produce the output. This may involve file handles or functions or
-- whatever. The key type is generally smaller, and is the
-- smallest possible unique identifier for a corresponding output. This
-- might be the primary key of a database record, or an input filename.
-- (Obviously, in some cases, input = key; that makes life
-- easy). The output type is the output of the task.
--
-- In order to serialise the cache to a file, both key and
-- output have to be instances of Serialize. To allow
-- efficient unboxing of a vector, we require an Unbox instance
-- for key (contact me if you think this is too onerous), and to
-- ensure strict reading from the cache we require NFData for
-- output.
--
-- Remember that parMapCache doesn't know when your cache is
-- invalid (e.g. because you've altered the processing algorithm that you
-- are passing to this function), and will blindly use it if it finds it.
-- It's your responsibility to remove the cache when it becomes invalid.
parMapCache :: (MonadIO m, Ord key, Show key, Unbox key, NFData output, Serialize key, Serialize output) => ParTaskOpts m output -> FilePath -> (input -> key) -> (input -> m output) -> [input] -> m (IOVector output)
-- | The parallel functions in this module all use the same underlying
-- behaviour. You supply a list of tasks that you wish performed, either
-- in the IO monad or some other MonadIO m => m
-- monad. This library starts up a limited number of threads (by default,
-- one per capability, i.e. one per available processor/core) and then
-- executes the given work queue across the threads. This is better than
-- simply starting all the jobs in parallel and waiting, because in the
-- case where you have thousands or millions of jobs, but only say 16
-- cores, you do not want the overheads of switching between all those
-- contending threads.
--
-- The default behaviour of these functions is to put useful progress
-- reports onto stderr while they are running (number of tasks completed,
-- estimate of final completion time). The library is aimed at millions
-- of jobs taking several hours to complete; hence built-in output is
-- very useful for you, while you wait. You can customise this behaviour
-- by using the primed version of each of these functions and supplying a
-- customised options record.
--
-- The only difference between the functions parallelList,
-- parallelVec and parallelIOVec is the type of the
-- results returned. The closest to the underlying behaviour is
-- parallelIOVec'; the other functions are simply convenience
-- wrappers that freeze/convert the IOVector into a Vector or list.
--
-- Note: make sure you compile your program with the -threaded
-- -with-rtsopts=-N options (e.g. in the ghc-options field in your
-- cabal file), or else you will not get any parallel execution in your
-- program!
module Control.Concurrent.ParallelTasks
-- | Runs the list of tasks in parallel (a few at a time), and returns the
-- results in a list (with the corresponding order to the input list,
-- i.e. the first task produces the first result in the list.) See the
-- module description for more details.
--
-- Defined as: parallelList' defaultParTaskOpts
parallelList :: [IO a] -> IO [a]
-- | As parallelList, but returns the results in an immutable
-- Vector.
--
-- Defined as parallelVec' defaultParTaskOpts
parallelVec :: [IO a] -> IO (Vector a)
-- | As parallelList, but returns the results in a mutable IOVector.
--
-- Defined as parallelIOVec' defaultParTaskOpts
parallelIOVec :: [IO a] -> IO (IOVector a)
parallelList' :: MonadIO m => ParTaskOpts m a -> [m a] -> m [a]
parallelVec' :: MonadIO m => ParTaskOpts m a -> [m a] -> m (Vector a)
parallelIOVec' :: MonadIO m => ParTaskOpts m a -> [m a] -> m (IOVector a)
data SimpleParTaskOpts
SimpleParTaskOpts :: Maybe Int -> Maybe Int -> Maybe Int -> SimpleParTaskOpts
-- | Number of worker threads to use. When this is Nothing, defaults to
-- number of capabilities (see numCapabilities)
numberWorkers :: SimpleParTaskOpts -> Maybe Int
-- | How often to print the progress of the tasks. E.g. when Just 100,
-- print a message roughly after the completion of every 100 tasks.
printProgress :: SimpleParTaskOpts -> Maybe Int
-- | How often to print an estimate of the completion time. E.g.
-- when Just 100, print an estimate after the completion of every 100
-- tasks.
printEstimate :: SimpleParTaskOpts -> Maybe Int
-- | Options controlling the general running of parallel tasks. The
-- m parameter is the monad (which must be an instance of
-- MonadIO) in which the tasks will be run, and the a
-- parameter is the return value of the tasks.
data ParTaskOpts m a
ParTaskOpts :: SimpleParTaskOpts -> (forall r. m (m r -> IO r)) -> Maybe (Integer, a) -> ParTaskOpts m a
-- | The simple options.
simpleOpts :: ParTaskOpts m a -> SimpleParTaskOpts
-- | Function to use to run the m monad on top of IO. The returned
-- function is run at least once per worker, so should support being run
-- multiple times in parallel, and should clean up after itself. Suitable
-- instance for IO is simply return id.
wrapWorker :: ParTaskOpts m a -> forall r. m (m r -> IO r)
-- | When Just, the number of microseconds to let each task run for, before
-- assuming it will not complete, and killing it off. In the case that
-- the task is killed off, the second part of the pair is the value that
-- will be stored in the vector.
timeLimit :: ParTaskOpts m a -> Maybe (Integer, a)
-- | Default parallel task options. The number of workers defaults to the
-- number of capabilities, with no time limit, and printing progress
-- every 50 tasks and an estimated time every 200
defaultParTaskOpts :: ParTaskOpts IO a