-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | This library is useful for running a large number of parallel tasks
--   that run on top of the IO monad, executing them in batches from a work
--   queue.
--   
--   It has several features aimed at monitoring the progress of the tasks
--   and tries to be reasonably efficient (in space and time) for large
--   numbers (millions) of tasks. There is also caching support available
--   so that the results of running the task can be preserved between runs
--   of the same program, which is useful for doing scientific analysis.
@package parallel-tasks
@version 4.0.1.0


-- | Module with the internal workhorse for the library,
--   <a>parallelTasks</a>. You only need to use this module if you want to
--   alter <a>ExtendedParTaskOpts</a>, which allows you to redirect the
--   logging output or store information about task timing.
module Control.Concurrent.ParallelTasks.Base

-- | Advanced options controlling the behaviour of parallel tasks. The
--   <tt>m</tt> parameter is the monad that the tasks execute in, and the
--   <tt>a</tt> parameter is the output value of the tasks, which is also
--   the type that is stored in the results vector.
data ExtendedParTaskOpts m a
ExtendedParTaskOpts :: ParTaskOpts m a -> (forall r. (Handle -> IO r) -> IO r) -> (Double -> Int -> TaskOutcome -> IO (Maybe String)) -> ExtendedParTaskOpts m a

-- | Core options
coreOpts :: ExtendedParTaskOpts m a -> ParTaskOpts m a

-- | Function that supplies a handle to an inner block to write messages
--   to. To use stdout or stderr, you can just supply <tt>($ stdout)</tt>.
--   To write to a file, use <tt>withFile "blah" WriteMode</tt>.
printTo :: ExtendedParTaskOpts m a -> forall r. (Handle -> IO r) -> IO r

-- | Function used to store the outcome of the task. Arguments are (in
--   order):
--   
--   <ul>
--   <li>Time that the task took to complete (in seconds)</li>
--   <li>Index at which to store the result (same as index of the task in
--   the original tasks list)</li>
--   <li>The outcome of the task</li>
--   </ul>
--   
--   If a String is returned, it is logged.
afterFinish :: ExtendedParTaskOpts m a -> Double -> Int -> TaskOutcome -> IO (Maybe String)

-- | Options controlling the general running of parallel tasks. The
--   <tt>m</tt> parameter is the monad (which must be an instance of
--   <a>MonadIO</a>) in which the tasks will be run, and the <tt>a</tt>
--   parameter is the return value of the tasks.
data ParTaskOpts m a
ParTaskOpts :: SimpleParTaskOpts -> (forall r. m (m r -> IO r)) -> Maybe (Integer, a) -> ParTaskOpts m a

-- | The simple options.
simpleOpts :: ParTaskOpts m a -> SimpleParTaskOpts

-- | Function to use to run the <tt>m</tt> monad on top of IO. The returned
--   function is run at least once per worker, so should support being run
--   multiple times in parallel, and should clean up after itself. A
--   suitable value for IO is simply <tt>return id</tt>.
wrapWorker :: ParTaskOpts m a -> forall r. m (m r -> IO r)

-- | When Just, the number of microseconds to let each task run for, before
--   assuming it will not complete, and killing it off. In the case that
--   the task is killed off, the second part of the pair is the value that
--   will be stored in the vector.
timeLimit :: ParTaskOpts m a -> Maybe (Integer, a)
data SimpleParTaskOpts
SimpleParTaskOpts :: Maybe Int -> Maybe Int -> Maybe Int -> SimpleParTaskOpts

-- | Number of worker threads to use. When this is Nothing, defaults to
--   number of capabilities (see <tt>numCapabilities</tt>)
numberWorkers :: SimpleParTaskOpts -> Maybe Int

-- | How often to print the progress of the tasks. E.g. when Just 100,
--   print a message roughly after the completion of every 100 tasks.
printProgress :: SimpleParTaskOpts -> Maybe Int

-- | How often to print an estimate of the completion time. E.g. when
--   Just 100, print an estimate after the completion of every 100
--   tasks.
printEstimate :: SimpleParTaskOpts -> Maybe Int

-- | Value indicating whether a task successfully completed, or was killed
--   off for taking too long
data TaskOutcome
Success :: TaskOutcome
TookTooLong :: TaskOutcome

-- | Default extended options. Prints messages to stderr, and writes a
--   message when a task is killed
defaultExtendedParTaskOpts :: MonadIO m => ParTaskOpts m a -> ExtendedParTaskOpts m a

-- | Default parallel task options. The number of workers defaults to the
--   number of capabilities, with no time limit, and printing progress
--   every 50 tasks and an estimated time every 200
defaultParTaskOpts :: ParTaskOpts IO a

-- | Runs the given set of computations in parallel, and once they are all
--   finished, returns their results. Note that they won't all be run in
--   parallel from the start; rather, a set of workers will be spawned that
--   work their way through the (potentially large) set of jobs.
parallelTasks :: MonadIO m => ExtendedParTaskOpts m a -> [m a] -> m (IOVector a)
instance Typeable TookTooLongException
instance Show TookTooLongException
instance Exception TookTooLongException


-- | A module with a function to support caching the output of your
--   parallel tasks.
module Control.Concurrent.ParallelTasks.Cache

-- | A function that performs caching (between runs of the same tasks) to
--   help when running the same analysis task many times.
--   
--   Imagine that you have a program where you want to do some map-reduce
--   work. The mapping takes a long time, but you are working on the reduce
--   part. You don't want to have to redo the mapping every time you run
--   your program; you can use this cache functionality to save the results
--   of the mapping between program runs. Alternatively, you may want to
--   analyse only part of your data at first (for speed) then slowly expand
--   to the rest of the data set. Caching allows you to re-use the results
--   you have already calculated.
--   
--   There are three main concepts in the type signature. <tt>input</tt> is
--   a type containing all the information needed to perform the task and
--   produce the output. This may involve file handles or functions or
--   whatever. The <tt>key</tt> type is generally smaller, and is the
--   smallest possible unique identifier for a corresponding output. This
--   might be the primary key of a database record, or an input filename.
--   (Obviously, in some cases, <tt>input = key</tt>; that makes life
--   easy). The <tt>output</tt> type is the output of the task.
--   
--   In order to serialise the cache to a file, both <tt>key</tt> and
--   <tt>output</tt> have to be instances of <tt>Serialize</tt>. To allow
--   efficient unboxing of a vector, we require an <tt>Unbox</tt> instance
--   for <tt>key</tt> (contact me if you think this is too onerous), and to
--   ensure strict reading from the cache we require <tt>NFData</tt> for
--   output.
--   
--   Remember that <tt>parMapCache</tt> doesn't know when your cache is
--   invalid (e.g. because you've altered the processing algorithm that you
--   are passing to this function), and will blindly use it if it finds it.
--   It's your responsibility to remove the cache when it becomes invalid.
parMapCache :: (MonadIO m, Ord key, Show key, Unbox key, NFData output, Serialize key, Serialize output) => ParTaskOpts m output -> FilePath -> (input -> key) -> (input -> m output) -> [input] -> m (IOVector output)


-- | The parallel functions in this module all use the same underlying
--   behaviour. You supply a list of tasks that you wish performed, either
--   in the <tt>IO</tt> monad or some other <tt>MonadIO m =&gt; m</tt>
--   monad. This library starts up a limited number of threads (by default,
--   one per capability, i.e. one per available processor/core) and then
--   executes the given work queue across the threads. This is better than
--   simply starting all the jobs in parallel and waiting, because in the
--   case where you have thousands or millions of jobs, but only say 16
--   cores, you do not want the overheads of switching between all those
--   contending threads.
--   
--   The default behaviour of these functions is to put useful progress
--   reports onto stderr while they are running (number of tasks completed,
--   estimate of final completion time). The library is aimed at millions
--   of jobs taking several hours to complete; hence built-in output is
--   very useful for you, while you wait. You can customise this behaviour
--   by using the primed version of each of these functions and supplying a
--   customised options record.
--   
--   The only difference between the functions <tt>parallelList</tt>,
--   <tt>parallelVec</tt> and <tt>parallelIOVec</tt> is the type of the
--   results returned. The closest to the underlying behaviour is
--   <tt>parallelIOVec'</tt>; the other functions are simply convenience
--   wrappers that freeze/convert the IOVector into a Vector or list.
--   
--   <i>Note</i>: make sure you compile your program with the <tt>-threaded
--   -with-rtsopts=-N</tt> options (e.g. in the ghc-options field in your
--   cabal file), or else you will not get any parallel execution in your
--   program!
module Control.Concurrent.ParallelTasks

-- | Runs the list of tasks in parallel (a few at a time), and returns the
--   results in a list (in the same order as the input list, i.e. the
--   first task produces the first result in the list). See the module
--   description for more details.
--   
--   Defined as: <tt>parallelList' defaultParTaskOpts</tt>
parallelList :: [IO a] -> IO [a]

-- | As <a>parallelList</a>, but returns the results in an immutable
--   Vector.
--   
--   Defined as <tt>parallelVec' defaultParTaskOpts</tt>
parallelVec :: [IO a] -> IO (Vector a)

-- | As <a>parallelList</a>, but returns the results in a mutable IOVector.
--   
--   Defined as <tt>parallelIOVec' defaultParTaskOpts</tt>
parallelIOVec :: [IO a] -> IO (IOVector a)
parallelList' :: MonadIO m => ParTaskOpts m a -> [m a] -> m [a]
parallelVec' :: MonadIO m => ParTaskOpts m a -> [m a] -> m (Vector a)
parallelIOVec' :: MonadIO m => ParTaskOpts m a -> [m a] -> m (IOVector a)
data SimpleParTaskOpts
SimpleParTaskOpts :: Maybe Int -> Maybe Int -> Maybe Int -> SimpleParTaskOpts

-- | Number of worker threads to use. When this is Nothing, defaults to
--   number of capabilities (see <tt>numCapabilities</tt>)
numberWorkers :: SimpleParTaskOpts -> Maybe Int

-- | How often to print the progress of the tasks. E.g. when Just 100,
--   print a message roughly after the completion of every 100 tasks.
printProgress :: SimpleParTaskOpts -> Maybe Int

-- | How often to print an estimate of the completion time. E.g. when
--   Just 100, print an estimate after the completion of every 100
--   tasks.
printEstimate :: SimpleParTaskOpts -> Maybe Int

-- | Options controlling the general running of parallel tasks. The
--   <tt>m</tt> parameter is the monad (which must be an instance of
--   <a>MonadIO</a>) in which the tasks will be run, and the <tt>a</tt>
--   parameter is the return value of the tasks.
data ParTaskOpts m a
ParTaskOpts :: SimpleParTaskOpts -> (forall r. m (m r -> IO r)) -> Maybe (Integer, a) -> ParTaskOpts m a

-- | The simple options.
simpleOpts :: ParTaskOpts m a -> SimpleParTaskOpts

-- | Function to use to run the <tt>m</tt> monad on top of IO. The returned
--   function is run at least once per worker, so should support being run
--   multiple times in parallel, and should clean up after itself. A
--   suitable value for IO is simply <tt>return id</tt>.
wrapWorker :: ParTaskOpts m a -> forall r. m (m r -> IO r)

-- | When Just, the number of microseconds to let each task run for, before
--   assuming it will not complete, and killing it off. In the case that
--   the task is killed off, the second part of the pair is the value that
--   will be stored in the vector.
timeLimit :: ParTaskOpts m a -> Maybe (Integer, a)

-- | Default parallel task options. The number of workers defaults to the
--   number of capabilities, with no time limit, and printing progress
--   every 50 tasks and an estimated time every 200
defaultParTaskOpts :: ParTaskOpts IO a