-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Low-level parallel operators on bulk random-accessible arrays.
--   
@package repa-eval
@version 4.0.0.1


-- | Gang Primitives.
module Data.Repa.Eval.Gang

-- | A <a>Gang</a> is a group of threads that execute arbitrary work
--   requests.
data Gang

-- | Fork a <a>Gang</a> with the given number of threads (at least 1).
forkGang :: Int -> IO Gang

-- | O(1). Yield the number of threads in the <a>Gang</a>.
gangSize :: Gang -> Int#

-- | Issue work requests for the <a>Gang</a> and wait until they complete.
--   
--   If the gang is already busy then print a warning to <a>stderr</a> and
--   just run the actions sequentially in the requesting thread.
gangIO :: Gang -> (Int# -> IO ()) -> IO ()

-- | Same as <a>gangIO</a> but in the <a>ST</a> monad.
gangST :: Gang -> (Int# -> ST s ()) -> ST s ()
instance Show Gang


-- | Values that can be stored in Repa Arrays.
module Data.Repa.Eval.Elt

-- | Element types that can be used with the blockwise filling functions.
--   
--   This class is mainly used to define the <a>touch</a> method. This is
--   used internally in the implementation of Repa to prevent let-bindings
--   from being floated inappropriately by the GHC simplifier. Doing a
--   <a>seq</a> sometimes isn't enough, because the GHC simplifier can
--   erase these, and still move around the bindings.
--   
--   This class supports the generic deriving mechanism; use <tt>deriving
--   instance Elt (TYPE)</tt>
class Elt a where touch = gtouch . from zero = to gzero one = to gone
touch :: Elt a => a -> IO ()
zero :: Elt a => a
one :: Elt a => a
instance (Elt a, Elt b, Elt c, Elt d, Elt e, Elt f) => Elt (a, b, c, d, e, f)
instance (Elt a, Elt b, Elt c, Elt d, Elt e) => Elt (a, b, c, d, e)
instance (Elt a, Elt b, Elt c, Elt d) => Elt (a, b, c, d)
instance (Elt a, Elt b, Elt c) => Elt (a, b, c)
instance (Elt a, Elt b) => Elt (a, b)
instance Elt Word64
instance Elt Word32
instance Elt Word16
instance Elt Word8
instance Elt Word
instance Elt Int64
instance Elt Int32
instance Elt Int16
instance Elt Int8
instance Elt Int
instance Elt Double
instance Elt Float
instance Elt Char
instance Elt Bool
instance Elt a => GElt (K1 i a)
instance GElt a => GElt (M1 i c a)
instance (GElt a, GElt b) => GElt (a :+: b)
instance (GElt a, GElt b) => GElt (a :*: b)
instance GElt U1


-- | Generic sequential array computation operators.
module Data.Repa.Eval.Generic.Seq

-- | Fill something sequentially.
--   
--   <ul>
--   <li>The array is filled linearly from start to finish.</li>
--   </ul>
fillLinear :: (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()

-- | Fill a block in a rank-2 array, sequentially.
--   
--   <ul>
--   <li>Blockwise filling can be more cache-efficient than linear filling
--   for rank-2 arrays.</li>
--   <li>The block is filled in row major order from top to bottom.</li>
--   </ul>
fillBlock2 :: (Int# -> a -> IO ()) -> (Int# -> Int# -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()

-- | Fill a block in a rank-2 array, sequentially.
--   
--   <ul>
--   <li>Blockwise filling can be more cache-efficient than linear filling
--   for rank-2 arrays.</li>
--   <li>Using cursor functions can help to expose inter-element indexing
--   computations to the GHC and LLVM optimisers.</li>
--   <li>Coordinates given are of the filled edges of the block.</li>
--   <li>The block is filled in row major order from top to bottom.</li>
--   <li>We need the <a>Elt</a> constraint so that we can use its
--   <a>touch</a> function to provide an order of evaluation amenable to
--   the LLVM optimiser. You should compile your Haskell program with
--   <tt>-fllvm -optlo-O3</tt> to enable LLVM's Global Value Numbering
--   optimisation.</li>
--   </ul>
fillCursoredBlock2 :: Elt a => (Int# -> a -> IO ()) -> (Int# -> Int# -> cursor) -> (Int# -> Int# -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()

-- | Sequential reduction of all the elements in an array.
foldAll :: (Int# -> a) -> (a -> a -> a) -> a -> Int# -> a

-- | Sequentially reduce values between the given indices.
foldRange :: (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> a

-- | Sequential reduction of a multidimensional array along the innermost
--   dimension.
foldInner :: (Int# -> a -> IO ()) -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> IO ()


-- | Generic parallel array computation operators.
module Data.Repa.Eval.Generic.Par

-- | Fill something in parallel.
--   
--   <ul>
--   <li>The array is split into linear chunks, and each thread linearly
--   fills one chunk.</li>
--   </ul>
fillChunked :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()

-- | Fill something in parallel, using a separate IO action for each
--   thread.
--   
--   <ul>
--   <li>The array is split into linear chunks, and each thread linearly
--   fills one chunk.</li>
--   </ul>
fillChunkedIO :: Gang -> (Int# -> a -> IO ()) -> (Int# -> IO (Int# -> IO a)) -> Int# -> IO ()

-- | Fill a block in a rank-2 array in parallel.
--   
--   <ul>
--   <li>Blockwise filling can be more cache-efficient than linear filling
--   for rank-2 arrays.</li>
--   <li>Coordinates given are of the filled edges of the block.</li>
--   <li>We divide the block into columns, and give one column to each
--   thread.</li>
--   <li>Each column is filled in row major order from top to bottom.</li>
--   </ul>
fillBlock2 :: Elt a => Gang -> (Int# -> a -> IO ()) -> (Int# -> Int# -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()

-- | Fill something in parallel, using a round-robin order.
--   
--   <ul>
--   <li>Threads handle elements in row major, round-robin order.</li>
--   <li>Using this method helps even out unbalanced workloads.</li>
--   </ul>
fillInterleaved :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()

-- | Fill a block in a rank-2 array in parallel.
--   
--   <ul>
--   <li>Blockwise filling can be more cache-efficient than linear filling
--   for rank-2 arrays.</li>
--   <li>Using cursor functions can help to expose inter-element indexing
--   computations to the GHC and LLVM optimisers.</li>
--   <li>Coordinates given are of the filled edges of the block.</li>
--   <li>We divide the block into columns, and give one column to each
--   thread.</li>
--   <li>We need the <a>Elt</a> constraint so that we can use its
--   <a>touch</a> function to provide an order of evaluation amenable to
--   the LLVM optimiser. You should compile your Haskell program with
--   <tt>-fllvm -optlo-O3</tt> to enable LLVM's Global Value Numbering
--   optimisation.</li>
--   </ul>
fillCursoredBlock2 :: Elt a => Gang -> (Int# -> a -> IO ()) -> (Int# -> Int# -> cursor) -> (Int# -> Int# -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()

-- | Parallel tree reduction of an array to a single value. Each thread
--   takes an equally sized chunk of the data and computes a partial sum.
--   The main thread then reduces the array of partial sums to the final
--   result.
--   
--   We don't require that the initial value be a neutral element, so each
--   thread computes a fold1 on its chunk of the data, and the seed element
--   is only applied in the final reduction step.
foldAll :: Gang -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> IO a

-- | Parallel reduction of a multidimensional array along the innermost
--   dimension. Each output value is computed by a single thread, with the
--   output values distributed evenly amongst the available threads.
foldInner :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> IO ()