-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Low-level parallel operators on bulk random-accessible arrays.
--
-- Low-level parallel operators on bulk random-accessible arrays.
@package repa-eval
@version 4.2.3.1
-- | Gang Primitives.
module Data.Repa.Eval.Gang
-- | A Gang is a group of threads that execute arbitrary work
-- requests.
data Gang
-- | Fork a Gang with the given number of threads (at least 1).
forkGang :: Int -> IO Gang
-- | O(1). Yield the number of threads in the Gang.
gangSize :: Gang -> Int#
-- | Issue work requests for the Gang and wait until they complete.
--
-- If the gang is already busy then print a warning to stderr and
-- just run the actions sequentially in the requesting thread.
gangIO :: Gang -> (Int# -> IO ()) -> IO ()
-- | Same as gangIO but in the ST monad.
gangST :: Gang -> (Int# -> ST s ()) -> ST s ()
instance GHC.Show.Show Data.Repa.Eval.Gang.Gang
-- | Values that can be stored in Repa Arrays.
module Data.Repa.Eval.Elt
-- | Element types that can be used with the blockwise filling functions.
--
-- This class is mainly used to define the touch method. This is
-- used internally in the implementation of Repa to prevent let-bindings
-- from being floated inappropriately by the GHC simplifier. Doing a
-- seq sometimes isn't enough, because the GHC simplifier can
-- erase these, and still move around the bindings.
--
-- This class supports the generic deriving mechanism, use deriving
-- instance Elt (TYPE)
class Elt a where touch = gtouch . from zero = to gzero one = to gone
-- | Place a demand on a value at a particular point in an IO computation.
touch :: Elt a => a -> IO ()
-- | Place a demand on a value at a particular point in an IO computation.
touch :: (Elt a, Generic a, GElt (Rep a)) => a -> IO ()
-- | Generic zero value, helpful for debugging.
zero :: Elt a => a
-- | Generic zero value, helpful for debugging.
zero :: (Elt a, Generic a, GElt (Rep a)) => a
-- | Generic one value, helpful for debugging.
one :: Elt a => a
-- | Generic one value, helpful for debugging.
one :: (Elt a, Generic a, GElt (Rep a)) => a
instance Data.Repa.Eval.Elt.GElt GHC.Generics.U1
instance (Data.Repa.Eval.Elt.GElt a, Data.Repa.Eval.Elt.GElt b) => Data.Repa.Eval.Elt.GElt (a GHC.Generics.:*: b)
instance (Data.Repa.Eval.Elt.GElt a, Data.Repa.Eval.Elt.GElt b) => Data.Repa.Eval.Elt.GElt (a GHC.Generics.:+: b)
instance Data.Repa.Eval.Elt.GElt a => Data.Repa.Eval.Elt.GElt (GHC.Generics.M1 i c a)
instance Data.Repa.Eval.Elt.Elt a => Data.Repa.Eval.Elt.GElt (GHC.Generics.K1 i a)
instance Data.Repa.Eval.Elt.Elt GHC.Types.Bool
instance Data.Repa.Eval.Elt.Elt GHC.Types.Char
instance Data.Repa.Eval.Elt.Elt GHC.Types.Float
instance Data.Repa.Eval.Elt.Elt GHC.Types.Double
instance Data.Repa.Eval.Elt.Elt GHC.Types.Int
instance Data.Repa.Eval.Elt.Elt GHC.Int.Int8
instance Data.Repa.Eval.Elt.Elt GHC.Int.Int16
instance Data.Repa.Eval.Elt.Elt GHC.Int.Int32
instance Data.Repa.Eval.Elt.Elt GHC.Int.Int64
instance Data.Repa.Eval.Elt.Elt GHC.Types.Word
instance Data.Repa.Eval.Elt.Elt GHC.Word.Word8
instance Data.Repa.Eval.Elt.Elt GHC.Word.Word16
instance Data.Repa.Eval.Elt.Elt GHC.Word.Word32
instance Data.Repa.Eval.Elt.Elt GHC.Word.Word64
instance (Data.Repa.Eval.Elt.Elt a, Data.Repa.Eval.Elt.Elt b) => Data.Repa.Eval.Elt.Elt (a, b)
instance (Data.Repa.Eval.Elt.Elt a, Data.Repa.Eval.Elt.Elt b, Data.Repa.Eval.Elt.Elt c) => Data.Repa.Eval.Elt.Elt (a, b, c)
instance (Data.Repa.Eval.Elt.Elt a, Data.Repa.Eval.Elt.Elt b, Data.Repa.Eval.Elt.Elt c, Data.Repa.Eval.Elt.Elt d) => Data.Repa.Eval.Elt.Elt (a, b, c, d)
instance (Data.Repa.Eval.Elt.Elt a, Data.Repa.Eval.Elt.Elt b, Data.Repa.Eval.Elt.Elt c, Data.Repa.Eval.Elt.Elt d, Data.Repa.Eval.Elt.Elt e) => Data.Repa.Eval.Elt.Elt (a, b, c, d, e)
instance (Data.Repa.Eval.Elt.Elt a, Data.Repa.Eval.Elt.Elt b, Data.Repa.Eval.Elt.Elt c, Data.Repa.Eval.Elt.Elt d, Data.Repa.Eval.Elt.Elt e, Data.Repa.Eval.Elt.Elt f) => Data.Repa.Eval.Elt.Elt (a, b, c, d, e, f)
-- | Generic sequential array computation operators.
module Data.Repa.Eval.Generic.Seq
-- | Fill something sequentially.
--
--
-- - The array is filled linearly from start to finish.
--
fillLinear :: (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()
-- | Fill a block in a rank-2 array, sequentially.
--
--
-- - Blockwise filling can be more cache-efficient than linear filling
-- for rank-2 arrays.
-- - The block is filled in row major order from top to bottom.
--
fillBlock2 :: (Int# -> a -> IO ()) -> (Int# -> Int# -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
-- | Fill a block in a rank-2 array, sequentially.
--
--
-- - Blockwise filling can be more cache-efficient than linear filling
-- for rank-2 arrays.
-- - Using cursor functions can help to expose inter-element indexing
-- computations to the GHC and LLVM optimisers.
-- - Coordinates given are of the filled edges of the block.
-- - The block is filled in row major order from top to bottom.
-- - We need the Elt constraint so that we can use its
-- touch function to provide an order of evaluation amenable to
-- the LLVM optimiser. You should compile your Haskell program with
-- -fllvm -optlo-O3 to enable LLVM's Global Value Numbering
-- optimisation.
--
fillCursoredBlock2 :: Elt a => (Int# -> a -> IO ()) -> (Int# -> Int# -> cursor) -> (Int# -> Int# -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
-- | Sequential reduction of all the elements in an array.
foldAll :: (Int# -> a) -> (a -> a -> a) -> a -> Int# -> a
-- | Sequentially reduce values between the given indices.
foldRange :: (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> a
-- | Sequential reduction of a multidimensional array along the innermost
-- dimension.
foldInner :: (Int# -> a -> IO ()) -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> IO ()
-- | Generic parallel array computation operators.
module Data.Repa.Eval.Generic.Par
-- | Fill something in parallel.
--
--
-- - The array is split into linear chunks, and each thread linearly
-- fills one chunk.
--
fillChunked :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()
-- | Fill something in parallel, using a separate IO action for each
-- thread.
--
--
-- - The array is split into linear chunks, and each thread linearly
-- fills one chunk.
--
fillChunkedIO :: Gang -> (Int# -> a -> IO ()) -> (Int# -> IO (Int# -> IO a)) -> Int# -> IO ()
-- | Fill a block in a rank-2 array in parallel.
--
--
-- - Blockwise filling can be more cache-efficient than linear filling
-- for rank-2 arrays.
-- - Coordinates given are of the filled edges of the block.
-- - We divide the block into columns, and give one column to each
-- thread.
-- - Each column is filled in row major order from top to bottom.
--
fillBlock2 :: Elt a => Gang -> (Int# -> a -> IO ()) -> (Int# -> Int# -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
-- | Fill something in parallel, using a round-robin order.
--
--
-- - Threads handle elements in row major, round-robin order.
-- - Using this method helps even out unbalanced workloads.
--
fillInterleaved :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> Int# -> IO ()
-- | Fill a block in a rank-2 array in parallel.
--
--
-- - Blockwise filling can be more cache-efficient than linear filling
-- for rank-2 arrays.
-- - Using cursor functions can help to expose inter-element indexing
-- computations to the GHC and LLVM optimisers.
-- - Coordinates given are of the filled edges of the block.
-- - We divide the block into columns, and give one column to each
-- thread.
-- - We need the Elt constraint so that we can use its
-- touch function to provide an order of evaluation amenable to
-- the LLVM optimiser. You should compile your Haskell program with
-- -fllvm -optlo-O3 to enable LLVM's Global Value Numbering
-- optimisation.
--
fillCursoredBlock2 :: Elt a => Gang -> (Int# -> a -> IO ()) -> (Int# -> Int# -> cursor) -> (Int# -> Int# -> cursor -> cursor) -> (cursor -> a) -> Int# -> Int# -> Int# -> Int# -> Int# -> IO ()
-- | Parallel tree reduction of an array to a single value. Each thread
-- takes an equally sized chunk of the data and computes a partial result.
-- The main thread then reduces the array of partial results to the final
-- result.
--
-- We don't require that the initial value be a neutral element, so each
-- thread computes a fold1 on its chunk of the data, and the seed element
-- is only applied in the final reduction step.
foldAll :: Gang -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> IO a
-- | Parallel reduction of a multidimensional array along the innermost
-- dimension. Each output value is computed by a single thread, with the
-- output values distributed evenly amongst the available threads.
foldInner :: Gang -> (Int# -> a -> IO ()) -> (Int# -> a) -> (a -> a -> a) -> a -> Int# -> Int# -> IO ()