module Data.Repa.Eval.Generic.Par.Interleaved
  ( fillInterleaved
  ) where

import Data.Repa.Eval.Gang
import GHC.Exts

-- | Fill a result buffer in parallel, dealing elements out to the gang's
--   threads in round-robin order.
--
--   * The worker started with thread number @t@ writes indices @t@,
--     @t + n@, @t + 2n@, ... where @n@ is the gang size.
--
--   * Interleaving the indices like this helps even out workloads where the
--     cost per element is unbalanced.
--
--   NOTE(review): the indices cover exactly @0 .. len - 1@ only if 'gangIO'
--   numbers its threads @0 .. n - 1@ -- confirm against "Data.Repa.Eval.Gang".
fillInterleaved
  :: Gang                  -- ^ Gang to run the operation on.
  -> (Int# -> a -> IO ())  -- ^ Update function to write into result buffer.
  -> (Int# -> a)           -- ^ Function to get the value at a given index.
  -> Int#                  -- ^ Number of elements.
  -> IO ()
fillInterleaved gang write getElem len =
  gangIO gang $ \tid -> fillFrom tid (quotaFor tid)
  where
    -- Decide how to split the work across the threads: the gang size is both
    -- the number of workers and the distance between two consecutive
    -- indices handled by the same worker.
    !stride = gangSize gang

    -- Every thread handles at least this many elements.
    !baseQuota = quotInt# len stride

    -- Elements left over after the even split; threads whose number is below
    -- this value take one extra element each.
    !leftover = remInt# len stride

    -- Number of elements the given thread has to compute.
    quotaFor tid =
      case tid <# leftover of
        1# -> baseQuota +# 1#
        _  -> baseQuota
    {-# INLINE quotaFor #-}

    -- Write 'quota' elements, starting at index 'first' and advancing by
    -- the gang size after each write.
    fillFrom !first !quota = loop first quota
      where
        loop !ix !remaining =
          case remaining ># 0# of
            1# -> write ix (getElem ix)
                    >> loop (ix +# stride) (remaining -# 1#)
            _  -> return ()
    {-# INLINE fillFrom #-}
{-# INLINE [0] fillInterleaved #-}