{-# LANGUAGE BangPatterns, UnboxedTuples #-} module Data.Array.Repa.Internals.EvalCursored ( fillCursoredBlock2P , fillCursoredBlock2 ) where import Data.Array.Repa.Index import Data.Array.Repa.Internals.Elt import Data.Array.Repa.Internals.Gang import Data.Vector.Unboxed.Mutable as VM import GHC.Base (remInt, quotInt) import Prelude as P -- Block filling ---------------------------------------------------------------------------------- -- | Fill a block in a 2D image, in parallel. -- Coordinates given are of the filled edges of the block. -- We divide the block into columns, and give one column to each thread. fillCursoredBlock2P :: Elt a => IOVector a -- ^ vector to write elements into -> (DIM2 -> cursor) -- ^ make a cursor to a particular element -> (DIM2 -> cursor -> cursor) -- ^ shift the cursor by an offset -> (cursor -> a) -- ^ fn to evaluate an element at the given index. -> Int -- ^ width of whole image -> Int -- ^ x0 lower left corner of block to fill -> Int -- ^ y0 (low x and y value) -> Int -- ^ x1 upper right corner of block to fill -> Int -- ^ y1 (high x and y value, index of last elem to fill) -> IO () {-# INLINE [0] fillCursoredBlock2P #-} fillCursoredBlock2P !vec !makeCursorFCB !shiftCursorFCB !getElemFCB !imageWidth !x0 !y0 !x1 !y1 = gangIO theGang fillBlock where !threads = gangSize theGang !blockWidth = x1 - x0 + 1 -- All columns have at least this many pixels. !colChunkLen = blockWidth `quotInt` threads -- Extra pixels that we have to divide between some of the threads. !colChunkSlack = blockWidth `remInt` threads -- Get the starting pixel of a column in the image. {-# INLINE colIx #-} colIx !ix | ix < colChunkSlack = x0 + ix * (colChunkLen + 1) | otherwise = x0 + ix * colChunkLen + colChunkSlack -- Give one column to each thread {-# INLINE fillBlock #-} fillBlock :: Int -> IO () fillBlock !ix = let !x0' = colIx ix !x1' = colIx (ix + 1) - 1 !y0' = y0 !y1' = y1 in fillCursoredBlock2 vec makeCursorFCB shiftCursorFCB getElemFCB imageWidth x0' y0' x1' y1' -- | Fill a block in a 2D image. -- Coordinates given are of the filled edges of the block. fillCursoredBlock2 :: Elt a => IOVector a -- ^ vector to write elements into. -> (DIM2 -> cursor) -- ^ make a cursor to a particular element -> (DIM2 -> cursor -> cursor) -- ^ shift the cursor by an offset -> (cursor -> a) -- ^ fn to evaluate an element at the given index. -> Int -- ^ width of whole image -> Int -- ^ x0 lower left corner of block to fill -> Int -- ^ y0 (low x and y value) -> Int -- ^ x1 upper right corner of block to fill -> Int -- ^ y1 (high x and y value, index of last elem to fill) -> IO () {-# INLINE [0] fillCursoredBlock2 #-} fillCursoredBlock2 !vec !makeCursor !shiftCursor !getElem !imageWidth !x0 !y0 !x1 !y1 = fillBlock y0 where {-# INLINE fillBlock #-} fillBlock !y | y > y1 = return () | otherwise = do fillLine4 x0 fillBlock (y + 1) where {-# INLINE fillLine4 #-} fillLine4 !x | x + 4 > x1 = fillLine1 x | otherwise = do -- Compute each source cursor based on the previous one so that -- the variable live ranges in the generated code are shorter. let srcCur0 = makeCursor (Z :. y :. x) let srcCur1 = shiftCursor (Z :. 0 :. 1) srcCur0 let srcCur2 = shiftCursor (Z :. 0 :. 1) srcCur1 let srcCur3 = shiftCursor (Z :. 0 :. 1) srcCur2 -- Get the result value for each cursor. let val0 = getElem srcCur0 let val1 = getElem srcCur1 let val2 = getElem srcCur2 let val3 = getElem srcCur3 -- Ensure that we've computed each of the result values before we -- write into the array. If the backend code generator can't tell -- our destination array doesn't alias with the source then writing -- to it can prevent the sharing of intermediate computations. {- touch val0 touch val1 touch val2 touch val3 -} -- Compute cursor into destination array. let !dstCur0 = x + y * imageWidth VM.unsafeWrite vec (dstCur0) val0 VM.unsafeWrite vec (dstCur0 + 1) val1 VM.unsafeWrite vec (dstCur0 + 2) val2 VM.unsafeWrite vec (dstCur0 + 3) val3 fillLine4 (x + 4) {-# INLINE fillLine1 #-} fillLine1 !x | x > x1 = return () | otherwise = do VM.unsafeWrite vec (x + y * imageWidth) (getElem $ makeCursor (Z :. y :. x)) fillLine1 (x + 1)