module Data.Array.Repa.Internals.EvalBlockwise
( fillVectorBlockwiseP
, fillVectorBlock
, fillVectorBlockP)
where
import Data.Array.Repa.Internals.Elt
import Data.Array.Repa.Internals.Gang
import Data.Vector.Unboxed.Mutable as VM
import GHC.Base (remInt, quotInt)
import Prelude as P
fillVectorBlockwiseP
:: Elt a
=> IOVector a
-> (Int -> a)
-> Int
-> IO ()
fillVectorBlockwiseP !vec !getElemFVBP !imageWidth
= gangIO theGang fillBlock
where !threads = gangSize theGang
!vecLen = VM.length vec
!imageHeight = vecLen `div` imageWidth
!colChunkLen = imageWidth `quotInt` threads
!colChunkSlack = imageWidth `remInt` threads
colIx !ix
| ix < colChunkSlack = ix * (colChunkLen + 1)
| otherwise = ix * colChunkLen + colChunkSlack
fillBlock :: Int -> IO ()
fillBlock !ix
= let !x0 = colIx ix
!x1 = colIx (ix + 1)
!y0 = 0
!y1 = imageHeight
in fillVectorBlock vec getElemFVBP imageWidth x0 y0 x1 y1
fillVectorBlockP
:: Elt a
=> IOVector a
-> (Int -> a)
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO ()
fillVectorBlockP !vec !getElem !imageWidth !x0 !y0 !x1 !y1
= gangIO theGang fillBlock
where !threads = gangSize theGang
!blockWidth = x1 x0 + 1
!colChunkLen = blockWidth `quotInt` threads
!colChunkSlack = blockWidth `remInt` threads
colIx !ix
| ix < colChunkSlack = x0 + ix * (colChunkLen + 1)
| otherwise = x0 + ix * colChunkLen + colChunkSlack
fillBlock :: Int -> IO ()
fillBlock !ix
= let !x0' = colIx ix
!x1' = colIx (ix + 1) 1
!y0' = y0
!y1' = y1
in fillVectorBlock vec getElem imageWidth x0' y0' x1' y1'
fillVectorBlock
:: Elt a
=> IOVector a
-> (Int -> a)
-> Int
-> Int
-> Int
-> Int
-> Int
-> IO ()
fillVectorBlock !vec !getElemFVB !imageWidth !x0 !y0 !x1 !y1
= do
fillBlock ixStart (ixStart + (x1 x0))
where
!ixStart = x0 + y0 * imageWidth
!ixFinal = x1 + y1 * imageWidth
fillBlock !ixLineStart !ixLineEnd
| ixLineStart > ixFinal = return ()
| otherwise
= do fillLine4 ixLineStart
fillBlock (ixLineStart + imageWidth) (ixLineEnd + imageWidth)
where
fillLine4 !ix
| ix + 4 > ixLineEnd = fillLine1 ix
| otherwise
= do
let d0 = getElemFVB (ix + 0)
let d1 = getElemFVB (ix + 1)
let d2 = getElemFVB (ix + 2)
let d3 = getElemFVB (ix + 3)
touch d0
touch d1
touch d2
touch d3
VM.unsafeWrite vec (ix + 0) d0
VM.unsafeWrite vec (ix + 1) d1
VM.unsafeWrite vec (ix + 2) d2
VM.unsafeWrite vec (ix + 3) d3
fillLine4 (ix + 4)
fillLine1 !ix
| ix > ixLineEnd = return ()
| otherwise
= do VM.unsafeWrite vec ix (getElemFVB ix)
fillLine1 (ix + 1)