module Data.Yarr.Convolution.Eval () where
import Data.Yarr.Base
import Data.Yarr.Eval
import Data.Yarr.Shape as S
import Data.Yarr.Convolution.Repr
import Data.Yarr.Repr.Separate
import Data.Yarr.Utils.FixedVector as V hiding (zero)
import Data.Yarr.Utils.Fork
import Data.Yarr.Utils.Parallel
import Data.Yarr.Utils.Split
instance (BlockShape sh, UTarget tr tl sh a) =>
Load CV CVL tr tl sh a where
type LoadIndex CVL tl sh = sh
loadP fill threads arr tarr =
cvLoadP fill threads arr tarr zero (entire arr tarr)
loadS fill arr tarr = cvLoadS fill arr tarr zero (entire arr tarr)
instance (BlockShape sh, UTarget tr tl sh a) =>
RangeLoad CV CVL tr tl sh a where
rangeLoadP = cvLoadP
rangeLoadS = cvLoadS
cvLoadP
:: forall sh a tr tl. (BlockShape sh, UTarget tr tl sh a)
=> Fill sh a
-> Threads
-> UArray CV CVL sh a
-> UArray tr tl sh a
-> sh -> sh
-> IO ()
cvLoadP fill threads arr@(Convoluted _ _ _ bget center cget) tarr start end = do
force arr
force tarr
!ts <- threads
let loadRange = (start, end)
loadCenter@(cs, ce) = intersectBlocks (vl_2 center loadRange)
appFill = fill cget (write tarr)
centerWork = makeFork ts cs ce appFill
borderFill = S.fill bget (write tarr)
!bordersCount = arity (undefined :: (BorderCount sh))
bordersSplit = makeSplitIndex ts 0 bordersCount
borders = clipBlock loadRange loadCenter
borderWork !t =
let !startBorder = bordersSplit t
!endBorder = bordersSplit (t + 1)
go !b | b >= endBorder = return ()
| otherwise = do
let (bs, be) = borders V.! b
borderFill bs be
go (b + 1)
in go startBorder
threadWork !t = do
centerWork t
borderWork t
parallel_ ts threadWork
touchArray arr
touchArray tarr
cvLoadS
:: (BlockShape sh, UTarget tr tl sh a)
=> Fill sh a
-> UArray CV CVL sh a
-> UArray tr tl sh a
-> sh -> sh
-> IO ()
cvLoadS fill arr@(Convoluted _ _ _ bget center cget) tarr start end = do
force arr
force tarr
let loadRange = (start, end)
loadCenter@(cs, ce) = intersectBlocks (vl_2 center loadRange)
fill cget (write tarr) cs ce
let borders = clipBlock loadRange loadCenter
V.mapM_ (\(bs, be) -> S.fill bget (write tarr) bs be) borders
touchArray arr
touchArray tarr
instance (BlockShape sh, Vector v e,
UVecTarget tr tslr tl sh v2 e, Dim v ~ Dim v2,
InlinableArity (Dim v)) =>
VecLoad (SE CV) CV CVL tr tslr tl sh v v2 e where
loadSlicesP fill threads arr tarr =
cvLoadSlicesP fill threads arr tarr zero (entire arr tarr)
loadSlicesS fill arr tarr =
cvLoadSlicesS fill arr tarr zero (entire arr tarr)
instance (BlockShape sh, Vector v e,
UVecTarget tr tslr tl sh v2 e, Dim v ~ Dim v2,
InlinableArity (Dim v)) =>
RangeVecLoad (SE CV) CV CVL tr tslr tl sh v v2 e where
rangeLoadSlicesP = cvLoadSlicesP
rangeLoadSlicesS = cvLoadSlicesS
cvLoadSlicesP
:: forall sh v e tr tslr tl v2.
(BlockShape sh, UVecTarget tr tslr tl sh v2 e,
Vector v e, Dim v ~ Dim v2, InlinableArity (Dim v))
=> Fill sh e
-> Threads
-> UArray (SE CV) CVL sh (v e)
-> UArray tr tl sh (v2 e)
-> sh -> sh
-> IO ()
cvLoadSlicesP fill threads = \arr tarr start end -> do
force arr
force tarr
!ts <- threads
let loadRange = (start, end)
sls = slices arr
centers = V.map center sls
loadCenters =
V.inlinableMap (\c -> intersectBlocks (vl_2 c loadRange)) centers
writes = V.inlinableMap write (slices tarr)
borderGets = V.inlinableMap borderGet sls
borderFills = V.inlinableZipWith S.fill borderGets writes
centerGets = V.inlinableMap centerGet sls
centerFills = V.inlinableZipWith fill centerGets writes
centerWork = makeForkSlicesOnce ts loadCenters centerFills
!slsCount = arity (undefined :: (Dim v))
!bordersPerSlice = arity (undefined :: (BorderCount sh))
!allBorders = slsCount * bordersPerSlice
bordersSplit = makeSplitIndex ts 0 allBorders
borders = V.map (clipBlock loadRange) loadCenters
fillsAndBorders = V.zipWith (,) borderFills borders
bordersWork !t =
let !startChunk = bordersSplit t
!endChunk = (bordersSplit (t + 1)) 1
(!startSlice, !startBorder) =
startChunk `quotRem` bordersPerSlice
(!endSlice, !endBorder) =
endChunk `quotRem` bordersPerSlice
go sl b | sl > endSlice = return ()
| otherwise =
let e = if sl == endSlice
then endBorder
else (bordersPerSlice 1)
(bfill, borders) = fillsAndBorders V.! sl
in do goSl bfill borders b e
go (sl + 1) 0
goSl bfill borders c e
| c > e = return ()
| otherwise =
let (bs, be) = borders V.! c
in bfill bs be >> goSl bfill borders (c + 1) e
in go startSlice startBorder
threadWork !t = do
centerWork t
bordersWork t
parallel_ ts threadWork
touchArray arr
touchArray tarr
cvLoadSlicesS
:: (BlockShape sh, UVecTarget tr tslr tl sh v2 e,
Vector v e, Dim v ~ Dim v2, InlinableArity (Dim v))
=> Fill sh e
-> UArray (SE CV) CVL sh (v e)
-> UArray tr tl sh (v2 e)
-> sh -> sh
-> IO ()
cvLoadSlicesS fill arr tarr start end = do
force arr
force tarr
let sls = slices arr
borderGets = V.inlinableMap borderGet sls
centers = V.map center sls
centerGets = V.inlinableMap centerGet sls
writes = V.inlinableMap write (slices tarr)
centerFills = V.inlinableZipWith fill centerGets writes
loadRange = (start, end)
loadCenters = V.map (\c -> intersectBlocks (vl_2 c loadRange)) centers
V.zipWithM_
(\centerFill (cs, ce) -> centerFill cs ce)
centerFills loadCenters
let borders = V.map (clipBlock loadRange) loadCenters
borderFills = V.inlinableZipWith S.fill borderGets writes
V.zipWithM_
(\bfill borders -> V.mapM_ (\(bs, be) -> bfill bs be) borders)
borderFills borders
touchArray arr
touchArray tarr