{-# LANGUAGE GADTs #-} -- | -- Module : Data.Array.Accelerate.Array.Slice -- -- Maintainer : Frederik Meisner Madsen -- Stability : experimental -- Portability : non-portable (GHC extensions) -- module Data.Array.Accelerate.CUDA.Array.Slice ( transferDesc, blocksOf, TransferDesc(..) ) where import Control.Arrow ( first ) import Data.List ( groupBy, elemIndex ) import Data.Array.Accelerate.Array.Representation ( SliceIndex(..) ) -- Slicing algorithm. -- Figure out how to transfer a multi-dimensional slice from a -- multi-dimensional array of the same or higher dimensionality to a -- contiguous memory region. The algorithm uses as few data transfers -- as possible, where a linear memory copy with a stride consitutes -- one transfer (cudaMemcpy2d). Each transfer is described by four -- numbers: A start, a stride, a number of basic blocks and block -- size. -- -- Memory transfer(s) description suitable for (multiple calls to) -- cudaMemcpu2D. The number of calls necessary is given by the length -- of the starting offsets. The unit is basic elements of the target -- array. data TransferDesc = TransferDesc { starts :: [Int] -- Starting offsets. , stride :: Int -- Stride. , nblocks :: Int -- Number of blocks. , blocksize :: Int } -- Elements per block. deriving Show -- Get the basic blocks of the given transfer descriptions, described -- as offset and length. This function can be used to do a memory copy -- with only contiguous data transfers, if a strided transfer is not -- available. blocksOf :: TransferDesc -> [(Int, Int, Int)] blocksOf tdesc = [ ( start + i * stride tdesc , i * blocksize tdesc , blocksize tdesc) | start <- starts tdesc , i <- [0..nblocks tdesc-1]] -- A slice index in one dimension, annotated with information used to -- catogorize how the dimension is transferred. type SliceIR = ( TransferType -- Transfer type annotation. , Int ) -- Size of this dimension. data TransferType = Strided -- Transfer the entire dimension. Each element -- will be transfered in a different block, but in -- a single transfer. | Contiguous -- Transfer the entire dimension in one -- contiguous data transfer. Each element will -- be transfered in the same block in the same -- transfer. | Fixed Int -- Transfer a specific element in this -- dimension. The argument is the element index. | FixedAll -- Transfer the entire dimension. Each element -- will be transfered in a seperate data -- transfer. -- Convert a slice index to internal representation used by this -- algorithm. Initially, all dimensions are represented as strided -- (transfer entire dimension) or fixed (transfer specific element). toIR :: SliceIndex slix sl co dim -> slix -> dim -> [SliceIR] toIR SliceNil () () = [] toIR (SliceAll si) (sl, ()) (sh, n) = (Strided, n):toIR si sl sh toIR (SliceFixed si) (sl, i ) (sh, n) = (Fixed i, n):toIR si sl sh {- toIR :: SliceIndex slix sl co dim -> slix -> dim -> [SliceIR] toIR slix sl dim = f slix sl dim [] where f :: SliceIndex slix' sl' co' dim' -> slix' -> dim' -> [SliceIR] -> [SliceIR] f SliceNil () () res = res f (SliceAll si) (sl, ()) (sh, n) res = f si sl sh ((Strided, n):res) f (SliceFixed si) (sl, i ) (sh, n) res = f si sl sh ((Fixed i, n):res) -} -- Promote strided slice indices to contiguous slice indices when -- possible (= all strided innermost dimensions). strideToContiguous :: [SliceIR] -> [SliceIR] strideToContiguous slirs = let (strided, rest) = span (isStrided . fst) slirs conti = map (first promote) strided in conti ++ rest where isStrided Strided = True isStrided _ = False promote Strided = Contiguous promote x = x -- Find largest group (= next to each other) of strided slice -- indices. Promote all other strided groups to fixed groups, which -- entails multiple data transfers in these dimensions. This is -- necessary step, since it is impossible to describe the memory -- transfer with the given transfer description type in some -- situations (namely when there is a gap between strided -- dimensions). This step chooses the split that results in fewest -- possible data transfers by selecting the largest stride pivot. selectStridePivot :: [SliceIR] -> [SliceIR] selectStridePivot slirs = let -- Group the slice dimensions in groups of same transfer types. groups = groupBy sameTransferType slirs -- Calculate the sizes of the strided groups, ignore the rest. sizes = map (product . map stridedSize) groups -- Find the largest strided group. Just imax = elemIndex (maximum sizes) sizes -- Promote the remaining strided groups to fixed groups using -- multiple data transfers. (groups1, pivotGroup:groups2) = splitAt imax groups groups' = map (map (first promote)) groups1 ++ pivotGroup:map (map (first promote)) groups2 in concat groups' where sameTransferType :: SliceIR -> SliceIR -> Bool sameTransferType x y = fst x ~= fst y where (~=) :: TransferType -> TransferType -> Bool Contiguous ~= Contiguous = True Strided ~= Strided = True Fixed _ ~= Fixed _ = True FixedAll ~= FixedAll = True _ ~= _ = False promote :: TransferType -> TransferType promote Strided = FixedAll promote x = x stridedSize :: SliceIR -> Int stridedSize (Strided, n) = n stridedSize _ = 0 -- Compute a description of the transfers necessary to copy the -- specified slice. transferDesc :: SliceIndex slix sl co dim -> slix -- Slice index. -> dim -- Full shape. -> TransferDesc transferDesc slix sl dim = let slirs = selectStridePivot $ strideToContiguous $ toIR slix sl dim tdesc0 = TransferDesc [0] 1 1 1 size0 = 1 in f slirs size0 tdesc0 where f :: [SliceIR] -> Int -> TransferDesc -> TransferDesc f slirs m tdesc = case slirs of [] -> tdesc (ttyp, n):slirs' -> f slirs' (n * m) $ case ttyp of Strided -> TransferDesc { starts = starts tdesc , stride = if stride tdesc == 1 then m else stride tdesc , nblocks = n * nblocks tdesc , blocksize = blocksize tdesc } Contiguous -> TransferDesc { starts = starts tdesc , stride = stride tdesc , nblocks = nblocks tdesc , blocksize = n * blocksize tdesc } Fixed i -> TransferDesc { starts = [start + i * m | start <- starts tdesc] , stride = stride tdesc , nblocks = nblocks tdesc , blocksize = blocksize tdesc } FixedAll -> TransferDesc { starts = [start + i * m | start <- starts tdesc, i <- [0..n-1]] , stride = stride tdesc , nblocks = nblocks tdesc , blocksize = blocksize tdesc }