/* -----------------------------------------------------------------------------
 *
 * Module      : Shape
 * Copyright   : [2008..2010] Manuel M T Chakravarty, Gabriele Keller, Sean Lee
 *               [2009..2012] Manuel M T Chakravarty, Gabriele Keller, Trevor L. McDonell
 * License     : BSD3
 *
 * Maintainer  : Trevor L. McDonell
 * Stability   : experimental
 *
 * ---------------------------------------------------------------------------*/

#ifndef __ACCELERATE_CUDA_SHAPE_H__
#define __ACCELERATE_CUDA_SHAPE_H__

#include <cuda_runtime.h>
#include "accelerate_cuda_type.h"

/*
 * For the time being, D.A.A.CUDA.Execute converts shape components from
 * Haskell Ints (which may be 32- or 64-bit) into CUDA integers, which are
 * 32-bit. This avoids undue register pressure. Lift this restriction if
 * future hardware gains better 64-bit support and/or we need to access very
 * large arrays.
 *
 * typedef @type_hs_int@ Ix;
 */
typedef Int32 Ix;

typedef void* DIM0;
typedef Ix    DIM1;
typedef struct { Ix a1,a0; }                      DIM2;
typedef struct { Ix a2,a1,a0; }                   DIM3;
typedef struct { Ix a3,a2,a1,a0; }                DIM4;
typedef struct { Ix a4,a3,a2,a1,a0; }             DIM5;
typedef struct { Ix a5,a4,a3,a2,a1,a0; }          DIM6;
typedef struct { Ix a6,a5,a4,a3,a2,a1,a0; }       DIM7;
typedef struct { Ix a7,a6,a5,a4,a3,a2,a1,a0; }    DIM8;
typedef struct { Ix a8,a7,a6,a5,a4,a3,a2,a1,a0; } DIM9;

#ifdef __cplusplus

/* -----------------------------------------------------------------------------
 * Shape construction and destruction
 */

/*
 * Convert the individual dimensions of a linear array into a shape
 */
static __inline__ __device__ DIM0 shape()
{
    return NULL;
}

static __inline__ __device__ DIM1 shape(const Ix a)
{
    return a;
}

static __inline__ __device__ DIM2 shape(const Ix b, const Ix a)
{
    DIM2 sh = { b, a };
    return sh;
}

static __inline__ __device__ DIM3 shape(const Ix c, const Ix b, const Ix a)
{
    DIM3 sh = { c, b, a };
    return sh;
}

static __inline__ __device__ DIM4 shape(const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM4 sh = { d, c, b, a };
    return sh;
}

static __inline__ __device__ DIM5 shape(const Ix e, const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM5 sh = { e, d, c, b, a };
    return sh;
}

static __inline__ __device__ DIM6 shape(const Ix f, const Ix e, const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM6 sh = { f, e, d, c, b, a };
    return sh;
}

static __inline__ __device__ DIM7 shape(const Ix g, const Ix f, const Ix e, const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM7 sh = { g, f, e, d, c, b, a };
    return sh;
}

static __inline__ __device__ DIM8 shape(const Ix h, const Ix g, const Ix f, const Ix e, const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM8 sh = { h, g, f, e, d, c, b, a };
    return sh;
}

static __inline__ __device__ DIM9 shape(const Ix i, const Ix h, const Ix g, const Ix f, const Ix e, const Ix d, const Ix c, const Ix b, const Ix a)
{
    DIM9 sh = { i, h, g, f, e, d, c, b, a };
    return sh;
}

/*
 * Yield the innermost dimension of a shape
 */
template <typename Shape>
static __inline__ __device__ Ix indexHead(const Shape ix)
{
    return ix.a0;
}

template <>
static __inline__ __device__ Ix indexHead(const DIM0 ix)
{
    return 0;
}

template <>
static __inline__ __device__ Ix indexHead(const DIM1 ix)
{
    return ix;
}

/*
 * Yield all but the innermost dimension of a shape
 */
static __inline__ __device__ DIM0 indexTail(const DIM1 ix)
{
    return 0;
}

static __inline__ __device__ DIM1 indexTail(const DIM2 ix)
{
    return ix.a1;
}

static __inline__ __device__ DIM2 indexTail(const DIM3 ix)
{
    return shape(ix.a2, ix.a1);
}

static __inline__ __device__ DIM3 indexTail(const DIM4 ix)
{
    return shape(ix.a3, ix.a2, ix.a1);
}

static __inline__ __device__ DIM4 indexTail(const DIM5 ix)
{
    return shape(ix.a4, ix.a3, ix.a2, ix.a1);
}

static __inline__ __device__ DIM5 indexTail(const DIM6 ix)
{
    return shape(ix.a5, ix.a4, ix.a3, ix.a2, ix.a1);
}

static __inline__ __device__ DIM6 indexTail(const DIM7 ix)
{
    return shape(ix.a6, ix.a5, ix.a4, ix.a3, ix.a2, ix.a1);
}

static __inline__ __device__ DIM7 indexTail(const DIM8 ix)
{
    return shape(ix.a7, ix.a6, ix.a5, ix.a4, ix.a3, ix.a2, ix.a1);
}

static __inline__ __device__ DIM8 indexTail(const DIM9 ix)
{
    return shape(ix.a8, ix.a7, ix.a6, ix.a5, ix.a4, ix.a3, ix.a2, ix.a1);
}
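/*
 * Worked example (illustrative only, not part of the API): an array of
 * extent 2 x 3 x 4 corresponds to the Haskell-side shape (Z :. 2 :. 3 :. 4)
 * and is constructed here as shape(2, 3, 4). The innermost (fastest varying)
 * component is stored in a0, hence for sh = shape(2, 3, 4):
 *
 *   indexHead(sh) == 4
 *   indexTail(sh) == shape(2, 3)
 */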
/* -----------------------------------------------------------------------------
 * Shape methods
 */

/*
 * Number of dimensions of a shape
 */
template <typename Shape>
static __inline__ __device__ int dim(const Shape sh)
{
    return dim(indexTail(sh)) + 1;
}

template <>
static __inline__ __device__ int dim(const DIM0 sh)
{
    return 0;
}

/*
 * Yield the total number of elements in a shape
 */
template <typename Shape>
static __inline__ __device__ int size(const Shape sh)
{
    return size(indexTail(sh)) * indexHead(sh);
}

template <>
static __inline__ __device__ int size(const DIM0 sh)
{
    return 1;
}

/*
 * Add an index to the head of a shape
 */
static __inline__ __device__ DIM1 indexCons(const DIM0 sh, const Ix ix)
{
    return shape(ix);
}

static __inline__ __device__ DIM2 indexCons(const DIM1 sh, const Ix ix)
{
    return shape(sh, ix);
}

static __inline__ __device__ DIM3 indexCons(const DIM2 sh, const Ix ix)
{
    return shape(sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM4 indexCons(const DIM3 sh, const Ix ix)
{
    return shape(sh.a2, sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM5 indexCons(const DIM4 sh, const Ix ix)
{
    return shape(sh.a3, sh.a2, sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM6 indexCons(const DIM5 sh, const Ix ix)
{
    return shape(sh.a4, sh.a3, sh.a2, sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM7 indexCons(const DIM6 sh, const Ix ix)
{
    return shape(sh.a5, sh.a4, sh.a3, sh.a2, sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM8 indexCons(const DIM7 sh, const Ix ix)
{
    return shape(sh.a6, sh.a5, sh.a4, sh.a3, sh.a2, sh.a1, sh.a0, ix);
}

static __inline__ __device__ DIM9 indexCons(const DIM8 sh, const Ix ix)
{
    return shape(sh.a7, sh.a6, sh.a5, sh.a4, sh.a3, sh.a2, sh.a1, sh.a0, ix);
}

/*
 * Yield the index position in a linear, row-major representation of the
 * array. The first argument is the shape of the array, the second is the
 * index.
 */
template <typename Shape>
static __inline__ __device__ Ix toIndex(const Shape sh, const Shape ix)
{
    return toIndex(indexTail(sh), indexTail(ix)) * indexHead(sh) + indexHead(ix);
}

template <>
static __inline__ __device__ Ix toIndex(const DIM0 sh, const DIM0 ix)
{
    return 0;
}

template <>
static __inline__ __device__ Ix toIndex(const DIM1 sh, const DIM1 ix)
{
    return ix;
}
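/*
 * Worked example (illustrative only): unfolding the recursion above for
 * sh = shape(2, 3, 4) gives
 *
 *   toIndex(sh, shape(z, y, x)) == (z * 3 + y) * 4 + x
 *
 * and fromIndex (below) inverts this by repeated division and modulus, so
 * fromIndex(sh, i) == shape(i / 12, (i / 4) % 3, i % 4) for i in [0, 24).
 */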
/*
 * Inverse of 'toIndex'
 */
template <typename Shape>
static __inline__ __device__ Shape fromIndex(const Shape sh, const Ix ix)
{
    const Ix d = indexHead(sh);
    return indexCons(fromIndex(indexTail(sh), ix / d), ix % d);
}

template <>
static __inline__ __device__ DIM0 fromIndex(const DIM0 sh, const Ix ix)
{
    return 0;
}

template <>
static __inline__ __device__ DIM1 fromIndex(const DIM1 sh, const Ix ix)
{
    return ix;
}

/*
 * Test for the magic index `ignore`
 */
template <typename Shape>
static __inline__ __device__ int ignore(const Shape ix)
{
    return indexHead(ix) == -1 && ignore(indexTail(ix));
}

template <>
static __inline__ __device__ int ignore(const DIM0 ix)
{
    return 1;
}

#else

static __inline__ __device__ int dim(const Ix sh);
static __inline__ __device__ int size(const Ix sh);
static __inline__ __device__ int shape(const Ix sh);
static __inline__ __device__ int toIndex(const Ix sh, const Ix ix);
static __inline__ __device__ int fromIndex(const Ix sh, const Ix ix);
static __inline__ __device__ int indexHead(const Ix ix);
static __inline__ __device__ int indexTail(const Ix ix);
static __inline__ __device__ int indexCons(const Ix sh, const Ix ix);

#endif  // __cplusplus
#endif  // __ACCELERATE_CUDA_SHAPE_H__