module Data.Array.Accelerate.Math.FFT (
Mode(..),
fft1D, fft1D',
fft2D, fft2D',
fft3D, fft3D',
fft
) where
import Prelude as P
import Data.Array.Accelerate as A
import Data.Array.Accelerate.Array.Sugar ( showShape )
import Data.Array.Accelerate.Math.Complex
#ifdef ACCELERATE_CUDA_BACKEND
import Data.Array.Accelerate.CUDA.Foreign
import Data.Array.Accelerate.Array.Sugar as S ( shapeToList, shape, EltRepr )
import Data.Array.Accelerate.Type
import Data.Functor
import Foreign.CUDA.FFT
import qualified Foreign.CUDA.Driver as CUDA hiding (free)
#endif
import Data.Bits
data Mode = Forward | Reverse | Inverse
deriving (Eq, Show)
isPow2 :: Int -> Bool
isPow2 x = x .&. (x1) == 0
signOfMode :: Num a => Mode -> a
signOfMode m
= case m of
Forward -> 1
Reverse -> 1
Inverse -> 1
fft1D :: (Elt e, IsFloating e)
=> Mode
-> Vector (Complex e)
-> Acc (Vector (Complex e))
fft1D mode vec
= let Z :. len = arrayShape vec
in
fft1D' mode len (use vec)
fft1D' :: forall e. (Elt e, IsFloating e)
=> Mode
-> Int
-> Acc (Vector (Complex e))
-> Acc (Vector (Complex e))
fft1D' mode len vec
= let sign = signOfMode mode :: e
scale = P.fromIntegral len
#ifdef ACCELERATE_CUDA_BACKEND
sh = (Z:.len)
vec' = cudaFFT mode sh fft' vec
#else
vec' = fft' vec
#endif
fft' a = fft sign Z len a
in
if P.not (isPow2 len)
then error $ unlines
[ "Data.Array.Accelerate.FFT: fft1D"
, " Array dimensions must be powers of two, but are: " ++ showShape (Z:.len) ]
else case mode of
Inverse -> A.map (/scale) vec'
_ -> vec'
fft2D :: (Elt e, IsFloating e)
=> Mode
-> Array DIM2 (Complex e)
-> Acc (Array DIM2 (Complex e))
fft2D mode arr
= let Z :. height :. width = arrayShape arr
in
fft2D' mode width height (use arr)
fft2D' :: forall e. (Elt e, IsFloating e)
=> Mode
-> Int
-> Int
-> Acc (Array DIM2 (Complex e))
-> Acc (Array DIM2 (Complex e))
fft2D' mode width height arr
= let sign = signOfMode mode :: e
scale = P.fromIntegral (width * height)
#ifdef ACCELERATE_CUDA_BACKEND
sh = (Z:.width:.height)
arr' = cudaFFT mode sh fft' arr
#else
arr' = fft' arr
#endif
fft' a = A.transpose . fft sign (Z:.width) height
>-> A.transpose . fft sign (Z:.height) width
$ a
in
if P.not (isPow2 width && isPow2 height)
then error $ unlines
[ "Data.Array.Accelerate.FFT: fft2D"
, " Array dimensions must be powers of two, but are: " ++ showShape (Z:.height:.width) ]
else case mode of
Inverse -> A.map (/scale) arr'
_ -> arr'
fft3D :: (Elt e, IsFloating e)
=> Mode
-> Array DIM3 (Complex e)
-> Acc (Array DIM3 (Complex e))
fft3D mode arr
= let Z :. depth :. height :. width = arrayShape arr
in
fft3D' mode width height depth (use arr)
fft3D' :: forall e. (Elt e, IsFloating e)
=> Mode
-> Int
-> Int
-> Int
-> Acc (Array DIM3 (Complex e))
-> Acc (Array DIM3 (Complex e))
fft3D' mode width height depth arr
= let sign = signOfMode mode :: e
scale = P.fromIntegral (width * height)
#ifdef ACCELERATE_CUDA_BACKEND
sh = (Z:.width:.height:.depth)
arr' = cudaFFT mode sh fft' arr
#else
arr' = fft' arr
#endif
fft' a = rotate3D . fft sign (Z:.width :.depth) height
>-> rotate3D . fft sign (Z:.height:.width) depth
>-> rotate3D . fft sign (Z:.depth :.height) width
$ a
in
if P.not (isPow2 width && isPow2 height && isPow2 depth)
then error $ unlines
[ "Data.Array.Accelerate.FFT: fft3D"
, " Array dimensions must be powers of two, but are: " ++ showShape (Z:.depth:.height:.width) ]
else case mode of
Inverse -> A.map (/scale) arr'
_ -> arr'
rotate3D :: Elt e => Acc (Array DIM3 e) -> Acc (Array DIM3 e)
rotate3D arr
= backpermute (swap (A.shape arr)) swap arr
where
swap :: Exp DIM3 -> Exp DIM3
swap ix =
let Z :. m :. k :. l = unlift ix :: Z :. Exp Int :. Exp Int :. Exp Int
in lift $ Z :. k :. l :. m
fft :: forall sh e. (Slice sh, Shape sh, IsFloating e, Elt e)
=> e
-> sh
-> Int
-> Acc (Array (sh:.Int) (Complex e))
-> Acc (Array (sh:.Int) (Complex e))
fft sign sh sz arr = go sz 0 1
where
go :: Int -> Int -> Int -> Acc (Array (sh:.Int) (Complex e))
go len offset stride
| len == 2
= A.generate (constant (sh :. len)) swivel
| otherwise
= combine
(go (len `div` 2) offset (stride * 2))
(go (len `div` 2) (offset + stride) (stride * 2))
where
len' = the (unit (constant len))
offset' = the (unit (constant offset))
stride' = the (unit (constant stride))
swivel ix =
let sh' :. sz' = unlift ix :: Exp sh :. Exp Int
in
sz' ==* 0 ? ( (arr ! lift (sh' :. offset')) + (arr ! lift (sh' :. offset' + stride'))
, (arr ! lift (sh' :. offset')) (arr ! lift (sh' :. offset' + stride')) )
combine evens odds =
let odds' = A.generate (A.shape odds) (\ix -> twiddle len' (indexHead ix) * odds!ix)
in
append (A.zipWith (+) evens odds') (A.zipWith () evens odds')
twiddle n' i' =
let n = A.fromIntegral n'
i = A.fromIntegral i'
k = 2*pi*i/n
in
lift ( cos k, A.constant sign * sin k )
#ifdef ACCELERATE_CUDA_BACKEND
cudaFFT :: forall e sh. (Shape sh, Elt e, IsFloating e)
=> Mode
-> sh
-> (Acc (Array sh (Complex e)) -> Acc (Array sh (Complex e)))
-> Acc (Array sh (Complex e))
-> Acc (Array sh (Complex e))
cudaFFT mode sh p arr = deinterleave sh (foreignAcc ff pure (interleave arr))
where
ff = cudaAcc foreignFFT
pure = interleave . p . deinterleave sh
sign = signOfMode mode :: Int
foreignFFT :: Array DIM1 e -> CIO (Array DIM1 e)
foreignFFT arr' = do
hndl <- liftIO $
case shapeToList sh of
[width] -> plan1D width types 1
[height, width] -> plan2D height width types
[depth, height, width] -> plan3D depth height width types
_ -> error "Accelerate-fft cannot use CUFFT for arrays of dimensions higher than 3"
output <- allocateArray (S.shape arr')
iptr <- floatingDevicePtr arr'
optr <- floatingDevicePtr output
--Execute
liftIO $ execute hndl iptr optr
liftIO $ destroy hndl
return output
types
= case (floatingType :: FloatingType e) of
TypeFloat{} -> C2C
TypeDouble{} -> Z2Z
TypeCFloat{} -> C2C
TypeCDouble{} -> Z2Z
execute :: Handle -> CUDA.DevicePtr e -> CUDA.DevicePtr e -> IO ()
execute hndl iptr optr
= case (floatingType :: FloatingType e) of
TypeFloat{} -> execC2C hndl iptr optr sign
TypeDouble{} -> execZ2Z hndl iptr optr sign
TypeCFloat{} -> execC2C hndl (CUDA.castDevPtr iptr) (CUDA.castDevPtr optr) sign
TypeCDouble{} -> execZ2Z hndl (CUDA.castDevPtr iptr) (CUDA.castDevPtr optr) sign
floatingDevicePtr :: Vector e -> CIO (CUDA.DevicePtr e)
floatingDevicePtr v
= case (floatingType :: FloatingType e) of
TypeFloat{} -> singleDevicePtr v
TypeDouble{} -> singleDevicePtr v
TypeCFloat{} -> CUDA.castDevPtr <$> singleDevicePtr v
TypeCDouble{} -> CUDA.castDevPtr <$> singleDevicePtr v
singleDevicePtr :: DevicePtrs (EltRepr e) ~ ((),CUDA.DevicePtr b) => Vector e -> CIO (CUDA.DevicePtr b)
singleDevicePtr v = P.snd <$> devicePtrsOfArray v
#endif
append
:: forall sh e. (Slice sh, Shape sh, Elt e)
=> Acc (Array (sh:.Int) e)
-> Acc (Array (sh:.Int) e)
-> Acc (Array (sh:.Int) e)
append xs ys
= let sh :. n = unlift (A.shape xs) :: Exp sh :. Exp Int
_ :. m = unlift (A.shape ys) :: Exp sh :. Exp Int
in
generate (lift (sh :. n+m))
(\ix -> let sz :. i = unlift ix :: Exp sh :. Exp Int
in i <* n ? (xs ! lift (sz:.i), ys ! lift (sz:.in) ))
#ifdef ACCELERATE_CUDA_BACKEND
interleave :: (Shape sh, Elt e) => Acc (Array sh (Complex e)) -> Acc (Vector e)
interleave arr = generate sh swizzle
where
sh = index1 (2 * A.size arr)
swizzle ix =
let i = indexHead ix
v = arr A.!! (i `div` 2)
in
i `mod` 2 ==* 0 ? (real v, imag v)
deinterleave :: (Shape sh, Elt e) => sh -> Acc (Vector e) -> Acc (Array sh (Complex e))
deinterleave (constant -> sh) arr =
generate sh (\ix -> let i = toIndex sh ix * 2
in lift (arr A.!! i, arr A.!! (i+1)))
#endif