{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeOperators #-}
module Data.Array.Accelerate.Examples.Internal.Backend
where
import Prelude as P
import Data.Label
import System.Console.GetOpt
import Data.Array.Accelerate
import qualified Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.Interpreter as Interp
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
import qualified Data.Array.Accelerate.LLVM.Native as CPU
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
import qualified Data.Array.Accelerate.LLVM.PTX as PTX
#endif
#ifdef ACCELERATE_CUDA_BACKEND
import qualified Data.Array.Accelerate.CUDA as CUDA
#endif
#ifdef ACCELERATE_CILK_BACKEND
import qualified Data.Array.Accelerate.Cilk as Cilk
#endif
-- | Execute an Accelerate computation on the selected backend.
--
-- Every backend that was compiled in (see the CPP guards) dispatches to the
-- @run@ function of its own implementation module.
--
-- The multi-device backend has a 'Backend' constructor and a command-line
-- flag, but this module imports no implementation for it. Selecting it is
-- therefore reported with an explicit error message instead of an anonymous
-- pattern-match failure.
--
{-# INLINE run #-}
run :: Arrays a => Backend -> Acc a -> a
run Interpreter = Interp.run
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
run CPU         = CPU.run
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
run PTX         = PTX.run
#endif
#ifdef ACCELERATE_CUDA_BACKEND
run CUDA        = CUDA.run
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
run Multi       = P.error
  "Data.Array.Accelerate.Examples.Internal.Backend.run: no implementation is available for the llvm-multi backend"
#endif
#ifdef ACCELERATE_CILK_BACKEND
run Cilk        = Cilk.run
#endif
-- | Execute an Accelerate function of one array argument on the selected
-- backend.
--
-- Most backends dispatch to the @run1@ of their implementation module. The
-- Cilk case goes through @Cilk.run@ instead, embedding the input with 'use'.
--
-- The multi-device backend has a 'Backend' constructor and a command-line
-- flag, but this module imports no implementation for it. Selecting it is
-- therefore reported with an explicit error message instead of an anonymous
-- pattern-match failure.
--
{-# INLINE run1 #-}
run1 :: (Arrays a, Arrays b) => Backend -> (Acc a -> Acc b) -> a -> b
run1 Interpreter f = Interp.run1 f
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
run1 CPU         f = CPU.run1 f
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
run1 PTX         f = PTX.run1 f
#endif
#ifdef ACCELERATE_CUDA_BACKEND
run1 CUDA        f = CUDA.run1 f
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
run1 Multi       _ = P.error
  "Data.Array.Accelerate.Examples.Internal.Backend.run1: no implementation is available for the llvm-multi backend"
#endif
#ifdef ACCELERATE_CILK_BACKEND
run1 Cilk        f = Cilk.run . f . use
#endif
-- | Two-argument variant of 'run1'.
--
-- The Accelerate function is uncurried with 'A.uncurry' so that it can be
-- passed to 'run1'; the two host-side arguments are supplied as a pair.
--
{-# INLINE run2 #-}
run2 :: (Arrays a, Arrays b, Arrays c) => Backend -> (Acc a -> Acc b -> Acc c) -> a -> b -> c
run2 backend f x y =
  -- The bang pattern forces the result of 'run1' to WHNF before it is applied.
  let !compiled = run1 backend (A.uncurry f)
  in  compiled (x, y)
-- | Three-argument variant of 'run1'.
--
-- The three host-side arguments are supplied as a triple, which is taken
-- apart again with 'unlift' before calling the Accelerate function.
--
{-# INLINE run3 #-}
run3 :: (Arrays a, Arrays b, Arrays c, Arrays d) => Backend -> (Acc a -> Acc b -> Acc c -> Acc d) -> a -> b -> c -> d
run3 backend f x y z =
  -- The bang pattern forces the result of 'run1' to WHNF before it is applied.
  let !compiled = run1 backend (\packed -> let (p, q, r) = unlift packed in f p q r)
  in  compiled (x, y, z)
-- | Four-argument variant of 'run1'.
--
-- The four host-side arguments are supplied as a 4-tuple, which is taken
-- apart again with 'unlift' before calling the Accelerate function.
--
{-# INLINE run4 #-}
run4 :: (Arrays a, Arrays b, Arrays c, Arrays d, Arrays e) => Backend -> (Acc a -> Acc b -> Acc c -> Acc d -> Acc e) -> a -> b -> c -> d -> e
run4 backend f x y z w =
  -- The bang pattern forces the result of 'run1' to WHNF before it is applied.
  let !compiled = run1 backend (\packed -> let (p, q, r, s) = unlift packed in f p q r s)
  in  compiled (x, y, z, w)
-- | The backends that can be selected to execute a program.
--
-- 'Interpreter' is always present. Every other constructor exists only when
-- its CPP flag is defined at build time. The derived 'P.Enum' and 'P.Bounded'
-- instances follow declaration order, and 'defaultBackend' relies on that
-- order, so constructors should not be rearranged casually.
--
data Backend
  = Interpreter   -- ^ reference implementation (sequential)
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
  | CPU           -- ^ LLVM based implementation for multicore CPUs
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
  | PTX           -- ^ LLVM based implementation for NVIDIA GPUs
#endif
#ifdef ACCELERATE_CUDA_BACKEND
  | CUDA          -- ^ CUDA based implementation for NVIDIA GPUs
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
  | Multi         -- ^ LLVM based multi-device implementation using CPUs and GPUs
#endif
#ifdef ACCELERATE_CILK_BACKEND
  | Cilk          -- ^ Cilk based implementation for multicore CPUs
#endif
  deriving (P.Eq, P.Enum, P.Bounded)
-- | Renders a backend as the lower-case name that 'availableBackends' also
-- uses for its long command-line flag.
instance Show Backend where
  show backend = case backend of
    Interpreter -> "interpreter"
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
    CPU         -> "llvm-cpu"
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
    PTX         -> "llvm-ptx"
#endif
#ifdef ACCELERATE_CUDA_BACKEND
    CUDA        -> "cuda"
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
    Multi       -> "llvm-multi"
#endif
#ifdef ACCELERATE_CILK_BACKEND
    Cilk        -> "cilk"
#endif
-- | The default backend.
--
-- When 'Interpreter' is the only constructor of 'Backend' (it is also the
-- 'maxBound'), the interpreter is used. Otherwise the constructor declared
-- immediately after 'Interpreter' is chosen.
--
defaultBackend :: Backend
defaultBackend = choose maxBound
  where
    -- Only the last constructor is inspected: if it is the interpreter there
    -- is nothing else to pick, so 'succ' is never applied out of range.
    choose Interpreter = Interpreter
    choose _           = succ Interpreter
-- | Command-line option descriptions for every backend that was compiled in.
--
-- Each entry is a long-only flag that takes no argument. The flag is named by
-- 'show' of the backend and, when given, stores that backend in the options
-- record through the supplied label.
--
availableBackends :: (options :-> Backend) -> [OptDescr (options -> options)]
availableBackends optBackend =
  [ backendFlag Interpreter
      "reference implementation (sequential)"
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
  , backendFlag CPU
      "LLVM based implementation for multicore CPUs (parallel)"
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
  , backendFlag PTX
      "LLVM based implementation for NVIDIA GPUs (parallel)"
#endif
#ifdef ACCELERATE_CUDA_BACKEND
  , backendFlag CUDA
      "CUDA based implementation for NVIDIA GPUs (parallel)"
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
  , backendFlag Multi
      "LLVM based multi-device implementation using CPUs and GPUs (parallel)"
#endif
#ifdef ACCELERATE_CILK_BACKEND
  , backendFlag Cilk
      "Cilk based implementation for multicore CPUs (parallel)"
#endif
  ]
  where
    -- One flag: no short name, long name taken from 'show', no argument.
    backendFlag backend description =
      Option [] [show backend] (NoArg (set optBackend backend)) description
-- | Optional concurrency limit associated with each backend.
--
-- 'Nothing' means no limit is given for the backend; @Just n@ gives a limit
-- of @n@. NOTE(review): presumably this bounds how many instances or tests
-- may run at once for the backend -- confirm against the caller.
--
-- The multi-device backend previously had no equation here, so querying it
-- failed with a pattern-match error. It now returns @Just 1@, the more
-- restrictive of the two values already used in this table.
-- TODO confirm that this is the intended limit for that backend.
--
concurrentBackends :: Backend -> Maybe Int
concurrentBackends Interpreter = Nothing
#ifdef ACCELERATE_LLVM_NATIVE_BACKEND
concurrentBackends CPU         = Nothing
#endif
#ifdef ACCELERATE_LLVM_PTX_BACKEND
concurrentBackends PTX         = Nothing
#endif
#ifdef ACCELERATE_CUDA_BACKEND
concurrentBackends CUDA        = Just 1
#endif
#ifdef ACCELERATE_LLVM_MULTIDEV_BACKEND
concurrentBackends Multi       = Just 1
#endif
#ifdef ACCELERATE_CILK_BACKEND
concurrentBackends Cilk        = Just 1
#endif