{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Data.Array.Accelerate.LLVM.PTX.State (
evalPTX,
createTargetForDevice, createTargetFromContext,
Pool(..),
withPool, unsafeWithPool,
defaultTarget,
defaultTargetPool,
) where
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.LLVM.State
import Data.Array.Accelerate.LLVM.PTX.Target
import qualified Data.Array.Accelerate.LLVM.PTX.Array.Table as MT
import qualified Data.Array.Accelerate.LLVM.PTX.Context as CT
import qualified Data.Array.Accelerate.LLVM.PTX.Debug as Debug
import qualified Data.Array.Accelerate.LLVM.PTX.Execute.Stream as ST
import qualified Data.Array.Accelerate.LLVM.PTX.Link.Cache as LC
import qualified Data.Array.Accelerate.LLVM.PTX.Pool as Pool
import Data.Range ( Range(..) )
import Control.Parallel.Meta ( Executable(..) )
import Control.Concurrent ( runInBoundThread )
import Control.Exception ( try, catch )
import Data.Maybe ( fromMaybe, catMaybes )
import System.Environment ( lookupEnv )
import System.IO.Unsafe ( unsafePerformIO )
import Text.Printf ( printf )
import Text.Read ( readMaybe )
import Foreign.CUDA.Driver.Error
import qualified Foreign.CUDA.Driver as CUDA
import qualified Foreign.CUDA.Driver.Context as Context
evalPTX :: PTX -> LLVM PTX a -> IO a
evalPTX ptx acc =
runInBoundThread (CT.withContext (ptxContext ptx) (evalLLVM ptx acc))
`catch`
\e -> $internalError "unhandled" (show (e :: CUDAException))
createTargetForDevice
:: CUDA.Device
-> CUDA.DeviceProperties
-> [CUDA.ContextFlag]
-> IO PTX
createTargetForDevice dev prp flags = do
raw <- CUDA.create dev flags
ptx <- createTarget dev prp raw
_ <- CUDA.pop
return ptx
createTargetFromContext
:: CUDA.Context
-> IO PTX
createTargetFromContext raw = do
dev <- Context.device
prp <- CUDA.props dev
createTarget dev prp raw
createTarget
:: CUDA.Device
-> CUDA.DeviceProperties
-> CUDA.Context
-> IO PTX
createTarget dev prp raw = do
ctx <- CT.raw dev prp raw
mt <- MT.new ctx
lc <- LC.new
st <- ST.new ctx
return $! PTX ctx mt lc st simpleIO
{-# INLINE simpleIO #-}
simpleIO :: Executable
simpleIO = Executable $ \_name _ppt range action ->
case range of
Empty -> return ()
IE u v -> action u v 0
data Pool a = Pool
{ managed :: {-# UNPACK #-} !(Pool.Pool a)
, unmanaged :: [a]
}
withPool :: Pool a -> (a -> IO b) -> IO b
withPool p = Pool.with (managed p)
unsafeWithPool :: Pool a -> (a -> b) -> b
unsafeWithPool p = Pool.unsafeWith (managed p)
{-# NOINLINE defaultTarget #-}
defaultTarget :: PTX
defaultTarget = head (unmanaged defaultTargetPool)
{-# NOINLINE defaultTargetPool #-}
defaultTargetPool :: Pool PTX
defaultTargetPool = unsafePerformIO $! do
Debug.traceIO Debug.dump_gc "gc: initialise default PTX pool"
CUDA.initialise []
ngpu <- CUDA.count
menv <- (readMaybe =<<) <$> lookupEnv "ACCELERATE_LLVM_PTX_DEVICES"
let ids = fromMaybe [0..ngpu-1] menv
boot :: Int -> IO (Maybe PTX)
boot i = do
dev <- CUDA.device i
prp <- CUDA.props dev
r <- try $ createTargetForDevice dev prp [CUDA.SchedAuto]
case r of
Right ptx -> return (Just ptx)
Left (e::CUDAException) -> do
Debug.traceIO Debug.dump_gc (printf "gc: failed to initialise device %d: %s" i (show e))
return Nothing
devices <- catMaybes <$> mapM boot ids
if null devices
then error "No CUDA-capable devices are available"
else Pool <$> Pool.create devices
<*> return devices