module Data.Array.Accelerate.CUDA.Context (
Context(..), create, push, pop, destroy,
keepAlive, fromDeviceContext
) where
import Data.Array.Accelerate.CUDA.Debug ( message, verbose, dump_gc, showFFloatSIBase )
import Data.Array.Accelerate.CUDA.Analysis.Device
import Data.Function ( on )
import Control.Exception ( bracket_ )
import Control.Concurrent ( forkIO, threadDelay )
import Control.Monad ( when )
import GHC.Exts ( Ptr(..), mkWeak# )
import GHC.Base ( IO(..) )
import GHC.Weak ( Weak(..) )
import Text.PrettyPrint
import qualified Foreign.CUDA.Driver as CUDA hiding ( device )
import qualified Foreign.CUDA.Driver.Context as CUDA
data Context = Context {
deviceProperties :: !CUDA.DeviceProperties,
deviceContext :: !CUDA.Context,
weakContext :: !(Weak CUDA.Context)
}
instance Eq Context where
(==) = (==) `on` deviceContext
create :: CUDA.Device -> [CUDA.ContextFlag] -> IO Context
create dev flags = do
ctx <- CUDA.create dev flags >> CUDA.pop >>= keepAlive
actx@(Context prp _ _) <- fromDeviceContext dev ctx
when (CUDA.computeCapability prp >= CUDA.Compute 2 0)
$ bracket_ (CUDA.push ctx) CUDA.pop (CUDA.setCacheConfig CUDA.PreferL1)
message verbose (deviceInfo dev prp)
return actx
fromDeviceContext :: CUDA.Device -> CUDA.Context -> IO Context
fromDeviceContext dev ctx = do
prp <- CUDA.props dev
weak <- mkWeakContext ctx $ do
message dump_gc $ "gc: finalise context #" ++ show (CUDA.useContext ctx)
CUDA.destroy ctx
message dump_gc $ "gc: initialise context #" ++ show (CUDA.useContext ctx)
return $! Context prp ctx weak
destroy :: Context -> IO ()
destroy (deviceContext -> ctx) = do
message dump_gc ("gc: destroy context: #" ++ show (CUDA.useContext ctx))
CUDA.destroy ctx
push :: Context -> IO ()
push (deviceContext -> ctx) = do
message dump_gc ("gc: push context: #" ++ show (CUDA.useContext ctx))
CUDA.push ctx
pop :: IO ()
pop = do
ctx <- CUDA.pop
message dump_gc ("gc: pop context: #" ++ show (CUDA.useContext ctx))
mkWeakContext :: CUDA.Context -> IO () -> IO (Weak CUDA.Context)
mkWeakContext c@(CUDA.Context (Ptr c#)) f = IO $ \s ->
case mkWeak# c# c f s of (# s', w #) -> (# s', Weak w #)
keepAlive :: a -> IO a
keepAlive x = forkIO (caffeine x) >> return x
where
caffeine hit = do threadDelay (5 * 1000 * 1000)
caffeine hit
deviceInfo :: CUDA.Device -> CUDA.DeviceProperties -> String
deviceInfo dev prp = render $ reset <>
devID <> colon <+> vcat [ name <+> parens compute
, processors <+> at <+> text clock <+> parens cores <> comma <+> memory
]
where
name = text (CUDA.deviceName prp)
compute = text "compute capatability" <+> text (show $ CUDA.computeCapability prp)
devID = text "Device" <+> int (fromIntegral $ CUDA.useDevice dev)
processors = int (CUDA.multiProcessorCount prp) <+> text "multiprocessors"
cores = int (CUDA.multiProcessorCount prp * coresPerMultiProcessor prp) <+> text "cores"
memory = text mem <+> text "global memory"
clock = showFFloatSIBase (Just 2) 1000 (fromIntegral $ CUDA.clockRate prp * 1000 :: Double) "Hz"
mem = showFFloatSIBase (Just 0) 1024 (fromIntegral $ CUDA.totalGlobalMem prp :: Double) "B"
at = char '@'
reset = zeroWidthText "\r"