{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UnboxedTuples #-}
module Data.Array.Accelerate.LLVM.PTX.Array.Prim (
mallocArray,
useArrayAsync,
indexArrayAsync,
peekArrayAsync,
pokeArrayAsync,
copyArrayAsync,
memsetArrayAsync,
withDevicePtr,
) where
import Data.Array.Accelerate.Array.Data
import Data.Array.Accelerate.Array.Unique
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.Lifetime hiding ( withLifetime )
import Data.Array.Accelerate.Representation.Elt
import Data.Array.Accelerate.Representation.Type
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.LLVM.State
import Data.Array.Accelerate.LLVM.PTX.Target
import Data.Array.Accelerate.LLVM.PTX.Execute.Async
import Data.Array.Accelerate.LLVM.PTX.Execute.Event
import Data.Array.Accelerate.LLVM.PTX.Execute.Stream
import Data.Array.Accelerate.LLVM.PTX.Array.Remote as Remote
import qualified Data.Array.Accelerate.LLVM.PTX.Debug as Debug
import qualified Foreign.CUDA.Driver as CUDA
import qualified Foreign.CUDA.Driver.Stream as CUDA
import Control.Monad
import Control.Monad.Reader
import Data.IORef
import GHC.Base
import Text.Printf
import Prelude
{-# INLINEABLE mallocArray #-}
mallocArray
:: HasCallStack
=> SingleType e
-> Int
-> ArrayData e
-> LLVM PTX ()
mallocArray :: SingleType e -> Int -> ArrayData e -> LLVM PTX ()
mallocArray !SingleType e
t !Int
n !ArrayData e
ad = do
String -> LLVM PTX ()
forall (m :: * -> *). MonadIO m => String -> m ()
message (String
"mallocArray: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
showBytes (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))))
LLVM PTX Bool -> LLVM PTX ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (LLVM PTX Bool -> LLVM PTX ()) -> LLVM PTX Bool -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ SingleType e -> ArrayData e -> Int -> Bool -> LLVM PTX Bool
forall e.
SingleType e -> ArrayData e -> Int -> Bool -> LLVM PTX Bool
Remote.malloc SingleType e
t ArrayData e
ad Int
n Bool
False
{-# INLINEABLE useArrayAsync #-}
useArrayAsync
:: HasCallStack
=> SingleType e
-> Int
-> ArrayData e
-> Par PTX (Future (ArrayData e))
useArrayAsync :: SingleType e
-> Int -> ArrayData e -> Par PTX (Future (ArrayData e))
useArrayAsync !SingleType e
t !Int
n !ArrayData e
ad = do
String -> Par PTX ()
forall (m :: * -> *). MonadIO m => String -> m ()
message (String
"useArrayAsync: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
showBytes (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))))
Bool
alloc <- LLVM PTX Bool -> Par PTX Bool
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX Bool -> Par PTX Bool) -> LLVM PTX Bool -> Par PTX Bool
forall a b. (a -> b) -> a -> b
$ SingleType e -> ArrayData e -> Int -> Bool -> LLVM PTX Bool
forall e.
SingleType e -> ArrayData e -> Int -> Bool -> LLVM PTX Bool
Remote.malloc SingleType e
t ArrayData e
ad Int
n Bool
True
if Bool
alloc
then SingleType e
-> Int -> ArrayData e -> Par PTX (Future (ArrayData e))
forall e.
HasCallStack =>
SingleType e
-> Int -> ArrayData e -> Par PTX (Future (ArrayData e))
pokeArrayAsync SingleType e
t Int
n ArrayData e
ad
else ArrayData e -> Par PTX (FutureR PTX (ArrayData e))
forall arch a.
(Async arch, HasCallStack) =>
a -> Par arch (FutureR arch a)
newFull ArrayData e
ad
{-# INLINEABLE pokeArrayAsync #-}
pokeArrayAsync
:: HasCallStack
=> SingleType e
-> Int
-> ArrayData e
-> Par PTX (Future (ArrayData e))
pokeArrayAsync :: SingleType e
-> Int -> ArrayData e -> Par PTX (Future (ArrayData e))
pokeArrayAsync !SingleType e
t !Int
n !ArrayData e
ad
| SingleArrayDict e
SingleArrayDict <- SingleType e -> SingleArrayDict e
forall a. SingleType a -> SingleArrayDict a
singleArrayDict SingleType e
t
, SingleDict e
SingleDict <- SingleType e -> SingleDict e
forall a. SingleType a -> SingleDict a
singleDict SingleType e
t
= do
let !src :: HostPtr e
src = Ptr e -> HostPtr e
forall a. Ptr a -> HostPtr a
CUDA.HostPtr (UniqueArray e -> Ptr e
forall a. UniqueArray a -> Ptr a
unsafeUniqueArrayPtr ArrayData e
UniqueArray e
ad)
!bytes :: Int
bytes = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))
Stream
stream <- (ParState -> Stream) -> Par PTX Stream
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ParState -> Stream
ptxStream
Future (UniqueArray e)
result <- LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$
Stream
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall (m :: * -> *) a b.
MonadIO m =>
Lifetime a -> (a -> m b) -> m b
withLifetime Stream
stream ((Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \Stream
st ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
dst ->
Stream
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a. Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking Stream
stream (LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ do
String -> Int -> Maybe Stream -> IO () -> LLVM PTX ()
forall (m :: * -> *).
MonadIO m =>
String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
"pokeArray" Int
bytes (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st) (IO () -> LLVM PTX ()) -> IO () -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ do
Int -> HostPtr e -> DevicePtr e -> Maybe Stream -> IO ()
forall a.
Storable a =>
Int -> HostPtr a -> DevicePtr a -> Maybe Stream -> IO ()
CUDA.pokeArrayAsync Int
n HostPtr e
src DevicePtr e
DevicePtr (ScalarArrayDataR e)
dst (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st)
Int64 -> IO ()
Debug.didCopyBytesToRemote (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bytes)
UniqueArray e -> LLVM PTX (UniqueArray e)
forall (m :: * -> *) a. Monad m => a -> m a
return ArrayData e
UniqueArray e
ad
Future (UniqueArray e) -> Par PTX (Future (UniqueArray e))
forall (m :: * -> *) a. Monad m => a -> m a
return Future (UniqueArray e)
result
{-# INLINEABLE indexArrayAsync #-}
indexArrayAsync
:: HasCallStack
=> Int
-> SingleType e
-> ArrayData e
-> Int
-> Par PTX (Future (ArrayData e))
indexArrayAsync :: Int
-> SingleType e
-> ArrayData e
-> Int
-> Par PTX (Future (ArrayData e))
indexArrayAsync !Int
n !SingleType e
t !ArrayData e
ad_src !Int
i
| SingleArrayDict e
SingleArrayDict <- SingleType e -> SingleArrayDict e
forall a. SingleType a -> SingleArrayDict a
singleArrayDict SingleType e
t
, SingleDict e
SingleDict <- SingleType e -> SingleDict e
forall a. SingleType a -> SingleDict a
singleDict SingleType e
t
= do
UniqueArray e
ad_dst <- IO (UniqueArray e) -> Par PTX (UniqueArray e)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (UniqueArray e) -> Par PTX (UniqueArray e))
-> IO (UniqueArray e) -> Par PTX (UniqueArray e)
forall a b. (a -> b) -> a -> b
$ TupR ScalarType e -> Int -> IO (ArrayData e)
forall e.
HasCallStack =>
TupR ScalarType e -> Int -> IO (MutableArrayData e)
newArrayData (ScalarType e -> TupR ScalarType e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (ScalarType e -> TupR ScalarType e)
-> ScalarType e -> TupR ScalarType e
forall a b. (a -> b) -> a -> b
$ SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t) Int
n
let !bytes :: Int
bytes = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TupR ScalarType e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TupR ScalarType e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))
!dst :: HostPtr e
dst = Ptr e -> HostPtr e
forall a. Ptr a -> HostPtr a
CUDA.HostPtr (UniqueArray e -> Ptr e
forall a. UniqueArray a -> Ptr a
unsafeUniqueArrayPtr UniqueArray e
ad_dst)
Stream
stream <- (ParState -> Stream) -> Par PTX Stream
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ParState -> Stream
ptxStream
Future (UniqueArray e)
result <- LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$
Stream
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall (m :: * -> *) a b.
MonadIO m =>
Lifetime a -> (a -> m b) -> m b
withLifetime Stream
stream ((Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \Stream
st ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad_src ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
src ->
Stream
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a. Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking Stream
stream (LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ do
String -> Int -> Maybe Stream -> IO () -> LLVM PTX ()
forall (m :: * -> *).
MonadIO m =>
String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
"indexArray" Int
bytes (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st) (IO () -> LLVM PTX ()) -> IO () -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ do
Int -> DevicePtr e -> HostPtr e -> Maybe Stream -> IO ()
forall a.
Storable a =>
Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
CUDA.peekArrayAsync Int
n (DevicePtr e
DevicePtr (ScalarArrayDataR e)
src DevicePtr e -> Int -> DevicePtr e
forall a. Storable a => DevicePtr a -> Int -> DevicePtr a
`CUDA.advanceDevPtr` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
n)) HostPtr e
dst (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st)
Int64 -> IO ()
Debug.didCopyBytesFromRemote (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bytes)
UniqueArray e -> LLVM PTX (UniqueArray e)
forall (m :: * -> *) a. Monad m => a -> m a
return UniqueArray e
ad_dst
Future (UniqueArray e) -> Par PTX (Future (UniqueArray e))
forall (m :: * -> *) a. Monad m => a -> m a
return Future (UniqueArray e)
result
{-# INLINEABLE peekArrayAsync #-}
peekArrayAsync
:: HasCallStack
=> SingleType e
-> Int
-> ArrayData e
-> Par PTX (Future (ArrayData e))
peekArrayAsync :: SingleType e
-> Int -> ArrayData e -> Par PTX (Future (ArrayData e))
peekArrayAsync !SingleType e
t !Int
n !ArrayData e
ad
| SingleArrayDict e
SingleArrayDict <- SingleType e -> SingleArrayDict e
forall a. SingleType a -> SingleArrayDict a
singleArrayDict SingleType e
t
, SingleDict e
SingleDict <- SingleType e -> SingleDict e
forall a. SingleType a -> SingleDict a
singleDict SingleType e
t
= do
let !bytes :: Int
bytes = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))
!dst :: HostPtr e
dst = Ptr e -> HostPtr e
forall a. Ptr a -> HostPtr a
CUDA.HostPtr (UniqueArray e -> Ptr e
forall a. UniqueArray a -> Ptr a
unsafeUniqueArrayPtr ArrayData e
UniqueArray e
ad)
Stream
stream <- (ParState -> Stream) -> Par PTX Stream
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ParState -> Stream
ptxStream
Future (UniqueArray e)
result <- LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$
Stream
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall (m :: * -> *) a b.
MonadIO m =>
Lifetime a -> (a -> m b) -> m b
withLifetime Stream
stream ((Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \Stream
st ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
src ->
Stream
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a. Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking Stream
stream (LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ do
String -> Int -> Maybe Stream -> IO () -> LLVM PTX ()
forall (m :: * -> *).
MonadIO m =>
String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
"peekArray" Int
bytes (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st) (IO () -> LLVM PTX ()) -> IO () -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ do
Int -> DevicePtr e -> HostPtr e -> Maybe Stream -> IO ()
forall a.
Storable a =>
Int -> DevicePtr a -> HostPtr a -> Maybe Stream -> IO ()
CUDA.peekArrayAsync Int
n DevicePtr e
DevicePtr (ScalarArrayDataR e)
src HostPtr e
dst (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st)
Int64 -> IO ()
Debug.didCopyBytesFromRemote (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bytes)
UniqueArray e -> LLVM PTX (UniqueArray e)
forall (m :: * -> *) a. Monad m => a -> m a
return ArrayData e
UniqueArray e
ad
Future (UniqueArray e) -> Par PTX (Future (UniqueArray e))
forall (m :: * -> *) a. Monad m => a -> m a
return Future (UniqueArray e)
result
{-# INLINEABLE copyArrayAsync #-}
copyArrayAsync
:: HasCallStack
=> SingleType e
-> Int
-> ArrayData e
-> ArrayData e
-> Par PTX (Future (ArrayData e))
copyArrayAsync :: SingleType e
-> Int
-> ArrayData e
-> ArrayData e
-> Par PTX (Future (ArrayData e))
copyArrayAsync !SingleType e
t !Int
n !ArrayData e
ad_src !ArrayData e
ad_dst
| SingleArrayDict e
SingleArrayDict <- SingleType e -> SingleArrayDict e
forall a. SingleType a -> SingleArrayDict a
singleArrayDict SingleType e
t
, SingleDict e
SingleDict <- SingleType e -> SingleDict e
forall a. SingleType a -> SingleDict a
singleDict SingleType e
t
= do
let !bytes :: Int
bytes = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))
Stream
stream <- (ParState -> Stream) -> Par PTX Stream
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ParState -> Stream
ptxStream
Future (UniqueArray e)
result <- LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$
Stream
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall (m :: * -> *) a b.
MonadIO m =>
Lifetime a -> (a -> m b) -> m b
withLifetime Stream
stream ((Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \Stream
st ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad_src ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
src ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, (Maybe Event, Future (UniqueArray e))))
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad_dst ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, (Maybe Event, Future (UniqueArray e))))
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, (Maybe Event, Future (UniqueArray e))))
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
dst -> do
(Maybe Event
e,Future (UniqueArray e)
r) <- Stream
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a. Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking Stream
stream (LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ do
String -> Int -> Maybe Stream -> IO () -> LLVM PTX ()
forall (m :: * -> *).
MonadIO m =>
String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
"copyArray" Int
bytes (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st) (IO () -> LLVM PTX ()) -> IO () -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ Int -> DevicePtr e -> DevicePtr e -> Maybe Stream -> IO ()
forall a.
Storable a =>
Int -> DevicePtr a -> DevicePtr a -> Maybe Stream -> IO ()
CUDA.copyArrayAsync Int
n DevicePtr e
DevicePtr (ScalarArrayDataR e)
src DevicePtr e
DevicePtr (ScalarArrayDataR e)
dst (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st)
UniqueArray e -> LLVM PTX (UniqueArray e)
forall (m :: * -> *) a. Monad m => a -> m a
return ArrayData e
UniqueArray e
ad_dst
(Maybe Event, (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Maybe Event, (Maybe Event, Future (UniqueArray e)))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Event
e, (Maybe Event
e,Future (UniqueArray e)
r))
Future (UniqueArray e) -> Par PTX (Future (UniqueArray e))
forall (m :: * -> *) a. Monad m => a -> m a
return Future (UniqueArray e)
result
{-# INLINEABLE memsetArrayAsync #-}
memsetArrayAsync
:: HasCallStack
=> SingleType e
-> Int
-> ScalarArrayDataR e
-> ArrayData e
-> Par PTX (Future (ArrayData e))
memsetArrayAsync :: SingleType e
-> Int
-> ScalarArrayDataR e
-> ArrayData e
-> Par PTX (Future (ArrayData e))
memsetArrayAsync !SingleType e
t !Int
n !ScalarArrayDataR e
v !ArrayData e
ad
| SingleArrayDict e
SingleArrayDict <- SingleType e -> SingleArrayDict e
forall a. SingleType a -> SingleArrayDict a
singleArrayDict SingleType e
t
, SingleDict e
SingleDict <- SingleType e -> SingleDict e
forall a. SingleType a -> SingleDict a
singleDict SingleType e
t
= do
let !bytes :: Int
bytes = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* TypeR e -> Int
forall e. TypeR e -> Int
bytesElt (ScalarType e -> TypeR e
forall (s :: * -> *) a. s a -> TupR s a
TupRsingle (SingleType e -> ScalarType e
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType e
t))
Stream
stream <- (ParState -> Stream) -> Par PTX Stream
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ParState -> Stream
ptxStream
Future (UniqueArray e)
result <- LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall arch a.
(Async arch, HasCallStack) =>
LLVM arch a -> Par arch a
liftPar (LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
-> Par PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$
Stream
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall (m :: * -> *) a b.
MonadIO m =>
Lifetime a -> (a -> m b) -> m b
withLifetime Stream
stream ((Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (Stream -> LLVM PTX (Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \Stream
st ->
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall e r.
HasCallStack =>
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr SingleType e
t ArrayData e
ad ((DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e)))
-> (DevicePtr (ScalarArrayDataR e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ \DevicePtr (ScalarArrayDataR e)
ptr ->
Stream
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a. Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking Stream
stream (LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e)))
-> LLVM PTX (UniqueArray e)
-> LLVM PTX (Maybe Event, Future (UniqueArray e))
forall a b. (a -> b) -> a -> b
$ do
String -> Int -> Maybe Stream -> IO () -> LLVM PTX ()
forall (m :: * -> *).
MonadIO m =>
String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
"memset" Int
bytes (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st) (IO () -> LLVM PTX ()) -> IO () -> LLVM PTX ()
forall a b. (a -> b) -> a -> b
$ DevicePtr e -> Int -> e -> Maybe Stream -> IO ()
forall a.
Storable a =>
DevicePtr a -> Int -> a -> Maybe Stream -> IO ()
CUDA.memsetAsync DevicePtr e
DevicePtr (ScalarArrayDataR e)
ptr Int
n e
ScalarArrayDataR e
v (Stream -> Maybe Stream
forall a. a -> Maybe a
Just Stream
st)
UniqueArray e -> LLVM PTX (UniqueArray e)
forall (m :: * -> *) a. Monad m => a -> m a
return ArrayData e
UniqueArray e
ad
Future (UniqueArray e) -> Par PTX (Future (UniqueArray e))
forall (m :: * -> *) a. Monad m => a -> m a
return Future (UniqueArray e)
result
{-# INLINEABLE withDevicePtr #-}
withDevicePtr
:: HasCallStack
=> SingleType e
-> ArrayData e
-> (CUDA.DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr :: SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX r
withDevicePtr !SingleType e
t !ArrayData e
ad !DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r)
f = do
Maybe r
mr <- SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX (Maybe r)
forall e r.
SingleType e
-> ArrayData e
-> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r))
-> LLVM PTX (Maybe r)
withRemote SingleType e
t ArrayData e
ad DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r)
f
case Maybe r
mr of
Maybe r
Nothing -> String -> LLVM PTX r
forall a. HasCallStack => String -> a
internalError String
"array does not exist on the device"
Just r
r -> r -> LLVM PTX r
forall (m :: * -> *) a. Monad m => a -> m a
return r
r
{-# INLINE nonblocking #-}
nonblocking :: Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking :: Stream -> LLVM PTX a -> LLVM PTX (Maybe Event, Future a)
nonblocking !Stream
stream !LLVM PTX a
action = do
a
result <- LLVM PTX a
action
Event
event <- Stream -> LLVM PTX Event
waypoint Stream
stream
Bool
ready <- IO Bool -> LLVM PTX Bool
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (Event -> IO Bool
query Event
event)
if Bool
ready
then do
Future a
future <- IORef (IVar a) -> Future a
forall a. IORef (IVar a) -> Future a
Future (IORef (IVar a) -> Future a)
-> LLVM PTX (IORef (IVar a)) -> LLVM PTX (Future a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (IORef (IVar a)) -> LLVM PTX (IORef (IVar a))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IVar a -> IO (IORef (IVar a))
forall a. a -> IO (IORef a)
newIORef (a -> IVar a
forall a. a -> IVar a
Full a
result))
(Maybe Event, Future a) -> LLVM PTX (Maybe Event, Future a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Event
forall a. Maybe a
Nothing, Future a
future)
else do
Future a
future <- IORef (IVar a) -> Future a
forall a. IORef (IVar a) -> Future a
Future (IORef (IVar a) -> Future a)
-> LLVM PTX (IORef (IVar a)) -> LLVM PTX (Future a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (IORef (IVar a)) -> LLVM PTX (IORef (IVar a))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IVar a -> IO (IORef (IVar a))
forall a. a -> IO (IORef a)
newIORef (Event -> Maybe (Lifetime FunctionTable) -> a -> IVar a
forall a. Event -> Maybe (Lifetime FunctionTable) -> a -> IVar a
Pending Event
event Maybe (Lifetime FunctionTable)
forall a. Maybe a
Nothing a
result))
(Maybe Event, Future a) -> LLVM PTX (Maybe Event, Future a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Event -> Maybe Event
forall a. a -> Maybe a
Just Event
event, Future a
future)
{-# INLINE withLifetime #-}
withLifetime :: MonadIO m => Lifetime a -> (a -> m b) -> m b
withLifetime :: Lifetime a -> (a -> m b) -> m b
withLifetime (Lifetime LTF
ref Weak LTF
_ a
a) a -> m b
f = do
b
r <- a -> m b
f a
a
IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (LTF -> IO ()
forall a. IORef a -> IO ()
touchIORef LTF
ref)
b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r
{-# INLINE touchIORef #-}
touchIORef :: IORef a -> IO ()
touchIORef :: IORef a -> IO ()
touchIORef IORef a
r = (State# RealWorld -> (# State# RealWorld, () #)) -> IO ()
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, () #)) -> IO ())
-> (State# RealWorld -> (# State# RealWorld, () #)) -> IO ()
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s -> case IORef a -> State# RealWorld -> State# RealWorld
forall a. a -> State# RealWorld -> State# RealWorld
touch# IORef a
r State# RealWorld
s of State# RealWorld
s' -> (# State# RealWorld
s', () #)
{-# INLINE showBytes #-}
showBytes :: Int -> String
showBytes :: Int -> String
showBytes Int
x = Maybe Int -> Double -> Double -> String -> String
forall a. RealFloat a => Maybe Int -> a -> a -> String -> String
Debug.showFFloatSIBase (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0) Double
1024 (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x :: Double) String
"B"
{-# INLINE trace #-}
trace :: MonadIO m => String -> m a -> m a
trace :: String -> m a -> m a
trace String
msg m a
next = IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (Flag -> String -> IO ()
Debug.traceIO Flag
Debug.dump_gc (String
"gc: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
msg)) m () -> m a -> m a
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> m a
next
{-# INLINE message #-}
message :: MonadIO m => String -> m ()
message :: String -> m ()
message String
s = String
s String -> m () -> m ()
forall (m :: * -> *) a. MonadIO m => String -> m a -> m a
`trace` () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
{-# INLINE transfer #-}
transfer :: MonadIO m => String -> Int -> Maybe CUDA.Stream -> IO () -> m ()
transfer :: String -> Int -> Maybe Stream -> IO () -> m ()
transfer String
name Int
bytes Maybe Stream
stream IO ()
action =
let showRate :: a -> a -> String
showRate a
x a
t = Maybe Int -> a -> a -> String -> String
forall a. RealFloat a => Maybe Int -> a -> a -> String -> String
Debug.showFFloatSIBase (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
3) a
1024 (a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
x a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
t) String
"B/s"
msg :: Double -> Double -> Double -> String
msg Double
wall Double
cpu Double
gpu = String -> String -> String -> String -> String -> String
forall r. PrintfType r => String -> r
printf String
"gc: %s: %s @ %s, %s"
String
name
(Int -> String
showBytes Int
bytes)
(Int -> Double -> String
forall a a. (RealFloat a, Integral a) => a -> a -> String
showRate Int
bytes Double
wall)
(Double -> Double -> Double -> String
Debug.elapsed Double
wall Double
cpu Double
gpu)
in
IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (Flag
-> (Double -> Double -> Double -> String)
-> Maybe Stream
-> IO ()
-> IO ()
Debug.timed Flag
Debug.dump_gc Double -> Double -> Double -> String
msg Maybe Stream
stream IO ()
action)