Copyright | [2016..2017] Trevor L. McDonell |
---|---|
License | BSD3 |
Maintainer | Trevor L. McDonell <tmcdonell@cse.unsw.edu.au> |
Stability | experimental |
Portability | non-portable (GHC extensions) |
Safe Haskell | None |
Language | Haskell2010 |
Synopsis
- data ForeignAcc f where
- ForeignAcc :: String -> (Stream -> a -> LLVM PTX b) -> ForeignAcc (a -> b)
- data ForeignExp f where
- ForeignExp :: String -> IRFun1 PTX () (x -> y) -> ForeignExp (x -> y)
- data LLVM target a
- data PTX = PTX {
- ptxContext :: !Context
- ptxMemoryTable :: !MemoryTable
- ptxKernelTable :: !KernelTable
- ptxStreamReservoir :: !Reservoir
- fillP :: !Executable
- data Context = Context {
- deviceProperties :: !DeviceProperties
- deviceContext :: !(Lifetime Context)
- liftIO :: MonadIO m => IO a -> m a
- withDevicePtr :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Typeable e, Typeable a, Storable a) => ArrayData e -> (DevicePtr a -> LLVM PTX (Maybe Event, r)) -> LLVM PTX r
- module Data.Array.Accelerate.LLVM.Array.Data
- copyToHostLazy :: Arrays arrs => arrs -> LLVM PTX arrs
- cloneArrayAsync :: (Shape sh, Elt e) => Stream -> Array sh e -> LLVM PTX (Array sh e)
- type Async a = AsyncR PTX a
- type Stream = Lifetime Stream
- type Event = Lifetime Event
- type family StreamR arch :: Type
- type family EventR arch :: Type
- join :: Async arch => StreamR arch -> LLVM arch ()
- fork :: Async arch => LLVM arch (StreamR arch)
- checkpoint :: Async arch => StreamR arch -> LLVM arch (EventR arch)
- after :: Async arch => StreamR arch -> EventR arch -> LLVM arch ()
- block :: Async arch => EventR arch -> LLVM arch ()
- async :: Async arch => (StreamR arch -> LLVM arch a) -> LLVM arch (AsyncR arch a)
- get :: Async arch => AsyncR arch a -> LLVM arch a
- data AsyncR arch a = AsyncR !(EventR arch) !a
Documentation
data ForeignAcc f where Source #
ForeignAcc :: String -> (Stream -> a -> LLVM PTX b) -> ForeignAcc (a -> b) |
Instances
Foreign ForeignAcc Source # | |
Defined in Data.Array.Accelerate.LLVM.PTX.Foreign strForeign :: ForeignAcc args -> String liftForeign :: ForeignAcc args -> Q (TExp (ForeignAcc args)) |
data ForeignExp f where Source #
ForeignExp :: String -> IRFun1 PTX () (x -> y) -> ForeignExp (x -> y) |
Instances
Foreign ForeignExp Source # | |
Defined in Data.Array.Accelerate.LLVM.PTX.Foreign strForeign :: ForeignExp args -> String liftForeign :: ForeignExp args -> Q (TExp (ForeignExp args)) |
The LLVM monad, for executing array computations. This consists of a stack
for the LLVM execution context as well as the per-execution target specific
state target
.
Instances
MonadState target (LLVM target) | |
Execute arch => ExecuteAfun arch (LLVM arch b) | |
Defined in Data.Array.Accelerate.LLVM.Execute executeOpenAfun :: ExecOpenAfun arch aenv (ExecAfunR arch (LLVM arch b)) -> LLVM arch (AvalR arch aenv) -> LLVM arch b | |
Monad (LLVM target) | |
Functor (LLVM target) | |
Applicative (LLVM target) | |
Defined in Data.Array.Accelerate.LLVM.State | |
RemoteMemory (LLVM PTX) | |
Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8)) pokeRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> ArrayData e -> LLVM PTX () peekRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> MutableArrayData e -> LLVM PTX () castRemotePtr :: proxy (LLVM PTX) -> RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b totalRemoteMem :: LLVM PTX Int64 | |
MonadIO (LLVM target) | |
Defined in Data.Array.Accelerate.LLVM.State | |
MonadMask (LLVM target) | |
Defined in Data.Array.Accelerate.LLVM.State mask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b # uninterruptibleMask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b # generalBracket :: LLVM target a -> (a -> ExitCase b -> LLVM target c) -> (a -> LLVM target b) -> LLVM target (b, c) # | |
MonadCatch (LLVM target) | |
MonadThrow (LLVM target) | |
Defined in Data.Array.Accelerate.LLVM.State | |
type ExecAfunR arch (LLVM arch b) | |
Defined in Data.Array.Accelerate.LLVM.Execute type ExecAfunR arch (LLVM arch b) = b | |
type RemotePtr (LLVM PTX) | |
Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote |
The PTX execution target for NVIDIA GPUs.
The execution target carries state specific for the current execution context. The data here --- device memory and execution streams --- are implicitly tied to this CUDA execution context.
Don't store anything here that is independent of the context, for example state related to [persistent] kernel caching should _not_ go here.
PTX | |
|
Instances
Skeleton PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen generate :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh -> e) -> CodeGen (IROpenAcc PTX aenv (Array sh e)) transform :: (Shape sh, Shape sh', Elt a, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh' -> sh) -> IRFun1 PTX aenv (a -> b) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh' b)) map :: (Shape sh, Elt a, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (a -> b) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh b)) fold :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array sh e)) fold1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array sh e)) foldSeg :: (Shape sh, Elt e, Elt i, IsIntegral i) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> IRDelayed PTX aenv (Segments i) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) fold1Seg :: (Shape sh, Elt e, Elt i, IsIntegral i) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> IRDelayed PTX aenv (Segments i) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) scanl :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) scanl' :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e, Array sh e)) scanl1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) scanr :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) scanr' :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e, Array sh e)) scanr1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e)) permute :: (Shape sh, Shape sh', Elt e) => PTX -> UID -> Gamma aenv -> IRPermuteFun PTX aenv (e -> e -> e) -> IRFun1 PTX aenv (sh -> sh') -> IRDelayed PTX aenv (Array sh e) -> CodeGen (IROpenAcc PTX aenv (Array sh' e)) backpermute :: (Shape sh, Shape sh', Elt e) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh' -> sh) -> IRDelayed PTX aenv (Array sh e) -> CodeGen (IROpenAcc PTX aenv (Array sh' e)) stencil :: (Stencil sh a stencil, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (stencil -> b) -> IRBoundary PTX aenv (Array sh a) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh b)) stencil2 :: (Stencil sh a stencil1, Stencil sh b stencil2, Elt c) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (stencil1 -> stencil2 -> c) -> IRBoundary PTX aenv (Array sh a) -> IRDelayed PTX aenv (Array sh a) -> IRBoundary PTX aenv (Array sh b) -> IRDelayed PTX aenv (Array sh b) -> CodeGen (IROpenAcc PTX aenv (Array sh c)) | |
Persistent PTX | |
Embed PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Embed | |
Execute PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Execute map :: (Shape sh, Elt b) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh b) generate :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e) transform :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e) backpermute :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e) fold :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array sh e) fold1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array sh e) foldSeg :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> DIM1 -> LLVM PTX (Array (sh :. Int) e) fold1Seg :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> DIM1 -> LLVM PTX (Array (sh :. Int) e) scanl :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e) scanl1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e) scanl' :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e, Array sh e) scanr :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e) scanr1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e) scanr' :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e, Array sh e) permute :: (Shape sh, Shape sh', Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> Bool -> sh -> Array sh' e -> LLVM PTX (Array sh' e) stencil1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e) stencil2 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> sh -> LLVM PTX (Array sh e) aforeign :: (Arrays as, Arrays bs) => String -> (StreamR PTX -> as -> LLVM PTX bs) -> StreamR PTX -> as -> LLVM PTX bs | |
Link PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Link linkForTarget :: ObjectR PTX -> LLVM PTX (ExecutableR PTX) | |
Compile PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Compile compileForTarget :: DelayedOpenAcc aenv a -> Gamma aenv -> LLVM PTX (ObjectR PTX) | |
Foreign PTX | |
Intrinsic PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Intrinsic intrinsicForTarget :: PTX -> HashMap ShortByteString Label | |
Target PTX Source # | |
Defined in Data.Array.Accelerate.LLVM.PTX.Target targetTriple :: PTX -> Maybe ShortByteString targetDataLayout :: PTX -> Maybe DataLayout | |
Remote PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Array.Data allocateRemote :: (Shape sh, Elt e) => sh -> LLVM PTX (Array sh e) useRemoteR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX () copyToRemoteR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX () copyToHostR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX () copyToPeerR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> PTX -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX () indexRemote :: Array sh e -> Int -> LLVM PTX e | |
Async PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async | |
Marshalable PTX Int | |
Marshalable PTX Int32 | |
ArrayElt e => Marshalable PTX (ArrayData e) | |
RemoteMemory (LLVM PTX) | |
Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8)) pokeRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> ArrayData e -> LLVM PTX () peekRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> MutableArrayData e -> LLVM PTX () castRemotePtr :: proxy (LLVM PTX) -> RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b totalRemoteMem :: LLVM PTX Int64 | |
data ExecutableR PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Link | |
data ObjectR PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.Compile data ObjectR PTX = ObjectR {
| |
type ArgR PTX | |
type EventR PTX Source # | |
Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async | |
type StreamR PTX Source # | |
Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async | |
data KernelMetadata PTX | |
Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Base | |
type RemotePtr (LLVM PTX) | |
Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote |
An execution context, which is tied to a specific device and CUDA execution context.
Context | |
|
withDevicePtr :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Typeable e, Typeable a, Storable a) => ArrayData e -> (DevicePtr a -> LLVM PTX (Maybe Event, r)) -> LLVM PTX r Source #
Lookup the device memory associated with a given host array and do something with it.
copyToHostLazy :: Arrays arrs => arrs -> LLVM PTX arrs Source #
Copy an array from the remote device to the host. Although the Accelerate program is hyper-strict and will evaluate the computation as soon as any part of it is demanded, the individual array payloads are copied back to the host _only_ as they are demanded by the Haskell program. This has several consequences:
- If the device has multiple memcpy engines, only one will be used. The transfers are however associated with a non-default stream.
- Using
seq
to force an Array to head-normal form will initiate the computation, but not transfer the results back to the host. Requesting an array element or usingdeepseq
to force to normal form is required to actually transfer the data.
cloneArrayAsync :: (Shape sh, Elt e) => Stream -> Array sh e -> LLVM PTX (Array sh e) Source #
Clone an array into a newly allocated array on the device.
type Event = Lifetime Event Source #
Events can be used for efficient device-side synchronisation between execution streams and between the host.
type family StreamR arch :: Type #
Streams (i.e. threads) can execute concurrently with other streams, but operations within the same stream proceed sequentially.
type family EventR arch :: Type #
An Event marks a point in the execution stream, possibly in the future. Since execution within a stream is sequential, events can be used to test the progress of a computation and synchronise between different streams.
join :: Async arch => StreamR arch -> LLVM arch () #
Mark the given execution stream as closed. The stream may still be executing in the background, but no new work may be submitted to it.
fork :: Async arch => LLVM arch (StreamR arch) #
Create a new execution stream that can be used to track (potentially parallel) computations
checkpoint :: Async arch => StreamR arch -> LLVM arch (EventR arch) #
Generate a new event at the end of the given execution stream. It will be filled once all prior work submitted to the stream has completed.
after :: Async arch => StreamR arch -> EventR arch -> LLVM arch () #
Make all future work submitted to the given execution stream wait until the given event has passed. Typically the event is from a different execution stream, therefore this function is intended to enable non-blocking cross-stream coordination.
block :: Async arch => EventR arch -> LLVM arch () #
Block execution of the calling thread until the given event has been recorded.
async :: Async arch => (StreamR arch -> LLVM arch a) -> LLVM arch (AsyncR arch a) #
Execute the given operation asynchronously in a new execution stream.
get :: Async arch => AsyncR arch a -> LLVM arch a #
Wait for an asynchronous operation to complete, then return it.
The result of a potentially parallel computation which will be available at some point (presumably, in the future). This is essentially a write-once IVar.