accelerate-llvm-ptx-1.1.0.1: Accelerate backend for NVIDIA GPUs

Copyright     [2016..2017] Trevor L. McDonell
License       BSD3
Maintainer    Trevor L. McDonell <tmcdonell@cse.unsw.edu.au>
Stability     experimental
Portability   non-portable (GHC extensions)
Safe Haskell  None
Language      Haskell2010

Data.Array.Accelerate.LLVM.PTX.Foreign

Documentation

data ForeignAcc f where Source #

Constructors

ForeignAcc :: String -> (Stream -> a -> LLVM PTX b) -> ForeignAcc (a -> b) 

Instances

Foreign ForeignAcc Source # 

Methods

strForeign :: ForeignAcc args -> String

liftForeign :: ForeignAcc args -> Q (TExp (ForeignAcc args))
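
For example, a ForeignAcc can be paired with the foreignAcc smart constructor from Data.Array.Accelerate to provide a native CUDA implementation of an array operation, with a pure Accelerate fallback for other backends. The following is a minimal sketch only: nativeDotp is a hypothetical placeholder for a call into a native library such as cuBLAS.

    import Data.Array.Accelerate                    ( Acc, Vector, Scalar )
    import qualified Data.Array.Accelerate          as A
    import Data.Array.Accelerate.LLVM.PTX.Foreign

    dotp :: Acc (Vector Float) -> Acc (Vector Float) -> Acc (Scalar Float)
    dotp xs ys
      = A.foreignAcc (ForeignAcc "native.dotp" nativeDotp) pureDotp
      $ A.lift (xs, ys)
      where
        -- Pure Accelerate fallback, used by backends that do not recognise
        -- the foreign implementation.
        pureDotp :: Acc (Vector Float, Vector Float) -> Acc (Scalar Float)
        pureDotp p =
          let (u, v) = A.unlift p :: (Acc (Vector Float), Acc (Vector Float))
          in  A.fold (+) 0 (A.zipWith (*) u v)

        -- The ForeignAcc payload runs in the LLVM PTX monad and receives the
        -- execution Stream the operation should be issued on. Hypothetical;
        -- a real implementation would call into a native library here.
        nativeDotp :: Stream -> (Vector Float, Vector Float) -> LLVM PTX (Scalar Float)
        nativeDotp _stream _input = error "call a native CUDA routine here"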

data ForeignExp f where Source #

Constructors

ForeignExp :: String -> IRFun1 PTX () (x -> y) -> ForeignExp (x -> y) 

Instances

Foreign ForeignExp Source # 

Methods

strForeign :: ForeignExp args -> String

liftForeign :: ForeignExp args -> Q (TExp (ForeignExp args))
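
Analogously, a ForeignExp can back a scalar expression with a device intrinsic via the foreignExp smart constructor from Data.Array.Accelerate. This is a sketch under assumptions: the IRFun1 import path from accelerate-llvm may differ between versions, and irSinf is left abstract (in practice it would be written with the accelerate-llvm code generation combinators).

    import Data.Array.Accelerate                     ( Exp )
    import qualified Data.Array.Accelerate           as A
    import Data.Array.Accelerate.LLVM.PTX.Foreign
    import Data.Array.Accelerate.LLVM.CodeGen.Sugar  ( IRFun1 )   -- assumed module path

    fastSin :: Exp Float -> Exp Float
    fastSin = A.foreignExp (ForeignExp "__nv_fast_sinf" irSinf) sin
      where
        -- Code generator for the PTX backend; left abstract in this sketch.
        irSinf :: IRFun1 PTX () (Float -> Float)
        irSinf = error "generate a call to the fast sine intrinsic here"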

data LLVM target a :: * -> * -> * #

Instances

MonadState target (LLVM target) 

Methods

get :: LLVM target target #

put :: target -> LLVM target () #

state :: (target -> (a, target)) -> LLVM target a #

Execute arch => ExecuteAfun arch (LLVM arch b) 

Associated Types

type ExecAfunR arch (LLVM arch b) :: *

Methods

executeOpenAfun :: ExecOpenAfun arch aenv (ExecAfunR arch (LLVM arch b)) -> LLVM arch (AvalR arch aenv) -> LLVM arch b

Monad (LLVM target) 

Methods

(>>=) :: LLVM target a -> (a -> LLVM target b) -> LLVM target b #

(>>) :: LLVM target a -> LLVM target b -> LLVM target b #

return :: a -> LLVM target a #

fail :: String -> LLVM target a #

Functor (LLVM target) 

Methods

fmap :: (a -> b) -> LLVM target a -> LLVM target b #

(<$) :: a -> LLVM target b -> LLVM target a #

Applicative (LLVM target) 

Methods

pure :: a -> LLVM target a #

(<*>) :: LLVM target (a -> b) -> LLVM target a -> LLVM target b #

liftA2 :: (a -> b -> c) -> LLVM target a -> LLVM target b -> LLVM target c #

(*>) :: LLVM target a -> LLVM target b -> LLVM target b #

(<*) :: LLVM target a -> LLVM target b -> LLVM target a #

MonadIO (LLVM target) 

Methods

liftIO :: IO a -> LLVM target a #

MonadThrow (LLVM target) 

Methods

throwM :: Exception e => e -> LLVM target a #

MonadCatch (LLVM target) 

Methods

catch :: Exception e => LLVM target a -> (e -> LLVM target a) -> LLVM target a #

MonadMask (LLVM target) 

Methods

mask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

uninterruptibleMask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

type ExecAfunR arch (LLVM arch b) 
type ExecAfunR arch (LLVM arch b) = b
type RemotePtr (LLVM PTX) 
type RemotePtr (LLVM PTX) = DevicePtr
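
Actions in the LLVM PTX monad can therefore combine access to the execution target (via MonadState) with ordinary IO (via MonadIO), as in this minimal sketch (announce is a hypothetical action):

    import Control.Monad.State                      ( get )
    import Data.Array.Accelerate.LLVM.PTX.Foreign

    announce :: LLVM PTX ()
    announce = do
      _ptx <- get       -- the current PTX execution target
      liftIO (putStrLn "running inside the LLVM PTX monad")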

data PTX Source #

The PTX execution target for NVIDIA GPUs.

The execution target carries state specific to the current execution context; the data here (device memory and execution streams) are implicitly tied to this CUDA execution context.

Don't store anything here that is independent of the context; for example, state related to [persistent] kernel caching should not go here.

Constructors

PTX 

Fields

Instances

Target PTX Source # 
type StreamR PTX 
type StreamR PTX = Stream
type EventR PTX 
type EventR PTX = Event
data KernelMetadata PTX 
data KernelMetadata PTX = KM_PTX LaunchConfig
type ArgR PTX 
type ArgR PTX = FunParam
data ObjectR PTX 
data ObjectR PTX = ObjectR {}
data ExecutableR PTX 
data ExecutableR PTX = PTXR {}
type RemotePtr (LLVM PTX) 
type RemotePtr (LLVM PTX) = DevicePtr

data Context Source #

An execution context, which is tied to a specific device and CUDA execution context.

Constructors

Context 

Instances

liftIO :: MonadIO m => forall a. IO a -> m a #

Lift a computation from the IO monad.

withDevicePtr :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Typeable e, Typeable a, Storable a) => ArrayData e -> (DevicePtr a -> LLVM PTX (Maybe Event, r)) -> LLVM PTX r Source #

Look up the device memory associated with a given host array and do something with it.
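
A foreign implementation might use withDevicePtr to obtain the raw DevicePtr backing an array payload before handing it to a native routine. The sketch below reaches the array's underlying ArrayData via the internal Data.Array.Accelerate.Array.Sugar module; that representation (and hence this pattern match) is an assumption that may differ between accelerate versions. Returning Nothing for the event means no additional synchronisation is recorded for this use of the pointer.

    import Data.Array.Accelerate.Array.Sugar        ( Array(..), Vector )
    import Data.Array.Accelerate.LLVM.PTX.Foreign
    import Foreign.CUDA.Ptr                         ( DevicePtr )

    -- Hypothetical helper: run an action with the device pointer of a
    -- vector's payload.
    withVectorPtr :: Vector Float -> (DevicePtr Float -> LLVM PTX r) -> LLVM PTX r
    withVectorPtr (Array _ adata) k =
      withDevicePtr adata $ \dptr -> do
        r <- k dptr
        return (Nothing, r)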

copyToHostLazy :: Arrays arrs => arrs -> LLVM PTX arrs Source #

Copy an array from the remote device to the host. Although the Accelerate program is hyper-strict and will evaluate the computation as soon as any part of it is demanded, the individual array payloads are copied back to the host only as they are demanded by the Haskell program. This has several consequences, illustrated by the sketch following this list:

  1. If the device has multiple memcpy engines, only one will be used. The transfers are however associated with a non-default stream.
  2. Using seq to force an Array to head-normal form will initiate the computation, but not transfer the results back to the host. Requesting an array element or using deepseq to force to normal form is required to actually transfer the data.
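
The following sketch illustrates consequence 2 from a user's perspective, assuming run from Data.Array.Accelerate.LLVM.PTX as the entry point: forcing the result with seq launches the computation, but a payload is only copied back once an element of it is demanded.

    import Data.Array.Accelerate                    ( Vector, Z(..), (:.)(..) )
    import qualified Data.Array.Accelerate          as A
    import Data.Array.Accelerate.LLVM.PTX           ( run )

    main :: IO ()
    main = do
      let xs = A.fromList (Z :. 1000000) [0 ..] :: Vector Float
          ys = run (A.map (* 2) (A.use xs))
      ys `seq` return ()                -- kernel runs; payload not yet transferred
      print (A.indexArray ys (Z :. 0))  -- demanding an element copies it back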

cloneArrayAsync :: (Shape sh, Elt e) => Stream -> Array sh e -> LLVM PTX (Array sh e) Source #

Clone an array into a newly allocated array on the device.
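
For instance, a foreign implementation that mutates its input in place might first take a device-side copy on its execution stream; a minimal sketch (copyThenMutate and the native kernel launch are hypothetical):

    import Data.Array.Accelerate                    ( Vector )
    import Data.Array.Accelerate.LLVM.PTX.Foreign

    copyThenMutate :: Stream -> Vector Float -> LLVM PTX (Vector Float)
    copyThenMutate stream xs = do
      ys <- cloneArrayAsync stream xs
      -- ... launch a native kernel that overwrites 'ys' here ...
      return ys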

type Async a = AsyncR PTX a Source #

type Stream = Lifetime Stream Source #

A Stream represents an independent sequence of computations executed on the GPU. Operations in different streams may be executed concurrently with each other, but operations in the same stream can never overlap. Events can be used for efficient cross-stream synchronisation.

type Event = Lifetime Event Source #

Events can be used for efficient device-side synchronisation between execution streams and the host.

Orphan instances

Foreign PTX Source # 

Methods

foreignAcc :: (Foreign asm, Typeable * a, Typeable * b) => PTX -> asm (a -> b) -> Maybe (StreamR PTX -> a -> LLVM PTX b)

foreignExp :: (Foreign asm, Typeable * x, Typeable * y) => PTX -> asm (x -> y) -> Maybe (IRFun1 PTX () (x -> y))