accelerate-llvm-ptx-1.3.0.0: Accelerate backend for NVIDIA GPUs
Copyright: [2016..2020] The Accelerate Team
License: BSD3
Maintainer: Trevor L. McDonell <trevor.mcdonell@gmail.com>
Stability: experimental
Portability: non-portable (GHC extensions)
Safe Haskell: None
Language: Haskell2010

Data.Array.Accelerate.LLVM.PTX.Foreign

Description

 
Synopsis

Documentation

data ForeignAcc f where Source #

Constructors

ForeignAcc :: String -> (a -> Par PTX (Future b)) -> ForeignAcc (a -> b) 

Instances

Instances details
Foreign ForeignAcc Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

data ForeignExp f where Source #

Constructors

ForeignExp :: String -> IRFun1 PTX () (x -> y) -> ForeignExp (x -> y) 

Instances

Instances details
Foreign ForeignExp Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

data LLVM target a #

The LLVM monad, for executing array computations. This consists of a stack for the LLVM execution context as well as the per-execution, target-specific state of type target.

Instances

Instances details
MonadState target (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

get :: LLVM target target #

put :: target -> LLVM target () #

state :: (target -> (a, target)) -> LLVM target a #

Monad (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

(>>=) :: LLVM target a -> (a -> LLVM target b) -> LLVM target b #

(>>) :: LLVM target a -> LLVM target b -> LLVM target b #

return :: a -> LLVM target a #

Functor (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

fmap :: (a -> b) -> LLVM target a -> LLVM target b #

(<$) :: a -> LLVM target b -> LLVM target a #

Applicative (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

pure :: a -> LLVM target a #

(<*>) :: LLVM target (a -> b) -> LLVM target a -> LLVM target b #

liftA2 :: (a -> b -> c) -> LLVM target a -> LLVM target b -> LLVM target c #

(*>) :: LLVM target a -> LLVM target b -> LLVM target b #

(<*) :: LLVM target a -> LLVM target b -> LLVM target a #

RemoteMemory (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

Associated Types

type RemotePtr (LLVM PTX) :: Type -> Type

Methods

mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8))

pokeRemote :: SingleType e -> Int -> RemotePtr (LLVM PTX) (ScalarArrayDataR e) -> ArrayData e -> LLVM PTX ()

peekRemote :: SingleType e -> Int -> RemotePtr (LLVM PTX) (ScalarArrayDataR e) -> MutableArrayData e -> LLVM PTX ()

castRemotePtr :: RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b

totalRemoteMem :: LLVM PTX Int64

availableRemoteMem :: LLVM PTX Int64

remoteAllocationSize :: LLVM PTX Int

MonadIO (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

liftIO :: IO a -> LLVM target a #

MonadMask (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

mask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

uninterruptibleMask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

generalBracket :: LLVM target a -> (a -> ExitCase b -> LLVM target c) -> (a -> LLVM target b) -> LLVM target (b, c) #

MonadCatch (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

catch :: Exception e => LLVM target a -> (e -> LLVM target a) -> LLVM target a #

MonadThrow (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

throwM :: Exception e => e -> LLVM target a #

type RemotePtr (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

type RemotePtr (LLVM PTX) = DevicePtr

data PTX Source #

The PTX execution target for NVIDIA GPUs.

The execution target carries state specific for the current execution context. The data here — device memory and execution streams — are implicitly tied to this CUDA execution context.

Don't store anything here that is independent of the context; for example, state related to [persistent] kernel caching should _not_ go here.

Constructors

PTX 

Fields

Instances

Instances details
Skeleton PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen

Methods

generate :: UID -> Gamma aenv -> ArrayR (Array sh e) -> IRFun1 PTX aenv (sh -> e) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh e))

transform :: UID -> Gamma aenv -> ArrayR (Array sh a) -> ArrayR (Array sh' b) -> IRFun1 PTX aenv (sh' -> sh) -> IRFun1 PTX aenv (a -> b) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh' b))

map :: UID -> Gamma aenv -> ArrayR (Array sh a) -> TypeR b -> IRFun1 PTX aenv (a -> b) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh b))

fold :: UID -> Gamma aenv -> ArrayR (Array sh e) -> IRFun2 PTX aenv (e -> e -> e) -> Maybe (IRExp PTX aenv e) -> MIRDelayed PTX aenv (Array (sh, Int) e) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh e))

foldSeg :: UID -> Gamma aenv -> ArrayR (Array (sh, Int) e) -> IntegralType i -> IRFun2 PTX aenv (e -> e -> e) -> Maybe (IRExp PTX aenv e) -> MIRDelayed PTX aenv (Array (sh, Int) e) -> MIRDelayed PTX aenv (Segments i) -> CodeGen PTX (IROpenAcc PTX aenv (Array (sh, Int) e))

scan :: UID -> Gamma aenv -> ArrayR (Array (sh, Int) e) -> Direction -> IRFun2 PTX aenv (e -> e -> e) -> Maybe (IRExp PTX aenv e) -> MIRDelayed PTX aenv (Array (sh, Int) e) -> CodeGen PTX (IROpenAcc PTX aenv (Array (sh, Int) e))

scan' :: UID -> Gamma aenv -> ArrayR (Array (sh, Int) e) -> Direction -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> MIRDelayed PTX aenv (Array (sh, Int) e) -> CodeGen PTX (IROpenAcc PTX aenv (Array (sh, Int) e, Array sh e))

permute :: UID -> Gamma aenv -> ArrayR (Array sh e) -> ShapeR sh' -> IRPermuteFun PTX aenv (e -> e -> e) -> IRFun1 PTX aenv (sh -> PrimMaybe sh') -> MIRDelayed PTX aenv (Array sh e) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh' e))

backpermute :: UID -> Gamma aenv -> ArrayR (Array sh e) -> ShapeR sh' -> IRFun1 PTX aenv (sh' -> sh) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh' e))

stencil1 :: UID -> Gamma aenv -> StencilR sh a stencil -> TypeR b -> IRFun1 PTX aenv (stencil -> b) -> IRBoundary PTX aenv (Array sh a) -> MIRDelayed PTX aenv (Array sh a) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh b))

stencil2 :: UID -> Gamma aenv -> StencilR sh a stencil1 -> StencilR sh b stencil2 -> TypeR c -> IRFun2 PTX aenv (stencil1 -> stencil2 -> c) -> IRBoundary PTX aenv (Array sh a) -> MIRDelayed PTX aenv (Array sh a) -> IRBoundary PTX aenv (Array sh b) -> MIRDelayed PTX aenv (Array sh b) -> CodeGen PTX (IROpenAcc PTX aenv (Array sh c))

Persistent PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile.Cache

Embed PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Embed

Methods

embedForTarget :: PTX -> ObjectR PTX -> Q (TExp (ExecutableR PTX))

Execute PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute

Methods

map :: Maybe (a :~: b) -> ArrayR (Array sh a) -> TypeR b -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Array sh a -> Par PTX (FutureR PTX (Array sh b))

generate :: ArrayR (Array sh e) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> sh -> Par PTX (FutureR PTX (Array sh e))

transform :: ArrayR (Array sh a) -> ArrayR (Array sh' b) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> sh' -> Array sh a -> Par PTX (FutureR PTX (Array sh' b))

backpermute :: ArrayR (Array sh e) -> ShapeR sh' -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> sh' -> Array sh e -> Par PTX (FutureR PTX (Array sh' e))

fold :: HasInitialValue -> ArrayR (Array sh e) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array (sh, Int) e) -> Par PTX (FutureR PTX (Array sh e))

foldSeg :: IntegralType i -> HasInitialValue -> ArrayR (Array (sh, Int) e) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array (sh, Int) e) -> Delayed (Segments i) -> Par PTX (FutureR PTX (Array (sh, Int) e))

scan :: Direction -> HasInitialValue -> ArrayR (Array (sh, Int) e) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array (sh, Int) e) -> Par PTX (FutureR PTX (Array (sh, Int) e))

scan' :: Direction -> ArrayR (Array (sh, Int) e) -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array (sh, Int) e) -> Par PTX (FutureR PTX (Array (sh, Int) e, Array sh e))

permute :: Bool -> ArrayR (Array sh e) -> ShapeR sh' -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Array sh' e -> Delayed (Array sh e) -> Par PTX (FutureR PTX (Array sh' e))

stencil1 :: TypeR a -> ArrayR (Array sh b) -> sh -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array sh a) -> Par PTX (FutureR PTX (Array sh b))

stencil2 :: TypeR a -> TypeR b -> ArrayR (Array sh c) -> sh -> ExecutableR PTX -> Gamma aenv -> ValR PTX aenv -> Delayed (Array sh a) -> Delayed (Array sh b) -> Par PTX (FutureR PTX (Array sh c))

aforeign :: String -> ArraysR as -> ArraysR bs -> (as -> Par PTX (FutureR PTX bs)) -> as -> Par PTX (FutureR PTX bs)

Link PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Link

Associated Types

data ExecutableR PTX

Methods

linkForTarget :: ObjectR PTX -> LLVM PTX (ExecutableR PTX)

Compile PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile

Associated Types

data ObjectR PTX

Methods

compileForTarget :: PreOpenAcc DelayedOpenAcc aenv a -> Gamma aenv -> LLVM PTX (ObjectR PTX)

Marshal PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

Associated Types

type ArgR PTX

Methods

marshalInt :: Int -> ArgR PTX

marshalScalarData' :: SingleType e -> ScalarArrayData e -> Par PTX (DList (ArgR PTX))

Foreign PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

Methods

foreignAcc :: Foreign asm => asm (a -> b) -> Maybe (a -> Par PTX (FutureR PTX b))

foreignExp :: Foreign asm => asm (x -> y) -> Maybe (IRFun1 PTX () (x -> y))

Intrinsic PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Intrinsic

Target PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Target

Remote PTX

Remote memory management for the PTX target. Data can be copied asynchronously using multiple execution engines whenever possible.

Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Data

Methods

allocateRemote :: ArrayR (Array sh e) -> sh -> Par PTX (Array sh e)

useRemoteR :: SingleType e -> Int -> ArrayData e -> Par PTX (FutureR PTX (ArrayData e))

copyToRemoteR :: SingleType e -> Int -> ArrayData e -> Par PTX (FutureR PTX (ArrayData e))

copyToHostR :: SingleType e -> Int -> ArrayData e -> Par PTX (FutureR PTX (ArrayData e))

copyToPeerR :: PTX -> SingleType e -> Int -> ArrayData e -> Par PTX (FutureR PTX (ArrayData e))

useRemoteAsync :: ArraysR arrs -> arrs -> Par PTX (FutureArraysR PTX arrs)

copyToRemoteAsync :: ArraysR arrs -> arrs -> Par PTX (FutureArraysR PTX arrs)

copyToHostAsync :: ArraysR arrs -> arrs -> Par PTX (FutureArraysR PTX arrs)

copyToPeerAsync :: PTX -> ArraysR arrs -> arrs -> Par PTX (FutureArraysR PTX arrs)

indexRemoteAsync :: TypeR e -> Array sh e -> Int -> Par PTX (FutureR PTX e)

Async PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Associated Types

data Par PTX :: Type -> Type

type FutureR PTX :: Type -> Type

Methods

new :: HasCallStack => Par PTX (FutureR PTX a)

put :: HasCallStack => FutureR PTX a -> a -> Par PTX ()

get :: HasCallStack => FutureR PTX a -> Par PTX a

fork :: Par PTX () -> Par PTX ()

liftPar :: HasCallStack => LLVM PTX a -> Par PTX a

block :: HasCallStack => FutureR PTX a -> Par PTX a

spawn :: HasCallStack => Par PTX a -> Par PTX a

newFull :: HasCallStack => a -> Par PTX (FutureR PTX a)

MonadReader ParState (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

ask :: Par PTX ParState #

local :: (ParState -> ParState) -> Par PTX a -> Par PTX a #

reader :: (ParState -> a) -> Par PTX a #

MonadState PTX (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

get :: Par PTX PTX #

put :: PTX -> Par PTX () #

state :: (PTX -> (a, PTX)) -> Par PTX a #

Monad (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

(>>=) :: Par PTX a -> (a -> Par PTX b) -> Par PTX b #

(>>) :: Par PTX a -> Par PTX b -> Par PTX b #

return :: a -> Par PTX a #

Functor (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

fmap :: (a -> b) -> Par PTX a -> Par PTX b #

(<$) :: a -> Par PTX b -> Par PTX a #

Applicative (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

pure :: a -> Par PTX a #

(<*>) :: Par PTX (a -> b) -> Par PTX a -> Par PTX b #

liftA2 :: (a -> b -> c) -> Par PTX a -> Par PTX b -> Par PTX c #

(*>) :: Par PTX a -> Par PTX b -> Par PTX b #

(<*) :: Par PTX a -> Par PTX b -> Par PTX a #

RemoteMemory (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

Associated Types

type RemotePtr (LLVM PTX) :: Type -> Type

Methods

mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8))

pokeRemote :: SingleType e -> Int -> RemotePtr (LLVM PTX) (ScalarArrayDataR e) -> ArrayData e -> LLVM PTX ()

peekRemote :: SingleType e -> Int -> RemotePtr (LLVM PTX) (ScalarArrayDataR e) -> MutableArrayData e -> LLVM PTX ()

castRemotePtr :: RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b

totalRemoteMem :: LLVM PTX Int64

availableRemoteMem :: LLVM PTX Int64

remoteAllocationSize :: LLVM PTX Int

MonadIO (Par PTX) Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Methods

liftIO :: IO a -> Par PTX a #

data ExecutableR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Link

data ExecutableR PTX = PTXR {}
data ObjectR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile

data ObjectR PTX = ObjectR {}
type ArgR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

type ArgR PTX = FunParam
type FutureR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

type FutureR PTX = Future
newtype Par PTX a 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

newtype Par PTX a = Par {}
data KernelMetadata PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Base

data KernelMetadata PTX = KM_PTX LaunchConfig
type RemotePtr (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

type RemotePtr (LLVM PTX) = DevicePtr

data Context Source #

An execution context, which is tied to a specific device and CUDA execution context.

Constructors

Context 

Instances

Instances details
Eq Context Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Context

Methods

(==) :: Context -> Context -> Bool #

(/=) :: Context -> Context -> Bool #

Hashable Context Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Context

Methods

hashWithSalt :: Int -> Context -> Int #

hash :: Context -> Int #

liftIO :: MonadIO m => IO a -> m a #

Lift a computation from the IO monad.

withDevicePtr :: HasCallStack => SingleType e -> ArrayData e -> (DevicePtr (ScalarArrayDataR e) -> LLVM PTX (Maybe Event, r)) -> LLVM PTX r Source #

Lookup the device memory associated with a given host array and do something with it.

copyToHostLazy :: HasCallStack => ArraysR arrs -> FutureArraysR PTX arrs -> Par PTX arrs Source #

Copy an array from the remote device to the host. Although the Accelerate program is hyper-strict and will evaluate the computation as soon as any part of it is demanded, the individual array payloads are copied back to the host _only_ as they are demanded by the Haskell program. This has several consequences:

  1. If the device has multiple memcpy engines, only one will be used. The transfers are, however, associated with a non-default stream.
  2. Using seq to force an Array to weak head normal form will initiate the computation, but not transfer the results back to the host. Requesting an array element or using deepseq to force to normal form is required to actually transfer the data.

cloneArrayAsync :: ArrayR (Array sh e) -> Array sh e -> Par PTX (Future (Array sh e)) Source #

Clone an array into a newly allocated array on the device.

pattern Par :: ReaderT ParState (LLVM PTX) a -> Par PTX a Source #

data IVar a Source #

Constructors

Full !a 
Pending !Event !(Maybe (Lifetime FunctionTable)) !a 
Empty 

data Future a Source #

Constructors

Future !(IORef (IVar a)) 

type ParState = (Stream, Maybe (Lifetime FunctionTable)) Source #

evalPar :: Par PTX a -> LLVM PTX a Source #

Evaluate a parallel computation.

ptxKernel :: ParState -> Maybe (Lifetime FunctionTable) Source #

wait :: Future a -> IO a Source #

Block the calling _host_ thread until the value offered by the future is available.

type Event = Lifetime Event Source #

Events can be used for efficient device-side synchronisation between execution streams, and between a stream and the host.

query :: Event -> IO Bool Source #

Test whether an event has completed.

waypoint :: Stream -> LLVM PTX Event Source #

Create a new event marker that will be filled once execution in the specified stream has completed all previously submitted work.

type Stream = Lifetime Stream Source #

A Stream represents an independent sequence of computations executed on the GPU. Operations in different streams may be executed concurrently with each other, but operations in the same stream can never overlap. Events can be used for efficient cross-stream synchronisation.

Orphan instances

Foreign PTX Source # 
Instance details

Methods

foreignAcc :: Foreign asm => asm (a -> b) -> Maybe (a -> Par PTX (FutureR PTX b))

foreignExp :: Foreign asm => asm (x -> y) -> Maybe (IRFun1 PTX () (x -> y))