accelerate-llvm-ptx-1.2.0.1: Accelerate backend for NVIDIA GPUs

Copyright: [2016..2017] Trevor L. McDonell
License: BSD3
Maintainer: Trevor L. McDonell <tmcdonell@cse.unsw.edu.au>
Stability: experimental
Portability: non-portable (GHC extensions)
Safe Haskell: None
Language: Haskell2010

Data.Array.Accelerate.LLVM.PTX.Foreign

Contents

Description

 
Synopsis

Documentation

data ForeignAcc f where Source #

Constructors

ForeignAcc :: String -> (Stream -> a -> LLVM PTX b) -> ForeignAcc (a -> b) 
Instances
Foreign ForeignAcc Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

Methods

strForeign :: ForeignAcc args -> String

liftForeign :: ForeignAcc args -> Q (TExp (ForeignAcc args))

data ForeignExp f where Source #

Constructors

ForeignExp :: String -> IRFun1 PTX () (x -> y) -> ForeignExp (x -> y) 
Instances
Foreign ForeignExp Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

Methods

strForeign :: ForeignExp args -> String

liftForeign :: ForeignExp args -> Q (TExp (ForeignExp args))

data LLVM target a #

The LLVM monad, for executing array computations. This consists of a stack for the LLVM execution context, as well as the per-execution, target-specific state of type target.

Instances
MonadState target (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

get :: LLVM target target #

put :: target -> LLVM target () #

state :: (target -> (a, target)) -> LLVM target a #

Execute arch => ExecuteAfun arch (LLVM arch b) 
Instance details

Defined in Data.Array.Accelerate.LLVM.Execute

Associated Types

type ExecAfunR arch (LLVM arch b) :: Type

Methods

executeOpenAfun :: ExecOpenAfun arch aenv (ExecAfunR arch (LLVM arch b)) -> LLVM arch (AvalR arch aenv) -> LLVM arch b

Monad (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

(>>=) :: LLVM target a -> (a -> LLVM target b) -> LLVM target b #

(>>) :: LLVM target a -> LLVM target b -> LLVM target b #

return :: a -> LLVM target a #

fail :: String -> LLVM target a #

Functor (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

fmap :: (a -> b) -> LLVM target a -> LLVM target b #

(<$) :: a -> LLVM target b -> LLVM target a #

Applicative (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

pure :: a -> LLVM target a #

(<*>) :: LLVM target (a -> b) -> LLVM target a -> LLVM target b #

liftA2 :: (a -> b -> c) -> LLVM target a -> LLVM target b -> LLVM target c #

(*>) :: LLVM target a -> LLVM target b -> LLVM target b #

(<*) :: LLVM target a -> LLVM target b -> LLVM target a #

RemoteMemory (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

Associated Types

type RemotePtr (LLVM PTX) :: Type -> Type

Methods

mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8))

pokeRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> ArrayData e -> LLVM PTX ()

peekRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> MutableArrayData e -> LLVM PTX ()

castRemotePtr :: proxy (LLVM PTX) -> RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b

totalRemoteMem :: LLVM PTX Int64

availableRemoteMem :: LLVM PTX Int64

remoteAllocationSize :: LLVM PTX Int

MonadIO (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

liftIO :: IO a -> LLVM target a #

MonadMask (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

mask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

uninterruptibleMask :: ((forall a. LLVM target a -> LLVM target a) -> LLVM target b) -> LLVM target b #

generalBracket :: LLVM target a -> (a -> ExitCase b -> LLVM target c) -> (a -> LLVM target b) -> LLVM target (b, c) #

MonadCatch (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

catch :: Exception e => LLVM target a -> (e -> LLVM target a) -> LLVM target a #

MonadThrow (LLVM target) 
Instance details

Defined in Data.Array.Accelerate.LLVM.State

Methods

throwM :: Exception e => e -> LLVM target a #

type ExecAfunR arch (LLVM arch b) 
Instance details

Defined in Data.Array.Accelerate.LLVM.Execute

type ExecAfunR arch (LLVM arch b) = b
type RemotePtr (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

type RemotePtr (LLVM PTX) = DevicePtr

data PTX Source #

The PTX execution target for NVIDIA GPUs.

The execution target carries state specific for the current execution context. The data here --- device memory and execution streams --- are implicitly tied to this CUDA execution context.

Don't store anything here that is independent of the context; for example, state related to [persistent] kernel caching should _not_ go here.

Constructors

PTX 

Fields

Instances
Skeleton PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen

Methods

generate :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh -> e) -> CodeGen (IROpenAcc PTX aenv (Array sh e))

transform :: (Shape sh, Shape sh', Elt a, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh' -> sh) -> IRFun1 PTX aenv (a -> b) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh' b))

map :: (Shape sh, Elt a, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (a -> b) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh b))

fold :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array sh e))

fold1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array sh e))

foldSeg :: (Shape sh, Elt e, Elt i, IsIntegral i) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> IRDelayed PTX aenv (Segments i) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

fold1Seg :: (Shape sh, Elt e, Elt i, IsIntegral i) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> IRDelayed PTX aenv (Segments i) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

scanl :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

scanl' :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e, Array sh e))

scanl1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

scanr :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

scanr' :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRExp PTX aenv e -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e, Array sh e))

scanr1 :: (Shape sh, Elt e) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (e -> e -> e) -> IRDelayed PTX aenv (Array (sh :. Int) e) -> CodeGen (IROpenAcc PTX aenv (Array (sh :. Int) e))

permute :: (Shape sh, Shape sh', Elt e) => PTX -> UID -> Gamma aenv -> IRPermuteFun PTX aenv (e -> e -> e) -> IRFun1 PTX aenv (sh -> sh') -> IRDelayed PTX aenv (Array sh e) -> CodeGen (IROpenAcc PTX aenv (Array sh' e))

backpermute :: (Shape sh, Shape sh', Elt e) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (sh' -> sh) -> IRDelayed PTX aenv (Array sh e) -> CodeGen (IROpenAcc PTX aenv (Array sh' e))

stencil :: (Stencil sh a stencil, Elt b) => PTX -> UID -> Gamma aenv -> IRFun1 PTX aenv (stencil -> b) -> IRBoundary PTX aenv (Array sh a) -> IRDelayed PTX aenv (Array sh a) -> CodeGen (IROpenAcc PTX aenv (Array sh b))

stencil2 :: (Stencil sh a stencil1, Stencil sh b stencil2, Elt c) => PTX -> UID -> Gamma aenv -> IRFun2 PTX aenv (stencil1 -> stencil2 -> c) -> IRBoundary PTX aenv (Array sh a) -> IRDelayed PTX aenv (Array sh a) -> IRBoundary PTX aenv (Array sh b) -> IRDelayed PTX aenv (Array sh b) -> CodeGen (IROpenAcc PTX aenv (Array sh c))

Persistent PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile.Cache

Embed PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Embed

Methods

embedForTarget :: PTX -> ObjectR PTX -> Q (TExp (ExecutableR PTX))

Execute PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute

Methods

map :: (Shape sh, Elt b) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh b)

generate :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e)

transform :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e)

backpermute :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e)

fold :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array sh e)

fold1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array sh e)

foldSeg :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> DIM1 -> LLVM PTX (Array (sh :. Int) e)

fold1Seg :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> DIM1 -> LLVM PTX (Array (sh :. Int) e)

scanl :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e)

scanl1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e)

scanl' :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e, Array sh e)

scanr :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e)

scanr1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e)

scanr' :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> (sh :. Int) -> LLVM PTX (Array (sh :. Int) e, Array sh e)

permute :: (Shape sh, Shape sh', Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> Bool -> sh -> Array sh' e -> LLVM PTX (Array sh' e)

stencil1 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> LLVM PTX (Array sh e)

stencil2 :: (Shape sh, Elt e) => ExecutableR PTX -> Gamma aenv -> AvalR PTX aenv -> StreamR PTX -> sh -> sh -> LLVM PTX (Array sh e)

aforeign :: (Arrays as, Arrays bs) => String -> (StreamR PTX -> as -> LLVM PTX bs) -> StreamR PTX -> as -> LLVM PTX bs

Link PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Link

Associated Types

data ExecutableR PTX :: Type

Methods

linkForTarget :: ObjectR PTX -> LLVM PTX (ExecutableR PTX)

Compile PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile

Associated Types

data ObjectR PTX :: Type

Methods

compileForTarget :: DelayedOpenAcc aenv a -> Gamma aenv -> LLVM PTX (ObjectR PTX)

Foreign PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Foreign

Methods

foreignAcc :: (Foreign asm, Typeable a, Typeable b) => PTX -> asm (a -> b) -> Maybe (StreamR PTX -> a -> LLVM PTX b)

foreignExp :: (Foreign asm, Typeable x, Typeable y) => PTX -> asm (x -> y) -> Maybe (IRFun1 PTX () (x -> y))

Intrinsic PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Intrinsic

Target PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Target

Remote PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Data

Methods

allocateRemote :: (Shape sh, Elt e) => sh -> LLVM PTX (Array sh e)

useRemoteR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX ()

copyToRemoteR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX ()

copyToHostR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX ()

copyToPeerR :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Storable a, Typeable a, Typeable e) => Int -> Int -> PTX -> Maybe (StreamR PTX) -> ArrayData e -> LLVM PTX ()

indexRemote :: Array sh e -> Int -> LLVM PTX e

Async PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

Associated Types

type StreamR PTX :: Type #

type EventR PTX :: Type #

Marshalable PTX Int 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

Methods

marshal' :: PTX -> StreamR PTX -> Int -> IO (DList (ArgR PTX))

Marshalable PTX Int32 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

Methods

marshal' :: PTX -> StreamR PTX -> Int32 -> IO (DList (ArgR PTX))

ArrayElt e => Marshalable PTX (ArrayData e) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

Methods

marshal' :: PTX -> StreamR PTX -> ArrayData e -> IO (DList (ArgR PTX))

RemoteMemory (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

Associated Types

type RemotePtr (LLVM PTX) :: Type -> Type

Methods

mallocRemote :: Int -> LLVM PTX (Maybe (RemotePtr (LLVM PTX) Word8))

pokeRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> ArrayData e -> LLVM PTX ()

peekRemote :: PrimElt e a => Int -> RemotePtr (LLVM PTX) a -> MutableArrayData e -> LLVM PTX ()

castRemotePtr :: proxy (LLVM PTX) -> RemotePtr (LLVM PTX) a -> RemotePtr (LLVM PTX) b

totalRemoteMem :: LLVM PTX Int64

availableRemoteMem :: LLVM PTX Int64

remoteAllocationSize :: LLVM PTX Int

data ExecutableR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Link

data ExecutableR PTX = PTXR {}
data ObjectR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Compile

data ObjectR PTX = ObjectR {}
type ArgR PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Marshal

type ArgR PTX = FunParam
type EventR PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

type StreamR PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

data KernelMetadata PTX 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.CodeGen.Base

data KernelMetadata PTX = KM_PTX LaunchConfig
type RemotePtr (LLVM PTX) 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Array.Remote

type RemotePtr (LLVM PTX) = DevicePtr

data Context Source #

An execution context, which is tied to a specific device and CUDA execution context.

Constructors

Context 
Instances
Eq Context Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Context

Methods

(==) :: Context -> Context -> Bool #

(/=) :: Context -> Context -> Bool #

Hashable Context Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Context

Methods

hashWithSalt :: Int -> Context -> Int #

hash :: Context -> Int #

liftIO :: MonadIO m => IO a -> m a #

Lift a computation from the IO monad.

withDevicePtr :: (ArrayElt e, ArrayPtrs e ~ Ptr a, Typeable e, Typeable a, Storable a) => ArrayData e -> (DevicePtr a -> LLVM PTX (Maybe Event, r)) -> LLVM PTX r Source #

Lookup the device memory associated with a given host array and do something with it.

copyToHostLazy :: Arrays arrs => arrs -> LLVM PTX arrs Source #

Copy an array from the remote device to the host. Although the Accelerate program is hyper-strict and will evaluate the computation as soon as any part of it is demanded, the individual array payloads are copied back to the host _only_ as they are demanded by the Haskell program. This has several consequences:

  1. If the device has multiple memcpy engines, only one will be used. The transfers are however associated with a non-default stream.
  2. Using seq to force an Array to weak head normal form will initiate the computation, but not transfer the results back to the host. Requesting an array element or using deepseq to force to normal form is required to actually transfer the data.

cloneArrayAsync :: (Shape sh, Elt e) => Stream -> Array sh e -> LLVM PTX (Array sh e) Source #

Clone an array into a newly allocated array on the device.

type Async a = AsyncR PTX a Source #

type Stream = Lifetime Stream Source #

A Stream represents an independent sequence of computations executed on the GPU. Operations in different streams may be executed concurrently with each other, but operations in the same stream can never overlap. Events can be used for efficient cross-stream synchronisation.

type Event = Lifetime Event Source #

Events can be used for efficient device-side synchronisation between execution streams, as well as between a stream and the host.

type family StreamR arch :: Type #

Streams (i.e. threads) can execute concurrently with other streams, but operations within the same stream proceed sequentially.

Instances
type StreamR PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

type family EventR arch :: Type #

An Event marks a point in the execution stream, possibly in the future. Since execution within a stream is sequential, events can be used to test the progress of a computation and synchronise between different streams.

Instances
type EventR PTX Source # 
Instance details

Defined in Data.Array.Accelerate.LLVM.PTX.Execute.Async

join :: Async arch => StreamR arch -> LLVM arch () #

Mark the given execution stream as closed. The stream may still be executing in the background, but no new work may be submitted to it.

fork :: Async arch => LLVM arch (StreamR arch) #

Create a new execution stream that can be used to track (potentially parallel) computations.

checkpoint :: Async arch => StreamR arch -> LLVM arch (EventR arch) #

Generate a new event at the end of the given execution stream. It will be filled once all prior work submitted to the stream has completed.

after :: Async arch => StreamR arch -> EventR arch -> LLVM arch () #

Make all future work submitted to the given execution stream wait until the given event has passed. Typically the event is from a different execution stream, therefore this function is intended to enable non-blocking cross-stream coordination.

block :: Async arch => EventR arch -> LLVM arch () #

Block execution of the calling thread until the given event has been recorded.

async :: Async arch => (StreamR arch -> LLVM arch a) -> LLVM arch (AsyncR arch a) #

Execute the given operation asynchronously in a new execution stream.

get :: Async arch => AsyncR arch a -> LLVM arch a #

Wait for an asynchronous operation to complete, then return its result.

data AsyncR arch a #

The result of a potentially parallel computation which will be available at some point (presumably, in the future). This is essentially a write-once IVar.

Constructors

AsyncR !(EventR arch) !a 

Orphan instances

Foreign PTX Source # 
Instance details

Methods

foreignAcc :: (Foreign asm, Typeable a, Typeable b) => PTX -> asm (a -> b) -> Maybe (StreamR PTX -> a -> LLVM PTX b)

foreignExp :: (Foreign asm, Typeable x, Typeable y) => PTX -> asm (x -> y) -> Maybe (IRFun1 PTX () (x -> y))