{-# LANGUAGE CPP #-}
{-# LANGUAGE EmptyDataDecls #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Data.Array.Accelerate.LLVM.PTX.Target (
module Data.Array.Accelerate.LLVM.Target,
module Data.Array.Accelerate.LLVM.PTX.Target,
) where
import LLVM.AST.AddrSpace
import LLVM.AST.DataLayout
import LLVM.Target hiding ( Target )
import qualified LLVM.Target as LLVM
import qualified LLVM.Relocation as R
import qualified LLVM.CodeModel as CM
import qualified LLVM.CodeGenOpt as CGO
import Data.Array.Accelerate.Error
import Data.Array.Accelerate.LLVM.Target
import Data.Array.Accelerate.LLVM.Util
import Control.Parallel.Meta ( Executable )
import Data.Array.Accelerate.LLVM.PTX.Array.Table ( MemoryTable )
import Data.Array.Accelerate.LLVM.PTX.Context ( Context, deviceProperties )
import Data.Array.Accelerate.LLVM.PTX.Execute.Stream.Reservoir ( Reservoir )
import qualified Foreign.CUDA.Driver as CUDA
import Control.Monad.Except
import System.IO.Unsafe
import Text.Printf
import qualified Data.Map as Map
import qualified Data.Set as Set
data PTX = PTX {
ptxContext :: {-# UNPACK #-} !Context
, ptxMemoryTable :: {-# UNPACK #-} !MemoryTable
, ptxStreamReservoir :: {-# UNPACK #-} !Reservoir
, fillP :: {-# UNPACK #-} !Executable
}
instance Target PTX where
targetTriple _ = Just ptxTargetTriple
#if ACCELERATE_USE_NVVM
targetDataLayout _ = Nothing
#else
targetDataLayout _ = Just ptxDataLayout
#endif
ptxDeviceProperties :: PTX -> CUDA.DeviceProperties
ptxDeviceProperties = deviceProperties . ptxContext
ptxDataLayout :: DataLayout
ptxDataLayout = DataLayout
{ endianness = LittleEndian
, mangling = Nothing
, aggregateLayout = AlignmentInfo 0 (Just 64)
, stackAlignment = Nothing
, pointerLayouts = Map.fromList
[ (AddrSpace 0, (wordSize, AlignmentInfo wordSize (Just wordSize))) ]
, typeLayouts = Map.fromList $
[ ((IntegerAlign, 1), AlignmentInfo 8 (Just 8)) ] ++
[ ((IntegerAlign, i), AlignmentInfo i (Just i)) | i <- [8,16,32,64]] ++
[ ((VectorAlign, v), AlignmentInfo v (Just v)) | v <- [16,32,64,128]] ++
[ ((FloatAlign, f), AlignmentInfo f (Just f)) | f <- [32,64] ]
, nativeSizes = Just $ Set.fromList [ 16,32,64 ]
}
where
wordSize = bitSize (undefined :: Int)
ptxTargetTriple :: String
ptxTargetTriple =
case bitSize (undefined::Int) of
32 -> "nvptx-nvidia-cuda"
64 -> "nvptx64-nvidia-cuda"
_ -> $internalError "ptxTargetTriple" "I don't know what architecture I am"
withPTXTargetMachine
:: CUDA.DeviceProperties
-> (TargetMachine -> IO a)
-> IO a
withPTXTargetMachine dev go =
let CUDA.Compute m n = CUDA.computeCapability dev
isa = ptxISAVersion m n
sm = printf "sm_%d%d" m n
in
withTargetOptions $ \options -> do
withTargetMachine
ptxTarget
ptxTargetTriple
sm
(Map.singleton isa True)
options
R.Default
CM.Default
CGO.Default
go
ptxISAVersion :: Int -> Int -> CPUFeature
ptxISAVersion 2 _ = CPUFeature "ptx40"
ptxISAVersion 3 7 = CPUFeature "ptx41"
ptxISAVersion 3 _ = CPUFeature "ptx40"
ptxISAVersion 5 0 = CPUFeature "ptx40"
ptxISAVersion 5 2 = CPUFeature "ptx41"
ptxISAVersion 5 3 = CPUFeature "ptx42"
ptxISAVersion 6 _ = CPUFeature "ptx50"
ptxISAVersion _ _ = CPUFeature "ptx40"
{-# NOINLINE ptxTarget #-}
ptxTarget :: LLVM.Target
ptxTarget = unsafePerformIO $ do
initializeAllTargets
either error fst `fmap` runExceptT (lookupTarget Nothing ptxTargetTriple)