{-# LANGUAGE GADTs               #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications    #-}
{-# LANGUAGE ViewPatterns        #-}
{-# OPTIONS_HADDOCK hide #-}
-- |
-- Module      : Data.Array.Accelerate.LLVM.CodeGen.Array
-- Copyright   : [2015..2020] The Accelerate Team
-- License     : BSD3
--
-- Maintainer  : Trevor L. McDonell <trevor.mcdonell@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)
--

module Data.Array.Accelerate.LLVM.CodeGen.Array (

  readArray,
  writeArray,

) where

import Control.Applicative
import Prelude                                                      hiding ( read )
import Data.Bits

import LLVM.AST.Type.AddrSpace
import LLVM.AST.Type.Instruction
import LLVM.AST.Type.Instruction.Volatile
import LLVM.AST.Type.Operand
import LLVM.AST.Type.Representation

import Data.Array.Accelerate.Representation.Array
import Data.Array.Accelerate.Representation.Type

import Data.Array.Accelerate.LLVM.CodeGen.IR
import Data.Array.Accelerate.LLVM.CodeGen.Monad
import Data.Array.Accelerate.LLVM.CodeGen.Ptr
import Data.Array.Accelerate.LLVM.CodeGen.Sugar
import Data.Array.Accelerate.LLVM.CodeGen.Constant


-- | Read a value from an array at the given index
--
{-# INLINEABLE readArray #-}
readArray
    :: IntegralType int
    -> IRArray (Array sh e)
    -> Operands int
    -> CodeGen arch (Operands e)
readArray :: IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> CodeGen arch (Operands e)
readArray IntegralType int
int (IRArray (ArrayR ShapeR sh
_ TypeR e
tp) Operands sh
_ Operands e
adata AddrSpace
addrspace Volatility
volatility) (IntegralType int -> Operands int -> Operand int
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op IntegralType int
int -> Operand int
ix) =
  AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> CodeGen arch (Operands e)
forall int e arch.
AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> CodeGen arch (Operands e)
readArrayData AddrSpace
addrspace Volatility
volatility IntegralType int
int Operand int
ix TypeR e
tp Operands e
Operands e
adata

readArrayData
    :: AddrSpace
    -> Volatility
    -> IntegralType int
    -> Operand int
    -> TypeR e
    -> Operands e
    -> CodeGen arch (Operands e)
readArrayData :: AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> CodeGen arch (Operands e)
readArrayData AddrSpace
a Volatility
v IntegralType int
i Operand int
ix = TypeR e -> Operands e -> CodeGen arch (Operands e)
forall e arch. TypeR e -> Operands e -> CodeGen arch (Operands e)
read
  where
    read :: TypeR e -> Operands e -> CodeGen arch (Operands e)
    read :: TypeR e -> Operands e -> CodeGen arch (Operands e)
read TypeR e
TupRunit          Operands e
OP_Unit                = Operands () -> CodeGen arch (Operands ())
forall (m :: * -> *) a. Monad m => a -> m a
return Operands ()
OP_Unit
    read (TupRpair TupR ScalarType a1
t2 TupR ScalarType b
t1) (OP_Pair a2 a1)         = Operands a1 -> Operands b -> Operands (a1, b)
forall a b. Operands a -> Operands b -> Operands (a, b)
OP_Pair (Operands a1 -> Operands b -> Operands (a1, b))
-> CodeGen arch (Operands a1)
-> CodeGen arch (Operands b -> Operands (a1, b))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TupR ScalarType a1 -> Operands a1 -> CodeGen arch (Operands a1)
forall e arch. TypeR e -> Operands e -> CodeGen arch (Operands e)
read TupR ScalarType a1
t2 Operands a1
a2 CodeGen arch (Operands b -> Operands (a1, b))
-> CodeGen arch (Operands b) -> CodeGen arch (Operands (a1, b))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> TupR ScalarType b -> Operands b -> CodeGen arch (Operands b)
forall e arch. TypeR e -> Operands e -> CodeGen arch (Operands e)
read TupR ScalarType b
t1 Operands b
a1
    read (TupRsingle ScalarType e
e)   (AddrSpace -> Operand e -> Operand (Ptr e)
forall t. HasCallStack => AddrSpace -> Operand t -> Operand (Ptr t)
asPtr AddrSpace
a (Operand e -> Operand (Ptr e))
-> (Operands e -> Operand e) -> Operands e -> Operand (Ptr e)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ScalarType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op ScalarType e
e -> Operand (Ptr e)
arr) = ScalarType e -> Operand e -> Operands e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operand a -> Operands a
ir ScalarType e
e    (Operand e -> Operands e)
-> CodeGen arch (Operand e) -> CodeGen arch (Operands e)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand e)
forall e int arch.
AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand e)
readArrayPrim AddrSpace
a Volatility
v ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix

readArrayPrim
    :: AddrSpace
    -> Volatility
    -> ScalarType e
    -> IntegralType int
    -> Operand (Ptr e)
    -> Operand int
    -> CodeGen arch (Operand e)
readArrayPrim :: AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand e)
readArrayPrim AddrSpace
a Volatility
v ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix = do
  Operand (Ptr e)
p <- AddrSpace
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand (Ptr e))
forall e int arch.
AddrSpace
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand (Ptr e))
getElementPtr AddrSpace
a ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix
  Operand e
x <- AddrSpace
-> ScalarType e
-> Volatility
-> Operand (Ptr e)
-> CodeGen arch (Operand e)
forall e arch.
AddrSpace
-> ScalarType e
-> Volatility
-> Operand (Ptr e)
-> CodeGen arch (Operand e)
load AddrSpace
a ScalarType e
e Volatility
v Operand (Ptr e)
p
  Operand e -> CodeGen arch (Operand e)
forall (m :: * -> *) a. Monad m => a -> m a
return Operand e
x


-- | Write a value into an array at the given index
--
{-# INLINEABLE writeArray #-}
writeArray
    :: IntegralType int
    -> IRArray (Array sh e)
    -> Operands int
    -> Operands e
    -> CodeGen arch ()
writeArray :: IntegralType int
-> IRArray (Array sh e)
-> Operands int
-> Operands e
-> CodeGen arch ()
writeArray IntegralType int
int (IRArray (ArrayR ShapeR sh
_ TypeR e
tp) Operands sh
_ Operands e
adata AddrSpace
addrspace Volatility
volatility) (IntegralType int -> Operands int -> Operand int
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op IntegralType int
int -> Operand int
ix) Operands e
val =
  AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> Operands e
-> CodeGen arch ()
forall int e arch.
AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> Operands e
-> CodeGen arch ()
writeArrayData AddrSpace
addrspace Volatility
volatility IntegralType int
int Operand int
ix TypeR e
tp Operands e
Operands e
adata Operands e
Operands e
val

writeArrayData
    :: AddrSpace
    -> Volatility
    -> IntegralType int
    -> Operand int
    -> TypeR e
    -> Operands e
    -> Operands e
    -> CodeGen arch ()
writeArrayData :: AddrSpace
-> Volatility
-> IntegralType int
-> Operand int
-> TypeR e
-> Operands e
-> Operands e
-> CodeGen arch ()
writeArrayData AddrSpace
a Volatility
v IntegralType int
i Operand int
ix = TypeR e -> Operands e -> Operands e -> CodeGen arch ()
forall e arch.
TypeR e -> Operands e -> Operands e -> CodeGen arch ()
write
  where
    write :: TypeR e -> Operands e -> Operands e -> CodeGen arch ()
    write :: TypeR e -> Operands e -> Operands e -> CodeGen arch ()
write TypeR e
TupRunit          Operands e
OP_Unit                 Operands e
OP_Unit        = () -> CodeGen arch ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    write (TupRpair TupR ScalarType a1
t2 TupR ScalarType b
t1) (OP_Pair a2 a1)         (OP_Pair v2 v1) = TupR ScalarType b -> Operands b -> Operands b -> CodeGen arch ()
forall e arch.
TypeR e -> Operands e -> Operands e -> CodeGen arch ()
write TupR ScalarType b
t1 Operands b
a1 Operands b
v1 CodeGen arch () -> CodeGen arch () -> CodeGen arch ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> TupR ScalarType a1 -> Operands a1 -> Operands a1 -> CodeGen arch ()
forall e arch.
TypeR e -> Operands e -> Operands e -> CodeGen arch ()
write TupR ScalarType a1
t2 Operands a1
a2 Operands a1
v2
    write (TupRsingle ScalarType e
e)   (AddrSpace -> Operand e -> Operand (Ptr e)
forall t. HasCallStack => AddrSpace -> Operand t -> Operand (Ptr t)
asPtr AddrSpace
a (Operand e -> Operand (Ptr e))
-> (Operands e -> Operand e) -> Operands e -> Operand (Ptr e)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ScalarType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op ScalarType e
e -> Operand (Ptr e)
arr) (ScalarType e -> Operands e -> Operand e
forall (dict :: * -> *) a.
(IROP dict, HasCallStack) =>
dict a -> Operands a -> Operand a
op ScalarType e
e -> Operand e
val)   = AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> Operand e
-> CodeGen arch ()
forall e int arch.
AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> Operand e
-> CodeGen arch ()
writeArrayPrim AddrSpace
a Volatility
v ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix Operand e
val

writeArrayPrim
    :: AddrSpace
    -> Volatility
    -> ScalarType e
    -> IntegralType int
    -> Operand (Ptr e)
    -> Operand int
    -> Operand e
    -> CodeGen arch ()
writeArrayPrim :: AddrSpace
-> Volatility
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> Operand e
-> CodeGen arch ()
writeArrayPrim AddrSpace
a Volatility
v ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix Operand e
x = do
  Operand (Ptr e)
p <- AddrSpace
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand (Ptr e))
forall e int arch.
AddrSpace
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand (Ptr e))
getElementPtr AddrSpace
a ScalarType e
e IntegralType int
i Operand (Ptr e)
arr Operand int
ix
  ()
_ <- AddrSpace
-> Volatility
-> ScalarType e
-> Operand (Ptr e)
-> Operand e
-> CodeGen arch ()
forall e arch.
AddrSpace
-> Volatility
-> ScalarType e
-> Operand (Ptr e)
-> Operand e
-> CodeGen arch ()
store AddrSpace
a Volatility
v ScalarType e
e Operand (Ptr e)
p Operand e
x
  () -> CodeGen arch ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()


-- | A wrapper around the GetElementPtr instruction, which correctly
-- computes the pointer offset for non-power-of-two SIMD types
--
getElementPtr
    :: AddrSpace
    -> ScalarType e
    -> IntegralType int
    -> Operand (Ptr e)
    -> Operand int
    -> CodeGen arch (Operand (Ptr e))
getElementPtr :: AddrSpace
-> ScalarType e
-> IntegralType int
-> Operand (Ptr e)
-> Operand int
-> CodeGen arch (Operand (Ptr e))
getElementPtr AddrSpace
_ SingleScalarType{}   IntegralType int
_ Operand (Ptr e)
arr Operand int
ix = Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e)))
-> Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr e) -> [Operand int] -> Instruction (Ptr e)
forall a i. Operand (Ptr a) -> [Operand i] -> Instruction (Ptr a)
GetElementPtr Operand (Ptr e)
arr [Operand int
ix]
getElementPtr AddrSpace
a (VectorScalarType VectorType (Vec n a1)
v) IntegralType int
i Operand (Ptr e)
arr Operand int
ix
  | VectorType Int
n SingleType a1
_ <- VectorType (Vec n a1)
v
  , IntegralDict int
IntegralDict   <- IntegralType int -> IntegralDict int
forall a. IntegralType a -> IntegralDict a
integralDict IntegralType int
i
  = if Int -> Int
forall a. Bits a => a -> Int
popCount Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
       then Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e)))
-> Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr e) -> [Operand int] -> Instruction (Ptr e)
forall a i. Operand (Ptr a) -> [Operand i] -> Instruction (Ptr a)
GetElementPtr Operand (Ptr e)
arr [Operand int
ix]
       else do
          -- Note the initial zero into to the GEP instruction. It is not
          -- really recommended to use GEP to index into vector elements, but
          -- is not forcefully disallowed (at this time)
          Operand int
ix'  <- Instruction int -> CodeGen arch (Operand int)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction int -> CodeGen arch (Operand int))
-> Instruction int -> CodeGen arch (Operand int)
forall a b. (a -> b) -> a -> b
$ NumType int -> Operand int -> Operand int -> Instruction int
forall a. NumType a -> Operand a -> Operand a -> Instruction a
Mul (IntegralType int -> NumType int
forall a. IntegralType a -> NumType a
IntegralNumType IntegralType int
i) Operand int
ix (IntegralType int -> int -> Operand int
forall a. IntegralType a -> a -> Operand a
integral IntegralType int
i (Int -> int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n))
          Operand (Ptr e)
p'   <- Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e)))
-> Instruction (Ptr e) -> CodeGen arch (Operand (Ptr e))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr e) -> [Operand int] -> Instruction (Ptr e)
forall a i. Operand (Ptr a) -> [Operand i] -> Instruction (Ptr a)
GetElementPtr Operand (Ptr e)
arr [IntegralType int -> int -> Operand int
forall a. IntegralType a -> a -> Operand a
integral IntegralType int
i int
0, Operand int
ix']
          Operand (Ptr (Vec n a1))
p    <- Instruction (Ptr (Vec n a1))
-> CodeGen arch (Operand (Ptr (Vec n a1)))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr (Vec n a1))
 -> CodeGen arch (Operand (Ptr (Vec n a1))))
-> Instruction (Ptr (Vec n a1))
-> CodeGen arch (Operand (Ptr (Vec n a1)))
forall a b. (a -> b) -> a -> b
$ PrimType (Ptr (Vec n a1))
-> Operand (Ptr e) -> Instruction (Ptr (Vec n a1))
forall b a.
PrimType (Ptr b) -> Operand (Ptr a) -> Instruction (Ptr b)
PtrCast (PrimType (Vec n a1) -> AddrSpace -> PrimType (Ptr (Vec n a1))
forall a. PrimType a -> AddrSpace -> PrimType (Ptr a)
PtrPrimType (ScalarType (Vec n a1) -> PrimType (Vec n a1)
forall a. ScalarType a -> PrimType a
ScalarPrimType (VectorType (Vec n a1) -> ScalarType (Vec n a1)
forall (n :: Nat) a1.
VectorType (Vec n a1) -> ScalarType (Vec n a1)
VectorScalarType VectorType (Vec n a1)
v)) AddrSpace
a) Operand (Ptr e)
p'
          Operand (Ptr (Vec n a1)) -> CodeGen arch (Operand (Ptr (Vec n a1)))
forall (m :: * -> *) a. Monad m => a -> m a
return Operand (Ptr (Vec n a1))
p


-- | A wrapper around the Load instruction, which splits non-power-of-two
-- SIMD types into a sequence of smaller reads.
--
-- Note: [Non-power-of-two loads and stores]
--
-- Splitting this operation a sequence of smaller power-of-two stores does
-- not work because those instructions may (will) violate alignment
-- restrictions, causing a general protection fault. So, we simply
-- implement those stores as a sequence of stores for each individual
-- element.
--
-- We could do runtime checks for what the pointer alignment is and perform
-- a vector store when we align on the right boundary, but I'm not sure the
-- extra complexity is worth it.
--
load :: AddrSpace
     -> ScalarType e
     -> Volatility
     -> Operand (Ptr e)
     -> CodeGen arch (Operand e)
load :: AddrSpace
-> ScalarType e
-> Volatility
-> Operand (Ptr e)
-> CodeGen arch (Operand e)
load AddrSpace
addrspace ScalarType e
e Volatility
v Operand (Ptr e)
p
  | SingleScalarType{} <- ScalarType e
e = Instruction e -> CodeGen arch (Operand e)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction e -> CodeGen arch (Operand e))
-> Instruction e -> CodeGen arch (Operand e)
forall a b. (a -> b) -> a -> b
$ ScalarType e -> Volatility -> Operand (Ptr e) -> Instruction e
forall a.
ScalarType a -> Volatility -> Operand (Ptr a) -> Instruction a
Load ScalarType e
e Volatility
v Operand (Ptr e)
p
  | VectorScalarType VectorType (Vec n a1)
s <- ScalarType e
e
  , VectorType Int
n SingleType a1
base  <- VectorType (Vec n a1)
s
  , Int32
m                  <- Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
  = if Int32 -> Int
forall a. Bits a => a -> Int
popCount Int32
m Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
       then Instruction e -> CodeGen arch (Operand e)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction e -> CodeGen arch (Operand e))
-> Instruction e -> CodeGen arch (Operand e)
forall a b. (a -> b) -> a -> b
$ ScalarType e -> Volatility -> Operand (Ptr e) -> Instruction e
forall a.
ScalarType a -> Volatility -> Operand (Ptr a) -> Instruction a
Load ScalarType e
e Volatility
v Operand (Ptr e)
p
       else do
         Operand (Ptr a1)
p' <- Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1)))
-> Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a b. (a -> b) -> a -> b
$ PrimType (Ptr a1) -> Operand (Ptr e) -> Instruction (Ptr a1)
forall b a.
PrimType (Ptr b) -> Operand (Ptr a) -> Instruction (Ptr b)
PtrCast (PrimType a1 -> AddrSpace -> PrimType (Ptr a1)
forall a. PrimType a -> AddrSpace -> PrimType (Ptr a)
PtrPrimType (ScalarType a1 -> PrimType a1
forall a. ScalarType a -> PrimType a
ScalarPrimType (SingleType a1 -> ScalarType a1
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType a1
base)) AddrSpace
addrspace) Operand (Ptr e)
p
         --
         let go :: Int32 -> Operand (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
go Int32
i Operand (Vec n a1)
w
               | Int32
i Int32 -> Int32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Int32
m    = Operand (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
forall (m :: * -> *) a. Monad m => a -> m a
return Operand (Vec n a1)
w
               | Bool
otherwise = do
                   Operand (Ptr a1)
q  <- Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1)))
-> Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr a1) -> [Operand Int32] -> Instruction (Ptr a1)
forall a i. Operand (Ptr a) -> [Operand i] -> Instruction (Ptr a)
GetElementPtr Operand (Ptr a1)
p' [IntegralType Int32 -> Int32 -> Operand Int32
forall a. IntegralType a -> a -> Operand a
integral IntegralType Int32
forall a. IsIntegral a => IntegralType a
integralType Int32
i]
                   Operand a1
r  <- Instruction a1 -> CodeGen arch (Operand a1)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction a1 -> CodeGen arch (Operand a1))
-> Instruction a1 -> CodeGen arch (Operand a1)
forall a b. (a -> b) -> a -> b
$ ScalarType a1 -> Volatility -> Operand (Ptr a1) -> Instruction a1
forall a.
ScalarType a -> Volatility -> Operand (Ptr a) -> Instruction a
Load (SingleType a1 -> ScalarType a1
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType a1
base) Volatility
v Operand (Ptr a1)
q
                   Operand (Vec n a1)
w' <- Instruction (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Vec n a1) -> CodeGen arch (Operand (Vec n a1)))
-> Instruction (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
forall a b. (a -> b) -> a -> b
$ Int32 -> Operand (Vec n a1) -> Operand a1 -> Instruction (Vec n a1)
forall (n :: Nat) a.
Int32 -> Operand (Vec n a) -> Operand a -> Instruction (Vec n a)
InsertElement Int32
i Operand (Vec n a1)
w Operand a1
r
                   Int32 -> Operand (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
go (Int32
iInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+Int32
1) Operand (Vec n a1)
w'
         --
         Int32 -> Operand (Vec n a1) -> CodeGen arch (Operand (Vec n a1))
go Int32
0 (ScalarType e -> Operand e
forall a. ScalarType a -> Operand a
undef ScalarType e
e)


-- | A wrapper around the Store instruction, which splits non-power-of-two
-- SIMD types into a sequence of smaller writes.
--
-- See: [Non-power-of-two loads and stores]
--
store :: AddrSpace
      -> Volatility
      -> ScalarType e
      -> Operand (Ptr e)
      -> Operand e
      -> CodeGen arch ()
store :: AddrSpace
-> Volatility
-> ScalarType e
-> Operand (Ptr e)
-> Operand e
-> CodeGen arch ()
store AddrSpace
addrspace Volatility
volatility ScalarType e
e Operand (Ptr e)
p Operand e
v
  | SingleScalarType{} <- ScalarType e
e = Instruction () -> CodeGen arch ()
forall arch. HasCallStack => Instruction () -> CodeGen arch ()
do_ (Instruction () -> CodeGen arch ())
-> Instruction () -> CodeGen arch ()
forall a b. (a -> b) -> a -> b
$ Volatility -> Operand (Ptr e) -> Operand e -> Instruction ()
forall a.
Volatility -> Operand (Ptr a) -> Operand a -> Instruction ()
Store Volatility
volatility Operand (Ptr e)
p Operand e
v
  | VectorScalarType VectorType (Vec n a1)
s <- ScalarType e
e
  , VectorType Int
n SingleType a1
base  <- VectorType (Vec n a1)
s
  , Int32
m                  <- Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
  = if Int32 -> Int
forall a. Bits a => a -> Int
popCount Int32
m Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
       then Instruction () -> CodeGen arch ()
forall arch. HasCallStack => Instruction () -> CodeGen arch ()
do_ (Instruction () -> CodeGen arch ())
-> Instruction () -> CodeGen arch ()
forall a b. (a -> b) -> a -> b
$ Volatility -> Operand (Ptr e) -> Operand e -> Instruction ()
forall a.
Volatility -> Operand (Ptr a) -> Operand a -> Instruction ()
Store Volatility
volatility Operand (Ptr e)
p Operand e
v
       else do
         Operand (Ptr a1)
p' <- Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1)))
-> Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a b. (a -> b) -> a -> b
$ PrimType (Ptr a1) -> Operand (Ptr e) -> Instruction (Ptr a1)
forall b a.
PrimType (Ptr b) -> Operand (Ptr a) -> Instruction (Ptr b)
PtrCast (PrimType a1 -> AddrSpace -> PrimType (Ptr a1)
forall a. PrimType a -> AddrSpace -> PrimType (Ptr a)
PtrPrimType (ScalarType a1 -> PrimType a1
forall a. ScalarType a -> PrimType a
ScalarPrimType (SingleType a1 -> ScalarType a1
forall a. SingleType a -> ScalarType a
SingleScalarType SingleType a1
base)) AddrSpace
addrspace) Operand (Ptr e)
p
         --
         let go :: Int32 -> CodeGen arch ()
go Int32
i
               | Int32
i Int32 -> Int32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Int32
m    = () -> CodeGen arch ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
               | Bool
otherwise = do
                   Operand a1
x <- Instruction a1 -> CodeGen arch (Operand a1)
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction a1 -> CodeGen arch (Operand a1))
-> Instruction a1 -> CodeGen arch (Operand a1)
forall a b. (a -> b) -> a -> b
$ Int32 -> Operand (Vec n a1) -> Instruction a1
forall (n :: Nat) a. Int32 -> Operand (Vec n a) -> Instruction a
ExtractElement Int32
i Operand e
Operand (Vec n a1)
v
                   Operand (Ptr a1)
q <- Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1)))
-> Instruction (Ptr a1) -> CodeGen arch (Operand (Ptr a1))
forall a b. (a -> b) -> a -> b
$ Operand (Ptr a1) -> [Operand Int32] -> Instruction (Ptr a1)
forall a i. Operand (Ptr a) -> [Operand i] -> Instruction (Ptr a)
GetElementPtr Operand (Ptr a1)
p' [IntegralType Int32 -> Int32 -> Operand Int32
forall a. IntegralType a -> a -> Operand a
integral IntegralType Int32
forall a. IsIntegral a => IntegralType a
integralType Int32
i]
                   Operand ()
_ <- Instruction () -> CodeGen arch (Operand ())
forall a arch.
HasCallStack =>
Instruction a -> CodeGen arch (Operand a)
instr' (Instruction () -> CodeGen arch (Operand ()))
-> Instruction () -> CodeGen arch (Operand ())
forall a b. (a -> b) -> a -> b
$ Volatility -> Operand (Ptr a1) -> Operand a1 -> Instruction ()
forall a.
Volatility -> Operand (Ptr a) -> Operand a -> Instruction ()
Store Volatility
volatility Operand (Ptr a1)
q Operand a1
x
                   Int32 -> CodeGen arch ()
go (Int32
iInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+Int32
1)
         Int32 -> CodeGen arch ()
go Int32
0

{--
      let
          go :: forall arch n t. SingleType t -> Int32 -> Operand (Ptr t) -> Operand (Vec n t) -> CodeGen arch ()
          go t offset ptr' val'
            | offset >= size = return ()
            | otherwise      = do
                let remaining = size - offset
                    this      = setBit 0 (finiteBitSize remaining - countLeadingZeros remaining - 1)

                    vec'      = VectorType (fromIntegral this) t
                    ptr_vec'  = PtrPrimType (ScalarPrimType (VectorScalarType vec')) addrspace

                    repack :: Int32 -> Operand (Vec m t) -> CodeGen arch (Operand (Vec m t))
                    repack j u
                      | j >= this = return u
                      | otherwise = do
                          x <- instr' $ ExtractElement (offset + j) val'
                          v <- instr' $ InsertElement j u x
                          repack (j+1) v

                if remaining == 1
                   then do
                     x <- instr' $ ExtractElement offset val'
                     _ <- instr' $ Store volatility ptr' x
                     return ()

                   else do
                     v <- repack 0 $ undef (VectorScalarType vec')
                     p <- instr' $ PtrCast ptr_vec' ptr'
                     _ <- instr' $ Store volatility p v

                     q <- instr' $ GetElementPtr ptr' [integral integralType this]
                     go t (offset + this) q val'

      ptr' <- instr' $ PtrCast (PtrPrimType (ScalarPrimType (SingleScalarType base)) addrspace) ptr
      go base 0 ptr' val

  where
    VectorType (fromIntegral -> size) base = vec
--}