module Data.Primitive.SIMD.Int32X16 (Int32X16) where
import Data.Primitive.SIMD.Class
import GHC.Int
import GHC.Types
import GHC.Prim
import GHC.Ptr
import GHC.ST
import Foreign.Storable
import Control.Monad.Primitive
import Data.Primitive.Types
import Data.Primitive.ByteArray
import Data.Primitive.Addr
import Data.Monoid
import Data.Typeable
import qualified Data.Vector.Primitive as PV
import qualified Data.Vector.Primitive.Mutable as PMV
import Data.Vector.Unboxed (Unbox)
import qualified Data.Vector.Unboxed as UV
import Data.Vector.Generic (Vector(..))
import Data.Vector.Generic.Mutable (MVector(..))
-- | Sixteen 'Int32' lanes, held as four 'Int32X4#' primitive vectors.
data Int32X16 = Int32X16 Int32X4# Int32X4# Int32X4# Int32X4# deriving Typeable
-- | Absolute value of a boxed 'Int32', computed through the unboxed
-- helper 'abs#'.
abs' :: Int32 -> Int32
abs' boxed = case boxed of
    I32# raw -> I32# (abs# raw)
-- | Absolute value on the raw 'Int#' payload: the value is boxed as an
-- 'Int32', passed to 'abs', and the result is unboxed again.
abs# :: Int# -> Int#
abs# raw = unbox (abs (I32# raw))
  where
    unbox (I32# out) = out
-- | Sign of a boxed 'Int32', computed through the unboxed helper
-- 'signum#'.
signum' :: Int32 -> Int32
signum' boxed = case boxed of
    I32# raw -> I32# (signum# raw)
-- | Sign on the raw 'Int#' payload: the value is boxed as an 'Int32',
-- passed to 'signum', and the result is unboxed again.
signum# :: Int# -> Int#
signum# raw = unbox (signum (I32# raw))
  where
    unbox (I32# out) = out
-- | Two vectors are equal when all sixteen lanes are pairwise equal.
instance Eq Int32X16 where
    lhs == rhs = and
        [ a1  == b1,  a2  == b2,  a3  == b3,  a4  == b4
        , a5  == b5,  a6  == b6,  a7  == b7,  a8  == b8
        , a9  == b9,  a10 == b10, a11 == b11, a12 == b12
        , a13 == b13, a14 == b14, a15 == b15, a16 == b16
        ]
      where
        (a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16) =
            unpackInt32X16 lhs
        (b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16) =
            unpackInt32X16 rhs
-- | Lexicographic ordering over the sixteen lanes: the first lane that
-- differs decides the result.
instance Ord Int32X16 where
    compare lhs rhs = mconcat
        [ compare a1  b1,  compare a2  b2,  compare a3  b3,  compare a4  b4
        , compare a5  b5,  compare a6  b6,  compare a7  b7,  compare a8  b8
        , compare a9  b9,  compare a10 b10, compare a11 b11, compare a12 b12
        , compare a13 b13, compare a14 b14, compare a15 b15, compare a16 b16
        ]
      where
        (a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16) =
            unpackInt32X16 lhs
        (b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16) =
            unpackInt32X16 rhs
-- | Renders a vector as @Int32X16 (x1, x2, ..., x16)@.
--
-- The output has the shape of a constructor application, so it is wrapped
-- in parentheses when shown in an argument position (precedence above 10),
-- e.g. inside @Just@. Top-level 'show' output is unchanged.
instance Show Int32X16 where
    showsPrec prec vec = showParen (prec > 10) body
      where
        (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16) =
            unpackInt32X16 vec
        -- First lane is printed bare; every later lane is preceded by ", ".
        body = showString "Int32X16 (" . shows x1 . rest . showChar ')'
        rest = foldr
            (\lane k -> showString ", " . shows lane . k)
            id
            [x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16]
-- | Lane-wise arithmetic. Addition, subtraction, multiplication and
-- negation use the dedicated vector functions of this module; 'abs' and
-- 'signum' are mapped over the lanes; 'fromInteger' converts the literal
-- to an element and broadcasts it to every lane.
instance Num Int32X16 where
    (+)         = plusInt32X16
    -- The operator name was missing here (the line read @() = ...@), which
    -- left subtraction undefined for this instance.
    (-)         = minusInt32X16
    (*)         = timesInt32X16
    negate      = negateInt32X16
    abs         = mapVector abs'
    signum      = mapVector signum'
    fromInteger = broadcastVector . fromInteger
-- | Bounds are the 'Int32' bounds broadcast to every lane.
instance Bounded Int32X16 where
    minBound = broadcastVector (minBound :: Int32)
    maxBound = broadcastVector (maxBound :: Int32)
-- | Storable access through the 'Prim'-based address operations.
-- The size is lane count times lane size, and the alignment equals the size.
instance Storable Int32X16 where
    sizeOf vec         = elementSize vec * vectorSize vec
    alignment vec      = sizeOf vec
    peek (Ptr p)       = readOffAddr (Addr p) 0
    poke (Ptr p) value = writeOffAddr (Addr p) 0 value
-- | Generic SIMD interface: sixteen lanes of four bytes each, with every
-- operation forwarded to the specialised function of this module.
instance SIMDVector Int32X16 where
    type Elem Int32X16 = Int32
    type ElemTuple Int32X16 =
        ( Int32, Int32, Int32, Int32
        , Int32, Int32, Int32, Int32
        , Int32, Int32, Int32, Int32
        , Int32, Int32, Int32, Int32
        )
    nullVector         = broadcastInt32X16 0
    vectorSize         = const 16
    elementSize        = const 4
    broadcastVector    = broadcastInt32X16
    unsafeInsertVector = unsafeInsertInt32X16
    packVector         = packInt32X16
    unpackVector       = unpackInt32X16
    mapVector          = mapInt32X16
    zipVector          = zipInt32X16
    foldVector         = foldInt32X16
    sumVector          = sumInt32X16
-- | Integer division operations, forwarded to the lane-wise helpers.
instance SIMDIntVector Int32X16 where
    quotVector lhs rhs = quotInt32X16 lhs rhs
    remVector lhs rhs  = remInt32X16 lhs rhs
-- | 'Prim' instance: sizes come from 'sizeOf' / 'alignment', element
-- access goes through the array and address helpers of this module, and
-- the bulk @set@ operations use the generic helpers. Monadic helpers are
-- run by unwrapping their 'ST' action and applying it to the state token.
instance Prim Int32X16 where
    sizeOf# vec = case sizeOf vec of
        I# bytes -> bytes
    alignment# vec = case alignment vec of
        I# bytes -> bytes
    indexByteArray# arr idx = indexInt32X16Array (ByteArray arr) (I# idx)
    readByteArray# marr idx st =
        case readInt32X16Array (MutableByteArray marr) (I# idx) of
            ST run -> run st
    writeByteArray# marr idx val st =
        case writeInt32X16Array (MutableByteArray marr) (I# idx) val of
            ST run -> case run st of
                (# st', _ #) -> st'
    setByteArray# marr start count val st =
        case setByteArrayGeneric (MutableByteArray marr) (I# start) (I# count) val of
            ST run -> case run st of
                (# st', _ #) -> st'
    indexOffAddr# ptr idx = indexInt32X16OffAddr (Addr ptr) (I# idx)
    readOffAddr# ptr idx st =
        case readInt32X16OffAddr (Addr ptr) (I# idx) of
            ST run -> run st
    writeOffAddr# ptr idx val st =
        case writeInt32X16OffAddr (Addr ptr) (I# idx) val of
            ST run -> case run st of
                (# st', _ #) -> st'
    setOffAddr# ptr start count val st =
        case setOffAddrGeneric (Addr ptr) (I# start) (I# count) val of
            ST run -> case run st of
                (# st', _ #) -> st'
-- | Unboxed immutable vectors of 'Int32X16' wrap a primitive vector.
newtype instance UV.Vector Int32X16 = V_Int32X16 (PV.Vector Int32X16)
-- | Unboxed mutable vectors of 'Int32X16' wrap a mutable primitive vector.
newtype instance UV.MVector s Int32X16 = MV_Int32X16 (PMV.MVector s Int32X16)
-- | Immutable unboxed vectors delegate every operation to the wrapped
-- primitive vector, re-wrapping results where needed.
instance Vector UV.Vector Int32X16 where
    basicUnsafeFreeze (MV_Int32X16 mv) = fmap V_Int32X16 (PV.unsafeFreeze mv)
    basicUnsafeThaw (V_Int32X16 pv) = fmap MV_Int32X16 (PV.unsafeThaw pv)
    basicLength (V_Int32X16 pv) = PV.length pv
    basicUnsafeSlice off count (V_Int32X16 pv) =
        V_Int32X16 (PV.unsafeSlice off count pv)
    basicUnsafeIndexM (V_Int32X16 pv) ix = PV.unsafeIndexM pv ix
    basicUnsafeCopy (MV_Int32X16 dst) (V_Int32X16 src) = PV.unsafeCopy dst src
    elemseq _ = seq
-- | Mutable unboxed vectors delegate every operation to the wrapped
-- mutable primitive vector. 'basicInitialize' is only defined when the
-- @vector@ package is at least version 0.11.0.
instance MVector UV.MVector Int32X16 where
    basicLength (MV_Int32X16 mv) = PMV.length mv
    basicUnsafeSlice off count (MV_Int32X16 mv) =
        MV_Int32X16 (PMV.unsafeSlice off count mv)
    basicOverlaps (MV_Int32X16 lhs) (MV_Int32X16 rhs) = PMV.overlaps lhs rhs
    basicUnsafeNew count = fmap MV_Int32X16 (PMV.unsafeNew count)
#if MIN_VERSION_vector(0,11,0)
    basicInitialize (MV_Int32X16 mv) = basicInitialize mv
#endif
    basicUnsafeRead (MV_Int32X16 mv) ix = PMV.unsafeRead mv ix
    basicUnsafeWrite (MV_Int32X16 mv) ix val = PMV.unsafeWrite mv ix val
-- | Enables @Data.Vector.Unboxed@ vectors of 'Int32X16'; no methods are
-- defined here.
instance Unbox Int32X16
-- | Fill all sixteen lanes with the same scalar. The scalar is broadcast
-- into one 'Int32X4#' and that quad is reused for all four fields.
broadcastInt32X16 :: Int32 -> Int32X16
broadcastInt32X16 scalar = case scalar of
    I32# raw -> fourTimes (broadcastInt32X4# raw)
  where
    fourTimes quad = Int32X16 quad quad quad quad
-- | Build a vector from a 16-tuple of lanes. Lanes 1-4 form the first
-- quad, 5-8 the second, 9-12 the third and 13-16 the fourth.
packInt32X16 :: (Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32) -> Int32X16
packInt32X16
    ( I32# a1,  I32# a2,  I32# a3,  I32# a4
    , I32# a5,  I32# a6,  I32# a7,  I32# a8
    , I32# a9,  I32# a10, I32# a11, I32# a12
    , I32# a13, I32# a14, I32# a15, I32# a16
    ) =
    Int32X16
        (quad a1 a2 a3 a4)
        (quad a5 a6 a7 a8)
        (quad a9 a10 a11 a12)
        (quad a13 a14 a15 a16)
  where
    quad w x y z = packInt32X4# (# w, x, y, z #)
-- | Split a vector into a 16-tuple of boxed lanes, quad by quad, in field
-- order.
unpackInt32X16 :: Int32X16 -> (Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Int32)
unpackInt32X16 (Int32X16 q0 q1 q2 q3) =
    case unpackInt32X4# q0 of
      (# a1, a2, a3, a4 #) ->
        case unpackInt32X4# q1 of
          (# a5, a6, a7, a8 #) ->
            case unpackInt32X4# q2 of
              (# a9, a10, a11, a12 #) ->
                case unpackInt32X4# q3 of
                  (# a13, a14, a15, a16 #) ->
                    ( I32# a1,  I32# a2,  I32# a3,  I32# a4
                    , I32# a5,  I32# a6,  I32# a7,  I32# a8
                    , I32# a9,  I32# a10, I32# a11, I32# a12
                    , I32# a13, I32# a14, I32# a15, I32# a16
                    )
-- | Replace one lane of the vector. The index selects a quad by range
-- (below 4, below 8, below 12, otherwise the last quad) and the offset
-- within that quad is the index minus the quad's first lane number.
-- No bounds check is done here: any index of 12 or more is routed to the
-- last quad, and any index below 4 to the first.
unsafeInsertInt32X16 :: Int32X16 -> Int32 -> Int -> Int32X16
unsafeInsertInt32X16 (Int32X16 q0 q1 q2 q3) (I32# val) idx@(I# idx#) =
    if idx < 4
        then Int32X16 (insertInt32X4# q0 val (idx# -# 0#)) q1 q2 q3
        else if idx < 8
            then Int32X16 q0 (insertInt32X4# q1 val (idx# -# 4#)) q2 q3
            else if idx < 12
                then Int32X16 q0 q1 (insertInt32X4# q2 val (idx# -# 8#)) q3
                else Int32X16 q0 q1 q2 (insertInt32X4# q3 val (idx# -# 12#))
-- | Apply a function on boxed 'Int32' values to every lane, by adapting
-- it to raw 'Int#' values and delegating to 'mapInt32X16#'.
mapInt32X16 :: (Int32 -> Int32) -> Int32X16 -> Int32X16
mapInt32X16 f = mapInt32X16# onRaw
  where
    onRaw raw = case f (I32# raw) of
        I32# out -> out
-- | Apply a function on raw 'Int#' values to every lane: the vector is
-- unpacked, each lane is transformed, and the results are packed again.
mapInt32X16# :: (Int# -> Int#) -> Int32X16 -> Int32X16
mapInt32X16# f = \vec -> case unpackInt32X16 vec of
    (a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16) ->
        packInt32X16
            ( lane a1,  lane a2,  lane a3,  lane a4
            , lane a5,  lane a6,  lane a7,  lane a8
            , lane a9,  lane a10, lane a11, lane a12
            , lane a13, lane a14, lane a15, lane a16
            )
  where
    lane (I32# raw) = I32# (f raw)
-- | Combine two vectors lane by lane with a binary function: both are
-- unpacked, corresponding lanes are combined, and the results are packed.
zipInt32X16 :: (Int32 -> Int32 -> Int32) -> Int32X16 -> Int32X16 -> Int32X16
zipInt32X16 f = \lhs rhs -> case (unpackInt32X16 lhs, unpackInt32X16 rhs) of
    ( (a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16)
      , (b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16)
      ) ->
        packInt32X16
            ( f a1 b1,   f a2 b2,   f a3 b3,   f a4 b4
            , f a5 b5,   f a6 b6,   f a7 b7,   f a8 b8
            , f a9 b9,   f a10 b10, f a11 b11, f a12 b12
            , f a13 b13, f a14 b14, f a15 b15, f a16 b16
            )
-- | Left-to-right fold of the sixteen lanes with a binary function,
-- starting from the first lane. Both arguments are forced before each
-- application of the function.
foldInt32X16 :: (Int32 -> Int32 -> Int32) -> Int32X16 -> Int32
foldInt32X16 f' = \vec -> case unpackInt32X16 vec of
    (x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16) ->
        let acc2  = step x1 x2
            acc3  = step acc2 x3
            acc4  = step acc3 x4
            acc5  = step acc4 x5
            acc6  = step acc5 x6
            acc7  = step acc6 x7
            acc8  = step acc7 x8
            acc9  = step acc8 x9
            acc10 = step acc9 x10
            acc11 = step acc10 x11
            acc12 = step acc11 x12
            acc13 = step acc12 x13
            acc14 = step acc13 x14
            acc15 = step acc14 x15
        in step acc15 x16
  where
    step !l !r = f' l r
-- | Sum of all sixteen lanes: the four quads are first added together
-- with 'plusInt32X4#', then the four lanes of the result are added.
sumInt32X16 :: Int32X16 -> Int32
sumInt32X16 (Int32X16 q0 q1 q2 q3) =
    horizontal (plusInt32X4# q0 (plusInt32X4# q1 (plusInt32X4# q2 q3)))
  where
    horizontal quad = case unpackInt32X4# quad of
        (# l0, l1, l2, l3 #) -> I32# l0 + I32# l1 + I32# l2 + I32# l3
-- | Add two vectors by applying 'plusInt32X4#' to corresponding quads.
plusInt32X16 :: Int32X16 -> Int32X16 -> Int32X16
plusInt32X16 (Int32X16 a0 a1 a2 a3) (Int32X16 b0 b1 b2 b3) =
    Int32X16
        (a0 `plusInt32X4#` b0)
        (a1 `plusInt32X4#` b1)
        (a2 `plusInt32X4#` b2)
        (a3 `plusInt32X4#` b3)
-- | Subtract the second vector from the first by applying
-- 'minusInt32X4#' to corresponding quads.
minusInt32X16 :: Int32X16 -> Int32X16 -> Int32X16
minusInt32X16 (Int32X16 a0 a1 a2 a3) (Int32X16 b0 b1 b2 b3) =
    Int32X16
        (a0 `minusInt32X4#` b0)
        (a1 `minusInt32X4#` b1)
        (a2 `minusInt32X4#` b2)
        (a3 `minusInt32X4#` b3)
-- | Multiply two vectors by applying 'timesInt32X4#' to corresponding
-- quads.
timesInt32X16 :: Int32X16 -> Int32X16 -> Int32X16
timesInt32X16 (Int32X16 a0 a1 a2 a3) (Int32X16 b0 b1 b2 b3) =
    Int32X16
        (a0 `timesInt32X4#` b0)
        (a1 `timesInt32X4#` b1)
        (a2 `timesInt32X4#` b2)
        (a3 `timesInt32X4#` b3)
-- | Divide the first vector by the second by applying 'quotInt32X4#' to
-- corresponding quads.
quotInt32X16 :: Int32X16 -> Int32X16 -> Int32X16
quotInt32X16 (Int32X16 a0 a1 a2 a3) (Int32X16 b0 b1 b2 b3) =
    Int32X16
        (a0 `quotInt32X4#` b0)
        (a1 `quotInt32X4#` b1)
        (a2 `quotInt32X4#` b2)
        (a3 `quotInt32X4#` b3)
-- | Remainder of the first vector by the second, obtained by applying
-- 'remInt32X4#' to corresponding quads.
remInt32X16 :: Int32X16 -> Int32X16 -> Int32X16
remInt32X16 (Int32X16 a0 a1 a2 a3) (Int32X16 b0 b1 b2 b3) =
    Int32X16
        (a0 `remInt32X4#` b0)
        (a1 `remInt32X4#` b1)
        (a2 `remInt32X4#` b2)
        (a3 `remInt32X4#` b3)
-- | Negate a vector by applying 'negateInt32X4#' to each quad.
negateInt32X16 :: Int32X16 -> Int32X16
negateInt32X16 (Int32X16 q0 q1 q2 q3) =
    Int32X16
        (negateInt32X4# q0)
        (negateInt32X4# q1)
        (negateInt32X4# q2)
        (negateInt32X4# q3)
-- | Read the vector at element index @i@ from an immutable byte array.
-- The four quads are fetched with 'indexInt32X4Array#' at quad indices
-- @4*i@, @4*i+1@, @4*i+2@ and @4*i+3@.
indexInt32X16Array :: ByteArray -> Int -> Int32X16
indexInt32X16Array (ByteArray arr) (I# i) =
    Int32X16 (quadAt 0#) (quadAt 1#) (quadAt 2#) (quadAt 3#)
  where
    quadAt k = indexInt32X4Array# arr ((i *# 4#) +# k)
-- | Read the vector at element index @i@ from a mutable byte array.
-- The four quads are read in order with 'readInt32X4Array#' at quad
-- indices @4*i@ .. @4*i+3@, threading the state token through each read.
readInt32X16Array :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> m Int32X16
readInt32X16Array (MutableByteArray arr) (I# i) = primitive readAll
  where
    quadAt k = readInt32X4Array# arr ((i *# 4#) +# k)
    readAll st0 =
        case quadAt 0# st0 of
          (# st1, q0 #) ->
            case quadAt 1# st1 of
              (# st2, q1 #) ->
                case quadAt 2# st2 of
                  (# st3, q2 #) ->
                    case quadAt 3# st3 of
                      (# st4, q3 #) -> (# st4, Int32X16 q0 q1 q2 q3 #)
-- | Write a vector at element index @i@ of a mutable byte array.
-- The four quads are written in order with 'writeInt32X4Array#' at quad
-- indices @4*i@ .. @4*i+3@.
writeInt32X16Array :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> Int32X16 -> m ()
writeInt32X16Array (MutableByteArray arr) (I# i) (Int32X16 q0 q1 q2 q3) = do
    storeQuad 0# q0
    storeQuad 1# q1
    storeQuad 2# q2
    storeQuad 3# q3
  where
    storeQuad k quad = primitive_ (writeInt32X4Array# arr ((i *# 4#) +# k) quad)
-- | Read the vector at element index @i@ relative to an address.
-- Each quad is fetched with 'indexInt32X4OffAddr#' at index 0 of an
-- address advanced (via 'plusAddr#') by @64*i@ plus 0, 16, 32 or 48.
indexInt32X16OffAddr :: Addr -> Int -> Int32X16
indexInt32X16OffAddr (Addr base) (I# i) =
    Int32X16 (quadAt 0#) (quadAt 16#) (quadAt 32#) (quadAt 48#)
  where
    quadAt off = indexInt32X4OffAddr# (plusAddr# base ((i *# 64#) +# off)) 0#
-- | Monadic read of the vector at element index @i@ relative to an
-- address. Each quad is read with 'readInt32X4OffAddr#' at index 0 of an
-- address advanced (via 'plusAddr#') by @64*i@ plus 0, 16, 32 or 48, with
-- the state token threaded through the four reads in order.
readInt32X16OffAddr :: PrimMonad m => Addr -> Int -> m Int32X16
readInt32X16OffAddr (Addr base) (I# i) = primitive readAll
  where
    quadAt off = readInt32X4OffAddr# (plusAddr# base ((i *# 64#) +# off)) 0#
    readAll st0 =
        case quadAt 0# st0 of
          (# st1, q0 #) ->
            case quadAt 16# st1 of
              (# st2, q1 #) ->
                case quadAt 32# st2 of
                  (# st3, q2 #) ->
                    case quadAt 48# st3 of
                      (# st4, q3 #) -> (# st4, Int32X16 q0 q1 q2 q3 #)
-- | Monadic write of a vector at element index @i@ relative to an
-- address. Each quad is written with 'writeInt32X4OffAddr#' at index 0 of
-- an address advanced (via 'plusAddr#') by @64*i@ plus 0, 16, 32 or 48,
-- in that order.
writeInt32X16OffAddr :: PrimMonad m => Addr -> Int -> Int32X16 -> m ()
writeInt32X16OffAddr (Addr base) (I# i) (Int32X16 q0 q1 q2 q3) = do
    storeQuad 0# q0
    storeQuad 16# q1
    storeQuad 32# q2
    storeQuad 48# q3
  where
    storeQuad off quad =
        primitive_ (writeInt32X4OffAddr# (plusAddr# base ((i *# 64#) +# off)) 0# quad)