module Data.Primitive.SIMD.Word32X8 (Word32X8) where
import Data.Primitive.SIMD.Class
import GHC.Word
import GHC.Types
import GHC.Prim
import GHC.Ptr
import GHC.ST
import Foreign.Storable
import Control.Monad.Primitive
import Data.Primitive.Types
import Data.Primitive.ByteArray
import Data.Primitive.Addr
import Data.Monoid
import Data.Typeable
import qualified Data.Vector.Primitive as PV
import qualified Data.Vector.Primitive.Mutable as PMV
import Data.Vector.Unboxed (Unbox)
import qualified Data.Vector.Unboxed as UV
import Data.Vector.Generic (Vector(..))
import Data.Vector.Generic.Mutable (MVector(..))
-- | A vector of eight Word32 lanes, stored as two @Word32X4#@ halves.
-- The first field carries lanes 1-4 and the second carries lanes 5-8
-- (this is the split used by @packWord32X8@ and @unpackWord32X8@).
data Word32X8 = Word32X8 Word32X4# Word32X4# deriving Typeable
-- | Boxed wrapper around @abs#@: unbox the argument, apply @abs#@ to the
-- raw word, and box the result again.
abs' :: Word32 -> Word32
abs' w = case w of
    W32# raw -> W32# (abs# raw)
-- | Unboxed form of @abs@ at Word32: box the raw word, run the Num
-- method on it, and hand back the unboxed payload of the result.
abs# :: Word# -> Word#
abs# x = let !(W32# y) = abs (W32# x) in y
-- | Boxed wrapper around @signum#@: unbox the argument, apply @signum#@
-- to the raw word, and box the result again.
signum' :: Word32 -> Word32
signum' w = case w of
    W32# raw -> W32# (signum# raw)
-- | Unboxed form of @signum@ at Word32: box the raw word, run the Num
-- method on it, and hand back the unboxed payload of the result.
signum# :: Word# -> Word#
signum# x = let !(W32# y) = signum (W32# x) in y
-- | Two vectors are equal exactly when all eight lanes are equal.
instance Eq Word32X8 where
    -- Equality on the unpacked 8-tuples checks the lanes in order,
    -- stopping at the first mismatch.
    lhs == rhs = unpackWord32X8 lhs == unpackWord32X8 rhs
-- | Lexicographic ordering over the lanes: lane 1 is compared first and
-- later lanes only break ties.
instance Ord Word32X8 where
    -- Tuple comparison is lexicographic, which is the same ordering as
    -- chaining the per-lane comparisons with the Ordering monoid.
    compare lhs rhs = compare (unpackWord32X8 lhs) (unpackWord32X8 rhs)
-- | Renders a vector as @Word32X8 (x1, x2, x3, x4, x5, x6, x7, x8)@.
instance Show Word32X8 where
    -- The rendering is a constructor application, so it has to be wrapped
    -- in parentheses whenever the context binds tighter than application
    -- (precedence above 10), e.g. when shown as an argument of Just.
    -- At top level (show, print) the output is unchanged.
    showsPrec d v = showParen (d > 10) body
      where
        sep  = showString ", "
        body = case unpackWord32X8 v of
            (x1, x2, x3, x4, x5, x6, x7, x8) ->
                showString "Word32X8 ("
                    . shows x1 . sep . shows x2 . sep . shows x3 . sep . shows x4 . sep
                    . shows x5 . sep . shows x6 . sep . shows x7 . sep . shows x8
                    . showChar ')'
-- | Lane-wise arithmetic on eight Word32 values at once.
instance Num Word32X8 where
    -- Addition, subtraction and multiplication delegate to the dedicated
    -- vector helpers defined later in this module.
    (+)         = plusWord32X8
    -- The method name was missing here (the line read "() = ..."), which
    -- is a pattern binding on unit and not a valid instance method.
    (-)         = minusWord32X8
    (*)         = timesWord32X8
    -- The remaining unary operations are applied to every lane via mapVector.
    negate      = mapVector negate
    abs         = mapVector abs'
    signum      = mapVector signum'
    -- An integer literal becomes a vector with that value in every lane.
    fromInteger = broadcastVector . fromInteger
-- | The bounds are the vectors holding the Word32 bound in every lane.
instance Bounded Word32X8 where
    -- broadcastVector for this type is broadcastWord32X8, so it is
    -- called directly here.
    maxBound = broadcastWord32X8 maxBound
    minBound = broadcastWord32X8 minBound
-- | Storable access goes through the Prim-based address helpers at
-- element offset 0 of the given pointer.
instance Storable Word32X8 where
    -- Lane count times the per-lane size (8 * 4 for this type).
    sizeOf vec = vectorSize vec * elementSize vec

    -- The vector is aligned to its own full size.
    alignment vec = sizeOf vec

    peek (Ptr rawAddr) = readOffAddr (Addr rawAddr) 0

    poke (Ptr rawAddr) = writeOffAddr (Addr rawAddr) 0
-- | Wires the generic SIMDVector interface to the Word32X8-specific
-- helpers defined in this module.
instance SIMDVector Word32X8 where
    type Elem Word32X8      = Word32
    type ElemTuple Word32X8 = (Word32, Word32, Word32, Word32, Word32, Word32, Word32, Word32)

    -- Geometry: eight lanes with an element size of four. Neither
    -- function inspects its argument.
    vectorSize  _ = 8
    elementSize _ = 4

    -- Construction and deconstruction.
    nullVector         = broadcastWord32X8 0
    broadcastVector    = broadcastWord32X8
    packVector         = packWord32X8
    unpackVector       = unpackWord32X8
    unsafeInsertVector = unsafeInsertWord32X8

    -- Lane-wise traversals and reductions.
    mapVector  = mapWord32X8
    zipVector  = zipWord32X8
    foldVector = foldWord32X8
    sumVector  = sumWord32X8
-- | Lane-wise integer division, delegated to the quot and rem helpers
-- defined in this module.
instance SIMDIntVector Word32X8 where
    remVector  = remWord32X8
    quotVector = quotWord32X8
-- | Primitive array and address access for Word32X8. Reads and writes
-- delegate to the Word32X8 array and address helpers in this module;
-- the monadic helpers are run at ST by unwrapping the ST constructor
-- and threading the state token through by hand.
instance Prim Word32X8 where
    -- NOTE(review): sizeOf and alignment are unqualified here; presumably
    -- the Storable methods are intended -- confirm that this is
    -- unambiguous with the version of primitive being built against.
    sizeOf# v = case sizeOf v of
        I# n -> n
    alignment# v = case alignment v of
        I# n -> n

    indexByteArray# arr ix = indexWord32X8Array (ByteArray arr) (I# ix)
    readByteArray# arr ix st = case readWord32X8Array (MutableByteArray arr) (I# ix) of
        ST run -> run st
    -- The write and set methods only return the new state token, so the
    -- unit result of the ST action is dropped.
    writeByteArray# arr ix val st = case writeWord32X8Array (MutableByteArray arr) (I# ix) val of
        ST run -> case run st of
            (# st1, _ #) -> st1
    setByteArray# arr start count val st = case setByteArrayGeneric (MutableByteArray arr) (I# start) (I# count) val of
        ST run -> case run st of
            (# st1, _ #) -> st1

    indexOffAddr# base ix = indexWord32X8OffAddr (Addr base) (I# ix)
    readOffAddr# base ix st = case readWord32X8OffAddr (Addr base) (I# ix) of
        ST run -> run st
    writeOffAddr# base ix val st = case writeWord32X8OffAddr (Addr base) (I# ix) val of
        ST run -> case run st of
            (# st1, _ #) -> st1
    setOffAddr# base start count val st = case setOffAddrGeneric (Addr base) (I# start) (I# count) val of
        ST run -> case run st of
            (# st1, _ #) -> st1
-- Unboxed immutable vectors of Word32X8 are represented by a primitive
-- vector of the same element type.
newtype instance UV.Vector Word32X8 = V_Word32X8 (PV.Vector Word32X8)
-- Unboxed mutable vectors of Word32X8 are represented by a mutable
-- primitive vector of the same element type.
newtype instance UV.MVector s Word32X8 = MV_Word32X8 (PMV.MVector s Word32X8)
-- | Every operation unwraps the newtype, runs the corresponding
-- primitive-vector operation, and rewraps where a vector is returned.
instance Vector UV.Vector Word32X8 where
    basicUnsafeFreeze (MV_Word32X8 mv) = fmap V_Word32X8 (PV.unsafeFreeze mv)
    basicUnsafeThaw (V_Word32X8 pv) = fmap MV_Word32X8 (PV.unsafeThaw pv)
    basicLength (V_Word32X8 pv) = PV.length pv
    basicUnsafeSlice off n (V_Word32X8 pv) = V_Word32X8 (PV.unsafeSlice off n pv)
    basicUnsafeIndexM (V_Word32X8 pv) = PV.unsafeIndexM pv
    basicUnsafeCopy (MV_Word32X8 dst) (V_Word32X8 src) = PV.unsafeCopy dst src
    -- The vector argument is ignored; the element is forced with seq.
    elemseq _ = seq
-- | Every operation unwraps the newtype and runs the corresponding
-- mutable primitive-vector operation, rewrapping where a vector is
-- returned.
instance MVector UV.MVector Word32X8 where
    basicLength (MV_Word32X8 mv) = PMV.length mv
    basicUnsafeSlice off n (MV_Word32X8 mv) = MV_Word32X8 (PMV.unsafeSlice off n mv)
    basicOverlaps (MV_Word32X8 lhs) (MV_Word32X8 rhs) = PMV.overlaps lhs rhs
    basicUnsafeNew n = fmap MV_Word32X8 (PMV.unsafeNew n)
    -- basicInitialize is only defined when building against vector 0.11
    -- or newer, as selected by the guard below.
#if MIN_VERSION_vector(0,11,0)
    basicInitialize (MV_Word32X8 mv) = basicInitialize mv
#endif
    basicUnsafeRead (MV_Word32X8 mv) = PMV.unsafeRead mv
    basicUnsafeWrite (MV_Word32X8 mv) = PMV.unsafeWrite mv
-- Unbox has no methods to define here; the instance relies on the Vector
-- and MVector instances for the unboxed vector types given above.
instance Unbox Word32X8
-- | Build a vector with the given value in all eight lanes. The value is
-- broadcast into one four-lane half, which is then used for both halves.
broadcastWord32X8 :: Word32 -> Word32X8
broadcastWord32X8 (W32# x) =
    let !half = broadcastWord32X4# x
    in  Word32X8 half half
-- | Pack an 8-tuple into a vector: the first four components form the
-- first half and the last four form the second half.
packWord32X8 :: (Word32, Word32, Word32, Word32, Word32, Word32, Word32, Word32) -> Word32X8
packWord32X8 lanes = case lanes of
    (W32# a1, W32# a2, W32# a3, W32# a4, W32# a5, W32# a6, W32# a7, W32# a8) ->
        let !lo = packWord32X4# (# a1, a2, a3, a4 #)
            !hi = packWord32X4# (# a5, a6, a7, a8 #)
        in  Word32X8 lo hi
-- | Unpack a vector into an 8-tuple: components 1-4 come from the first
-- half and components 5-8 from the second half.
unpackWord32X8 :: Word32X8 -> (Word32, Word32, Word32, Word32, Word32, Word32, Word32, Word32)
unpackWord32X8 vec = case vec of
    Word32X8 lo hi ->
        case unpackWord32X4# lo of
            (# a1, a2, a3, a4 #) ->
                case unpackWord32X4# hi of
                    (# a5, a6, a7, a8 #) ->
                        (W32# a1, W32# a2, W32# a3, W32# a4, W32# a5, W32# a6, W32# a7, W32# a8)
-- | Replace the lane at the given index with a new value. Indices below
-- 4 go to the first half; every other index goes to the second half
-- after subtracting 4. The index is not range-checked here.
unsafeInsertWord32X8 :: Word32X8 -> Word32 -> Int -> Word32X8
unsafeInsertWord32X8 (Word32X8 lo hi) (W32# y) idx@(I# i#) =
    if idx < 4
        then Word32X8 (insertWord32X4# lo y i#) hi
        else Word32X8 lo (insertWord32X4# hi y (i# -# 4#))
-- | Apply a boxed function to every lane by adapting it to raw words and
-- delegating to @mapWord32X8#@.
mapWord32X8 :: (Word32 -> Word32) -> Word32X8 -> Word32X8
mapWord32X8 f = mapWord32X8# onRaw
  where
    -- Box the raw word, apply f, and unbox the result.
    onRaw x = case f (W32# x) of
        W32# y -> y
-- | Apply a function on raw words to every lane: unpack the vector,
-- transform each component, and pack the results back up.
mapWord32X8# :: (Word# -> Word#) -> Word32X8 -> Word32X8
mapWord32X8# f = \ vec ->
    case unpackWord32X8 vec of
        (a1, a2, a3, a4, a5, a6, a7, a8) ->
            packWord32X8 (lane a1, lane a2, lane a3, lane a4, lane a5, lane a6, lane a7, lane a8)
  where
    -- Unbox one component, apply f, and box the result.
    lane (W32# x) = W32# (f x)
-- | Combine two vectors lane by lane with the given function.
zipWord32X8 :: (Word32 -> Word32 -> Word32) -> Word32X8 -> Word32X8 -> Word32X8
zipWord32X8 f = \ lhs rhs ->
    -- Both unpacks are forced before any lane is combined.
    let !(x1, x2, x3, x4, x5, x6, x7, x8) = unpackWord32X8 lhs
        !(y1, y2, y3, y4, y5, y6, y7, y8) = unpackWord32X8 rhs
    in  packWord32X8 (f x1 y1, f x2 y2, f x3 y3, f x4 y4, f x5 y5, f x6 y6, f x7 y7, f x8 y8)
-- | Reduce the eight lanes to a single value with a left-nested chain of
-- applications of the given function, starting from lane 1.
foldWord32X8 :: (Word32 -> Word32 -> Word32) -> Word32X8 -> Word32
foldWord32X8 combine = \ vec ->
    case unpackWord32X8 vec of
        (a1, a2, a3, a4, a5, a6, a7, a8) ->
            step (step (step (step (step (step (step a1 a2) a3) a4) a5) a6) a7) a8
  where
    -- Force both operands before combining them.
    step !acc !lane = combine acc lane
-- | Sum all eight lanes: the two halves are first added lane-wise with
-- one vector addition, then the four partial sums are added as Word32.
sumWord32X8 :: Word32X8 -> Word32
sumWord32X8 (Word32X8 lo hi) =
    let !partial = plusWord32X4# lo hi
    in  case unpackWord32X4# partial of
            (# p1, p2, p3, p4 #) -> W32# p1 + W32# p2 + W32# p3 + W32# p4
-- | Lane-wise addition, performed separately on the two four-lane halves.
plusWord32X8 :: Word32X8 -> Word32X8 -> Word32X8
plusWord32X8 (Word32X8 aLo aHi) (Word32X8 bLo bHi) =
    let !lo = plusWord32X4# aLo bLo
        !hi = plusWord32X4# aHi bHi
    in  Word32X8 lo hi
-- | Lane-wise subtraction (first argument minus second), performed
-- separately on the two four-lane halves.
minusWord32X8 :: Word32X8 -> Word32X8 -> Word32X8
minusWord32X8 (Word32X8 aLo aHi) (Word32X8 bLo bHi) =
    let !lo = minusWord32X4# aLo bLo
        !hi = minusWord32X4# aHi bHi
    in  Word32X8 lo hi
-- | Lane-wise multiplication, performed separately on the two four-lane
-- halves.
timesWord32X8 :: Word32X8 -> Word32X8 -> Word32X8
timesWord32X8 (Word32X8 aLo aHi) (Word32X8 bLo bHi) =
    let !lo = timesWord32X4# aLo bLo
        !hi = timesWord32X4# aHi bHi
    in  Word32X8 lo hi
-- | Lane-wise quotient (first argument divided by second), performed
-- separately on the two four-lane halves. No zero-divisor check is made
-- here.
quotWord32X8 :: Word32X8 -> Word32X8 -> Word32X8
quotWord32X8 (Word32X8 aLo aHi) (Word32X8 bLo bHi) =
    let !lo = quotWord32X4# aLo bLo
        !hi = quotWord32X4# aHi bHi
    in  Word32X8 lo hi
-- | Lane-wise remainder (first argument modulo second), performed
-- separately on the two four-lane halves. No zero-divisor check is made
-- here.
remWord32X8 :: Word32X8 -> Word32X8 -> Word32X8
remWord32X8 (Word32X8 aLo aHi) (Word32X8 bLo bHi) =
    let !lo = remWord32X4# aLo bLo
        !hi = remWord32X4# aHi bHi
    in  Word32X8 lo hi
-- | Read the vector at the given element index from an immutable byte
-- array. Element i occupies the two consecutive four-lane slots 2*i and
-- 2*i + 1.
indexWord32X8Array :: ByteArray -> Int -> Word32X8
indexWord32X8Array (ByteArray arr) (I# i) =
    let !slot = i *# 2#
    in  Word32X8 (indexWord32X4Array# arr slot) (indexWord32X4Array# arr (slot +# 1#))
-- | Read the vector at the given element index from a mutable byte
-- array. Element i occupies the two consecutive four-lane slots 2*i and
-- 2*i + 1; the first half is read before the second.
readWord32X8Array :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> m Word32X8
readWord32X8Array (MutableByteArray arr) (I# i) =
    let !slot = i *# 2#
    in  primitive (\ s0 ->
            case readWord32X4Array# arr slot s0 of
                (# s1, lo #) ->
                    case readWord32X4Array# arr (slot +# 1#) s1 of
                        (# s2, hi #) -> (# s2, Word32X8 lo hi #))
-- | Write a vector at the given element index of a mutable byte array.
-- Element i occupies the two consecutive four-lane slots 2*i and
-- 2*i + 1; the first half is written before the second.
writeWord32X8Array :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> Word32X8 -> m ()
writeWord32X8Array (MutableByteArray arr) (I# i) (Word32X8 lo hi) =
    let !slot = i *# 2#
    in  do
            primitive_ (writeWord32X4Array# arr slot lo)
            primitive_ (writeWord32X4Array# arr (slot +# 1#) hi)
-- | Read the vector at the given element index relative to an address.
-- Element i starts 32*i bytes past the address; the second half sits 16
-- bytes after the first.
indexWord32X8OffAddr :: Addr -> Int -> Word32X8
indexWord32X8OffAddr (Addr base) (I# i) =
    let !off = i *# 32#
    in  Word32X8 (indexWord32X4OffAddr# (plusAddr# base off) 0#)
                 (indexWord32X4OffAddr# (plusAddr# base (off +# 16#)) 0#)
-- | Read the vector at the given element index relative to an address,
-- inside a primitive monad. Element i starts 32*i bytes past the
-- address; the second half sits 16 bytes after the first and is read
-- second.
readWord32X8OffAddr :: PrimMonad m => Addr -> Int -> m Word32X8
readWord32X8OffAddr (Addr base) (I# i) = primitive (\ s0 ->
    case readHalf (i *# 32#) s0 of
        (# s1, lo #) ->
            case readHalf ((i *# 32#) +# 16#) s1 of
                (# s2, hi #) -> (# s2, Word32X8 lo hi #))
  where
    -- Read one four-lane half located the given number of bytes past base.
    readHalf byteOff = readWord32X4OffAddr# (plusAddr# base byteOff) 0#
-- | Write a vector at the given element index relative to an address.
-- Element i starts 32*i bytes past the address; the second half goes 16
-- bytes after the first and is written second.
writeWord32X8OffAddr :: PrimMonad m => Addr -> Int -> Word32X8 -> m ()
writeWord32X8OffAddr (Addr base) (I# i) (Word32X8 lo hi) =
    let !off = i *# 32#
    in  do
            primitive_ (writeWord32X4OffAddr# (plusAddr# base off) 0# lo)
            primitive_ (writeWord32X4OffAddr# (plusAddr# base (off +# 16#)) 0# hi)