module LLVM.Extra.Vector (
size, sizeInTuple,
replicate, iterate, assemble,
shuffle,
rotateUp, rotateDown, reverse,
shiftUp, shiftDown,
shiftUpMultiZero, shiftDownMultiZero,
ShuffleMatch (shuffleMatch),
shuffleMatchTraversable,
Access (insert, extract),
insertTraversable,
extractTraversable,
insertChunk, modify,
map, mapChunks, zipChunksWith,
chop, concat, select,
signedFraction,
cumulate1, umul32to64,
Arithmetic
(sum, sumToPair, sumInterleavedToPair,
cumulate, dotProduct, mul),
Real
(min, max, abs,
truncate, floor, fraction),
) where
import qualified LLVM.Extra.Extension.X86 as X86
import qualified LLVM.Extra.Extension as Ext
import qualified LLVM.Extra.Monad as M
import qualified LLVM.Extra.Arithmetic as A
import qualified LLVM.Core as LLVM
import LLVM.Util.Loop (Phi, )
import LLVM.Core
(Value, ConstValue, valueOf, value, constOf, undef,
Vector, shufflevector, insertelement, extractelement, constVector,
IsConst, IsArithmetic, IsFloating,
IsPrimitive, IsPowerOf2,
CodeGenFunction, )
import Data.TypeLevel.Num (D2, )
import qualified Data.TypeLevel.Num as TypeNum
import Control.Monad.HT ((<=<), )
import Control.Monad (liftM2, liftM3, foldM, )
import Data.Tuple.HT (uncurry3, )
import qualified Data.List.HT as ListHT
import qualified Data.List as List
import Control.Applicative (liftA2, )
import qualified Control.Applicative as App
import qualified Data.Traversable as Trav
import Data.Int (Int8, Int16, Int32, Int64, )
import Data.Word (Word8, Word16, Word32, Word64, )
import Prelude hiding
(Real, truncate, floor, round,
map, zipWith, iterate, replicate, reverse, concat, sum, )
size ::
(TypeNum.Nat n) =>
Value (Vector n a) -> Int
size =
let sz :: (TypeNum.Nat n) => n -> Value (Vector n a) -> Int
sz n _ = TypeNum.toInt n
in sz undefined
replicate ::
(Access n a va) =>
a -> CodeGenFunction r va
replicate = replicateCore undefined
replicateCore ::
(Access n a va) =>
n -> a -> CodeGenFunction r va
replicateCore n =
assemble . List.replicate (TypeNum.toInt n)
assemble ::
(Access n a va) =>
[a] -> CodeGenFunction r va
assemble =
foldM (\v (k,x) -> insert (valueOf k) x v) LLVM.undefTuple .
List.zip [0..]
insertChunk ::
(Access m a ca, Access n a va) =>
Int -> ca ->
va -> CodeGenFunction r va
insertChunk k x =
M.chain $
List.zipWith
(\i j -> \v ->
extract (valueOf i) x >>= \e ->
insert (valueOf j) e v)
(take (sizeInTuple x) [0..])
[fromIntegral k ..]
iterate ::
(Access n a va) =>
(a -> CodeGenFunction r a) ->
a -> CodeGenFunction r va
iterate f x =
fmap snd $
iterateCore f x LLVM.undefTuple
iterateCore ::
(Access n a va) =>
(a -> CodeGenFunction r a) ->
a -> va ->
CodeGenFunction r (a, va)
iterateCore f x0 v0 =
foldM
(\(x,v) k ->
liftM2 (,) (f x)
(insert (valueOf k) x v))
(x0,v0)
(take (sizeInTuple v0) [0..])
shuffle ::
(Access m a ca, Access n a va) =>
va ->
ConstValue (Vector m Word32) ->
CodeGenFunction r ca
shuffle x i =
assemble =<<
mapM
(flip extract x <=< extractelement (value i) . valueOf)
(take (size (value i)) [0..])
sizeInTuple :: ShuffleMatch n v => v -> Int
sizeInTuple =
let sz :: (ShuffleMatch n v) => n -> v -> Int
sz n _ = TypeNum.toInt n
in sz undefined
rotateUp ::
(ShuffleMatch n v) =>
v -> CodeGenFunction r v
rotateUp x =
shuffleMatch
(constVector $ List.map constOf $
(fromIntegral (sizeInTuple x) 1) : [0..]) x
rotateDown ::
(ShuffleMatch n v) =>
v -> CodeGenFunction r v
rotateDown x =
shuffleMatch
(constVector $ List.map constOf $
List.take (sizeInTuple x 1) [1..] ++ [0]) x
reverse ::
(ShuffleMatch n v) =>
v -> CodeGenFunction r v
reverse x =
shuffleMatch
(constVector $ List.map constOf $
List.reverse $
List.take (sizeInTuple x) [0..]) x
shiftUp ::
(Access n a v) =>
a -> v -> CodeGenFunction r (a, v)
shiftUp x0 x = do
y <-
shuffleMatch
(constVector $ undef : List.map constOf [0..]) x
liftM2 (,)
(extract (LLVM.valueOf (fromIntegral (sizeInTuple x) 1)) x)
(insert (value LLVM.zero) x0 y)
shiftDown ::
(Access n a v) =>
a -> v -> CodeGenFunction r (a, v)
shiftDown x0 x = do
y <-
shuffleMatch
(constVector $
List.map constOf (List.take (sizeInTuple x 1) [1..]) ++ [undef]) x
liftM2 (,)
(extract (value LLVM.zero) x)
(insert (LLVM.valueOf (fromIntegral (sizeInTuple x) 1)) x0 y)
shiftUpMultiZero ::
(IsPrimitive a, IsPowerOf2 n) =>
Int ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
shiftUpMultiZero k x =
LLVM.shufflevector (LLVM.value LLVM.zero) x
(constVector $ List.map constOf $
take k [0..] ++ [(fromIntegral (sizeInTuple x)) ..])
shiftDownMultiZero ::
(IsPrimitive a, IsPowerOf2 n) =>
Int ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
shiftDownMultiZero k x =
LLVM.shufflevector x (LLVM.value LLVM.zero)
(constVector $ List.map constOf $
[(fromIntegral k) ..])
class
(LLVM.IsPowerOf2 n, Phi v) =>
ShuffleMatch n v | v -> n where
shuffleMatch ::
ConstValue (Vector n Word32) -> v -> CodeGenFunction r v
shuffleMatchTraversable ::
(ShuffleMatch n v, Trav.Traversable f) =>
ConstValue (Vector n Word32) -> f v -> CodeGenFunction r (f v)
shuffleMatchTraversable is v =
Trav.mapM (shuffleMatch is) v
class
(ShuffleMatch n v) =>
Access n a v | v -> a n, a n -> v where
insert :: Value Word32 -> a -> v -> CodeGenFunction r v
extract :: Value Word32 -> v -> CodeGenFunction r a
insertTraversable ::
(Access n a v, Trav.Traversable f, App.Applicative f) =>
Value Word32 -> f a -> f v -> CodeGenFunction r (f v)
insertTraversable n a v =
Trav.sequence (liftA2 (insert n) a v)
extractTraversable ::
(Access n a v, Trav.Traversable f) =>
Value Word32 -> f v -> CodeGenFunction r (f a)
extractTraversable n v =
Trav.mapM (extract n) v
instance
(LLVM.IsPowerOf2 n, LLVM.IsPrimitive a) =>
ShuffleMatch n (Value (Vector n a)) where
shuffleMatch is v = shufflevector v (value undef) is
instance
(LLVM.IsPowerOf2 n, LLVM.IsPrimitive a) =>
Access n (Value a) (Value (Vector n a)) where
insert k a v = insertelement v a k
extract k v = extractelement v k
instance
(ShuffleMatch n v0, ShuffleMatch n v1) =>
ShuffleMatch n (v0, v1) where
shuffleMatch is (v0,v1) =
liftM2 (,)
(shuffleMatch is v0)
(shuffleMatch is v1)
instance
(Access n a0 v0, Access n a1 v1) =>
Access n (a0, a1) (v0, v1) where
insert k (a0,a1) (v0,v1) =
liftM2 (,)
(insert k a0 v0)
(insert k a1 v1)
extract k (v0,v1) =
liftM2 (,)
(extract k v0)
(extract k v1)
instance
(ShuffleMatch n v0, ShuffleMatch n v1, ShuffleMatch n v2) =>
ShuffleMatch n (v0, v1, v2) where
shuffleMatch is (v0,v1,v2) =
liftM3 (,,)
(shuffleMatch is v0)
(shuffleMatch is v1)
(shuffleMatch is v2)
instance
(Access n a0 v0, Access n a1 v1, Access n a2 v2) =>
Access n (a0, a1, a2) (v0, v1, v2) where
insert k (a0,a1,a2) (v0,v1,v2) =
liftM3 (,,)
(insert k a0 v0)
(insert k a1 v1)
(insert k a2 v2)
extract k (v0,v1,v2) =
liftM3 (,,)
(extract k v0)
(extract k v1)
(extract k v2)
modify ::
(Access n a va) =>
Value Word32 ->
(a -> CodeGenFunction r a) ->
(va -> CodeGenFunction r va)
modify k f v =
flip (insert k) v =<< f =<< extract k v
map ::
(Access n a va, Access n b vb) =>
(a -> CodeGenFunction r b) ->
(va -> CodeGenFunction r vb)
map f a =
foldM
(\b n ->
extract (valueOf n) a >>=
f >>=
flip (insert (valueOf n)) b)
LLVM.undefTuple
(take (sizeInTuple a) [0..])
mapChunks ::
(Access m a ca, Access m b cb,
Access n a va, Access n b vb) =>
(ca -> CodeGenFunction r cb) ->
(va -> CodeGenFunction r vb)
mapChunks f a =
foldM
(\b (am,k) ->
am >>= \ac ->
f ac >>= \bc ->
insertChunk (k * sizeInTuple ac) bc b)
LLVM.undefTuple $
List.zip (chop a) [0..]
zipChunksWith ::
(Access m a ca, Access m b cb, Access m c cc,
Access n a va, Access n b vb, Access n c vc) =>
(ca -> cb -> CodeGenFunction r cc) ->
(va -> vb -> CodeGenFunction r vc)
zipChunksWith f a b =
mapChunks (uncurry f) (a,b)
mapAuto ::
(Access m a ca, Access m b cb,
Access n a va, Access n b vb) =>
(a -> CodeGenFunction r b) ->
Ext.T (ca -> CodeGenFunction r cb) ->
(va -> CodeGenFunction r vb)
mapAuto f g a =
Ext.run (map f a) $
Ext.with g $ \op -> mapChunks op a
zipAutoWith ::
(Access m a ca, Access m b cb, Access m c cc,
Access n a va, Access n b vb, Access n c vc) =>
(a -> b -> CodeGenFunction r c) ->
Ext.T (ca -> cb -> CodeGenFunction r cc) ->
(va -> vb -> CodeGenFunction r vc)
zipAutoWith f g a b =
mapAuto (uncurry f) (fmap uncurry g) (a,b)
dotProductPartial ::
(LLVM.IsPowerOf2 n, LLVM.IsPrimitive a, LLVM.IsArithmetic a) =>
Int ->
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value a)
dotProductPartial n x y =
sumPartial n =<< A.mul x y
sumPartial ::
(LLVM.IsPowerOf2 n, LLVM.IsPrimitive a, LLVM.IsArithmetic a) =>
Int ->
Value (Vector n a) ->
CodeGenFunction r (Value a)
sumPartial n x =
foldl1
(M.liftR2 A.add)
(List.map (LLVM.extractelement x . valueOf) $ take n $ [0..])
chop ::
(Access m a ca, Access n a va) =>
va -> [CodeGenFunction r ca]
chop = chopCore undefined
chopCore ::
(Access m a ca, Access n a va) =>
m -> va -> [CodeGenFunction r ca]
chopCore m x =
List.map (shuffle x . constVector) $
ListHT.sliceVertical (TypeNum.toInt m) $
List.map constOf $
take (sizeInTuple x) [0..]
concat ::
(Access m a ca, Access n a va) =>
[ca] -> CodeGenFunction r va
concat xs =
foldM
(\v0 (js,c) ->
foldM
(\v (i,j) -> do
x <- extract (valueOf i) c
insert (valueOf j) x v)
v0 $
List.zip [0..] js)
LLVM.undefTuple $
List.zip
(ListHT.sliceVertical (sizeInTuple (head xs)) [0..])
xs
getLowestPair ::
Value (Vector n a) ->
CodeGenFunction r (Value a, Value a)
getLowestPair x =
liftM2 (,)
(extractelement x (valueOf 0))
(extractelement x (valueOf 1))
_reduceAddInterleaved ::
(IsArithmetic a, IsPrimitive a,
IsPowerOf2 n, IsPowerOf2 m, TypeNum.Mul D2 m n) =>
m ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector m a))
_reduceAddInterleaved tm v = do
let m = TypeNum.toInt tm
x <- shuffle v (constVector $ List.map constOf $ take m [0..])
y <- shuffle v (constVector $ List.map constOf $ take m [fromIntegral m ..])
A.add x y
sumGeneric ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value a)
sumGeneric =
flip extractelement (valueOf 0) <=<
reduceSumInterleaved 1
sumToPairGeneric ::
(Arithmetic a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value a, Value a)
sumToPairGeneric v =
let n2 = div (size v) 2
in sumInterleavedToPair =<<
shufflevector v (value undef)
(constVector $
List.map (constOf . fromIntegral) $
concatMap (\k -> [k, k+n2]) $
take n2 [0..])
reduceSumInterleaved ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
Int ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
reduceSumInterleaved m x0 =
let go ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
Int ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
go n x =
if m==n
then return x
else
let n2 = div n 2
in go n2
=<< A.add x
=<< shufflevector x (value undef)
(constVector $ List.map constOf (take n2 [fromIntegral n2 ..])
++ List.repeat undef)
in go (size x0) x0
cumulateGeneric, _cumulateSimple ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
Value a -> Value (Vector n a) ->
CodeGenFunction r (Value a, Value (Vector n a))
_cumulateSimple a x =
foldM
(\(a0,y0) k -> do
a1 <- A.add a0 =<< extract (valueOf k) x
y1 <- insert (valueOf k) a0 y0
return (a1,y1))
(a, LLVM.undefTuple)
(take (sizeInTuple x) $ [0..])
cumulateGeneric =
cumulateFrom1 cumulate1
cumulateFrom1 ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
(Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))) ->
Value a -> Value (Vector n a) ->
CodeGenFunction r (Value a, Value (Vector n a))
cumulateFrom1 cum a x0 = do
(b,x1) <- shiftUp a x0
y <- cum x1
z <- A.add b =<< extract (valueOf (fromIntegral (sizeInTuple x0) 1)) y
return (z,y)
cumulate1 ::
(IsArithmetic a, IsPrimitive a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
cumulate1 x =
foldM
(\y k -> A.add y =<< shiftUpMultiZero k y)
x
(takeWhile (<sizeInTuple x) $ List.iterate (2*) 1)
signedFraction ::
(IsFloating a, IsConst a, Real a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
signedFraction x =
A.sub x =<< truncate x
floorGeneric ::
(IsFloating a, IsConst a, Real a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
floorGeneric = floorLogical A.fcmp
fractionGeneric ::
(IsFloating a, IsConst a, Real a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
fractionGeneric = fractionLogical A.fcmp
select ::
(LLVM.IsFirstClass a, IsPrimitive a, IsPowerOf2 n,
LLVM.CmpRet a Bool) =>
Value (Vector n Bool) ->
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
select b x y =
map (uncurry3 LLVM.select) (b, x, y)
_floorSelect ::
(Num a, IsFloating a, IsConst a, Real a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
_floorSelect x =
do xr <- truncate x
b <- A.fcmp LLVM.FPOLE xr x
select b xr =<< A.sub xr =<< replicate (valueOf 1)
_fractionSelect ::
(Num a, IsFloating a, IsConst a, Real a, IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
_fractionSelect x =
do xf <- signedFraction x
b <- A.fcmp LLVM.FPOGE xf (value LLVM.zero)
select b xf =<< A.add xf =<< replicate (valueOf 1)
selectLogical ::
(LLVM.IsFirstClass a, IsPrimitive a,
LLVM.IsInteger i, IsPrimitive i,
LLVM.IsSized (Vector n a) s, LLVM.IsSized (Vector n i) s,
IsPowerOf2 n) =>
Value (Vector n i) ->
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
selectLogical b x y = do
bneg <- LLVM.inv b
xm <- A.and b =<< LLVM.bitcastUnify x
ym <- A.and bneg =<< LLVM.bitcastUnify y
LLVM.bitcastUnify =<< A.or xm ym
floorLogical ::
(IsFloating a, IsConst a, Real a,
IsPrimitive i, LLVM.IsInteger i, IsPowerOf2 n) =>
(LLVM.FPPredicate ->
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n i))) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
floorLogical cmp x =
do xr <- truncate x
b <- cmp LLVM.FPOGT xr x
A.add xr =<< LLVM.sitofp b
fractionLogical ::
(IsFloating a, IsConst a, Real a,
IsPrimitive i, LLVM.IsInteger i, IsPowerOf2 n) =>
(LLVM.FPPredicate ->
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n i))) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
fractionLogical cmp x =
do xf <- signedFraction x
b <- cmp LLVM.FPOLT xf (value LLVM.zero)
A.sub xf =<< LLVM.sitofp b
orderBy ::
(IsPowerOf2 m,
LLVM.IsFirstClass a, IsPrimitive a,
LLVM.IsInteger i, IsPrimitive i,
LLVM.IsSized (Vector m a) s, LLVM.IsSized (Vector m i) s) =>
Ext.T (Value (Vector m a) -> Value (Vector m a) -> CodeGenFunction r (Value (Vector m i))) ->
Ext.T (Value (Vector m a) -> Value (Vector m a) -> CodeGenFunction r (Value (Vector m a)))
orderBy cmp =
Ext.with cmp $ \pcmpgt x y ->
pcmpgt x y >>= \b -> selectLogical b y x
order ::
(IsPowerOf2 n, IsPowerOf2 m,
LLVM.IsFirstClass a, IsPrimitive a,
LLVM.IsInteger i, IsPrimitive i,
LLVM.IsSized (Vector m a) s, LLVM.IsSized (Vector m i) s) =>
(Value a -> Value a -> CodeGenFunction r (Value a)) ->
Ext.T (Value (Vector m a) -> Value (Vector m a) -> CodeGenFunction r (Value (Vector m i))) ->
Ext.T (Value (Vector m a) -> Value (Vector m a) -> CodeGenFunction r (Value (Vector m a))) ->
(Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)))
order byScalar byCmp byChunk x y =
map (uncurry byScalar) (x,y)
`Ext.run`
(Ext.with byCmp $ \pcmpgt ->
mapChunks (\(cx,cy) ->
pcmpgt cx cy >>= \b -> selectLogical b cy cx) (x,y))
`Ext.run`
(Ext.with byChunk $ \psel ->
zipChunksWith psel x y)
class (IsArithmetic a, IsPrimitive a) => Arithmetic a where
sum ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value a)
sum = sumGeneric
sumToPair ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value a, Value a)
sumToPair = sumToPairGeneric
sumInterleavedToPair ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value a, Value a)
sumInterleavedToPair v =
getLowestPair =<< reduceSumInterleaved 2 v
cumulate ::
(IsPowerOf2 n) =>
Value a -> Value (Vector n a) ->
CodeGenFunction r (Value a, Value (Vector n a))
cumulate = cumulateGeneric
dotProduct ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value a)
dotProduct x y =
dotProductPartial (size x) x y
mul ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
mul = A.mul
instance Arithmetic Float where
sum x =
Ext.runWhen (size x >= 4) (sumGeneric x) $
Ext.with X86.haddps $ \haddp ->
do chunkSum <-
foldl1 (M.liftR2 A.add) $ chop x
y <- haddp chunkSum (value undef)
z <- haddp y (value undef)
extractelement z (valueOf 0)
sumToPair x =
Ext.runWhen (size x >= 4) (getLowestPair x) $
Ext.with X86.haddps $ \haddp ->
let
reduce [] = []
reduce [_] = error "vector must have size power of two"
reduce (x0:x1:xs) =
M.liftR2 haddp x0 x1 : reduce xs
go [] = error "vector must not be empty"
go [c] =
getLowestPair
=<< flip haddp (value undef)
=<< c
go cs = go (reduce cs)
in go $ chop x
dotProduct x y =
Ext.run (sum =<< A.mul x y) $
Ext.with X86.dpps $ \dpp ->
foldl1 (M.liftR2 A.add) $
List.zipWith
(\mx my -> do
cx <- mx
cy <- my
flip extractelement (valueOf 0)
=<< dpp cx cy (valueOf 0xF1))
(chop x)
(chop y)
instance Arithmetic Double where
instance Arithmetic Int8 where
instance Arithmetic Int16 where
instance Arithmetic Int32 where
instance Arithmetic Int64 where
instance Arithmetic Word8 where
instance Arithmetic Word16 where
instance Arithmetic Word64 where
instance Arithmetic Word32 where
mul x y =
A.mul x y
`Ext.run`
(Ext.with X86.pmuludq $ \pmul ->
zipChunksWith
(\cx cy -> do
evenX <- LLVM.shufflevector cx (value undef)
(constVector [constOf 0, undef, constOf 2, undef])
evenY <- LLVM.shufflevector cy (value undef)
(constVector [constOf 0, undef, constOf 2, undef])
evenZ64 <- pmul evenX evenY
evenZ <- LLVM.bitcastUnify evenZ64
oddX <- LLVM.shufflevector cx (value undef)
(constVector [constOf 1, undef, constOf 3, undef])
oddY <- LLVM.shufflevector cy (value undef)
(constVector [constOf 1, undef, constOf 3, undef])
oddZ64 <- pmul oddX oddY
oddZ <- LLVM.bitcastUnify oddZ64
LLVM.shufflevector evenZ oddZ
(constVector [constOf 0, constOf 4, constOf 2, constOf 6]))
x y)
`Ext.run`
(Ext.with X86.pmulld $ \pmul ->
zipChunksWith pmul x y)
umul32to64 ::
(IsPowerOf2 n) =>
Value (Vector n Word32) ->
Value (Vector n Word32) ->
CodeGenFunction r (Value (Vector n Word64))
umul32to64 x y =
(do x64 <- map LLVM.zext x
y64 <- map LLVM.zext y
A.mul x64 y64)
`Ext.run`
(Ext.with X86.pmuludq $ \pmul ->
zipChunksWith
(\cx cy -> do
evenX <- LLVM.shufflevector cx (value undef)
(constVector [constOf 0, undef, constOf 2, undef])
evenY <- LLVM.shufflevector cy (value undef)
(constVector [constOf 0, undef, constOf 2, undef])
evenZ <- pmul evenX evenY
oddX <- LLVM.shufflevector cx (value undef)
(constVector [constOf 1, undef, constOf 3, undef])
oddY <- LLVM.shufflevector cy (value undef)
(constVector [constOf 1, undef, constOf 3, undef])
oddZ <- pmul oddX oddY
assemble =<< (sequence $
extract (valueOf 0) evenZ :
extract (valueOf 0) oddZ :
extract (valueOf 1) evenZ :
extract (valueOf 1) oddZ :
[]))
x y)
class (Arithmetic a, LLVM.CmpRet a Bool, IsConst a) =>
Real a where
min, max ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
abs ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
truncate, floor, fraction ::
(IsPowerOf2 n) =>
Value (Vector n a) ->
CodeGenFunction r (Value (Vector n a))
instance Real Float where
min = zipAutoWith A.fmin X86.minps
max = zipAutoWith A.fmax X86.maxps
abs = mapAuto A.fabs X86.absps
truncate x =
(LLVM.sitofp .
(id :: Value (Vector n Int32) -> Value (Vector n Int32))
<=< LLVM.fptosi) x
`Ext.run`
(Ext.with X86.roundps $ \round ->
mapChunks (flip round (valueOf 3)) x)
floor x =
floorGeneric x
`Ext.run`
(Ext.with X86.cmpps $ \cmp ->
mapChunks (floorLogical cmp) x)
`Ext.run`
(Ext.with X86.roundps $ \round ->
mapChunks (flip round (valueOf 1)) x)
fraction x =
fractionGeneric x
`Ext.run`
(Ext.with X86.cmpps $ \cmp ->
mapChunks (fractionLogical cmp) x)
`Ext.run`
(Ext.with X86.roundps $ \round ->
mapChunks (\c -> A.sub c =<< flip round (valueOf 1) c) x)
instance Real Double where
min = zipAutoWith A.fmin X86.minpd
max = zipAutoWith A.fmax X86.maxpd
abs = mapAuto A.fabs X86.abspd
truncate x =
(LLVM.sitofp .
(id :: Value (Vector n Int64) -> Value (Vector n Int64))
<=< LLVM.fptosi) x
`Ext.run`
(Ext.with X86.roundpd $ \round ->
mapChunks (flip round (valueOf 3)) x)
floor x =
floorGeneric x
`Ext.run`
(Ext.with X86.cmppd $ \cmp ->
mapChunks (floorLogical cmp) x)
`Ext.run`
(Ext.with X86.roundpd $ \round ->
mapChunks (flip round (valueOf 1)) x)
fraction x =
fractionGeneric x
`Ext.run`
(Ext.with X86.cmppd $ \cmp ->
mapChunks (fractionLogical cmp) x)
`Ext.run`
(Ext.with X86.roundpd $ \round ->
mapChunks (\c -> A.sub c =<< flip round (valueOf 1) c) x)
instance Real Int8 where
min = order A.smin X86.pcmpgtb X86.pminsb
max = order A.smax (fmap flip X86.pcmpgtb) X86.pmaxsb
abs = mapAuto A.sabs X86.pabsb
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Int16 where
min = order A.smin X86.pcmpgtw X86.pminsw
max = order A.smax (fmap flip X86.pcmpgtw) X86.pmaxsw
abs = mapAuto A.sabs X86.pabsw
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Int32 where
min = order A.smin X86.pcmpgtd X86.pminsd
max = order A.smax (fmap flip X86.pcmpgtd) X86.pmaxsd
abs = mapAuto A.sabs X86.pabsd
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Int64 where
min = zipAutoWith A.smin (orderBy X86.pcmpgtq)
max = zipAutoWith A.smax (orderBy (fmap flip X86.pcmpgtq))
abs = mapAuto A.sabs $
Ext.with (orderBy (fmap flip X86.pcmpgtq)) $
\smax x -> smax x =<< LLVM.neg x
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Word8 where
min = order A.umin X86.pcmpugtb X86.pminub
max = order A.umax (fmap flip X86.pcmpugtb) X86.pmaxub
abs = return
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Word16 where
min = order A.umin X86.pcmpugtw X86.pminuw
max = order A.umax (fmap flip X86.pcmpugtw) X86.pmaxuw
abs = return
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Word32 where
min = order A.umin X86.pcmpugtd X86.pminud
max = order A.umax (fmap flip X86.pcmpugtd) X86.pmaxud
abs = return
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)
instance Real Word64 where
min = zipAutoWith A.umin (orderBy X86.pcmpugtq)
max = zipAutoWith A.umax (orderBy (fmap flip X86.pcmpugtq))
abs = return
truncate = return
floor = return
fraction = const $ return (value LLVM.zero)