{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} module LLVM.Extra.Vector ( Simple (shuffleMatch, extract), C (insert), Element, Size, Canonical, Construct, size, sizeInTuple, replicate, iterate, assemble, shuffle, rotateUp, rotateDown, reverse, shiftUp, shiftDown, shiftUpMultiZero, shiftDownMultiZero, shuffleMatchTraversable, shuffleMatchAccess, shuffleMatchPlain1, shuffleMatchPlain2, insertTraversable, extractTraversable, extractAll, Constant, constant, insertChunk, modify, map, mapChunks, zipChunksWith, chop, concat, signedFraction, cumulate1, Arithmetic (sum, sumToPair, sumInterleavedToPair, cumulate, dotProduct, mul), Real (min, max, abs, signum, truncate, floor, fraction), ) where import qualified LLVM.Extra.Tuple as Tuple import qualified LLVM.Extra.ArithmeticPrivate as A import qualified LLVM.Util.Intrinsic as Intrinsic import qualified LLVM.Core as LLVM import LLVM.Core (Value, ConstValue, valueOf, value, constOf, undef, Vector, insertelement, extractelement, IsConst, IsArithmetic, IsFloating, IsPrimitive, CodeGenFunction, ) import qualified Type.Data.Num.Decimal as TypeNum import Type.Data.Num.Decimal ((:+:)) import qualified Control.Applicative as App import qualified Control.Monad.HT as M import Control.Monad.HT ((<=<), ) import Control.Monad (liftM2, liftM3, foldM, ) import Control.Applicative (liftA2, ) import qualified Data.Traversable as Trav import qualified Data.Foldable as Fold import qualified Data.NonEmpty.Class as NonEmptyC import qualified Data.NonEmpty as NonEmpty import qualified Data.List.HT as ListHT import qualified Data.List as List import Data.NonEmpty ((!:), ) import Data.Int (Int8, Int16, Int32, Int64, ) import Data.Word (Word8, Word16, Word32, Word64, Word) import Prelude hiding (Real, truncate, floor, round, map, zipWith, iterate, replicate, reverse, concat, sum, ) -- * target independent functions {- | Allow to work on records of vectors as if they are vectors of records. This is a reasonable approach for records of different element types since processor vectors can only be built from elements of the same type. But also, say, for chunked stereo signal this makes sense. In this case we would work on @Stereo (Value a)@. Formerly we used a two-way dependency Vector <-> (Element, Size). Now we have only the dependency Vector -> (Element, Size). This means that we need some more type annotations as in umul32to64/assemble, on the other hand we can allow multiple vector types with respect to the same element type. E.g. we can provide a vector type with pair elements where the pair elements are interleaved in the vector. -} class (Simple v) => C v where insert :: Value Word32 -> Element v -> v -> CodeGenFunction r v class (TypeNum.Positive (Size v), Tuple.Phi v, Tuple.Undefined v) => Simple v where type Element v :: * type Size v :: * shuffleMatch :: ConstValue (Vector (Size v) Word32) -> v -> CodeGenFunction r v extract :: Value Word32 -> v -> CodeGenFunction r (Element v) instance (TypeNum.Positive n, LLVM.IsPrimitive a) => Simple (Value (Vector n a)) where type Element (Value (Vector n a)) = Value a type Size (Value (Vector n a)) = n shuffleMatch is v = shuffleMatchPlain1 v is extract k v = extractelement v k instance (TypeNum.Positive n, LLVM.IsPrimitive a) => C (Value (Vector n a)) where insert k a v = insertelement v a k instance (Simple v0, Simple v1, Size v0 ~ Size v1) => Simple (v0, v1) where type Element (v0, v1) = (Element v0, Element v1) type Size (v0, v1) = Size v0 shuffleMatch is (v0,v1) = liftM2 (,) (shuffleMatch is v0) (shuffleMatch is v1) extract k (v0,v1) = liftM2 (,) (extract k v0) (extract k v1) instance (C v0, C v1, Size v0 ~ Size v1) => C (v0, v1) where insert k (a0,a1) (v0,v1) = liftM2 (,) (insert k a0 v0) (insert k a1 v1) instance (Simple v0, Simple v1, Simple v2, Size v0 ~ Size v1, Size v1 ~ Size v2) => Simple (v0, v1, v2) where type Element (v0, v1, v2) = (Element v0, Element v1, Element v2) type Size (v0, v1, v2) = Size v0 shuffleMatch is (v0,v1,v2) = liftM3 (,,) (shuffleMatch is v0) (shuffleMatch is v1) (shuffleMatch is v2) extract k (v0,v1,v2) = liftM3 (,,) (extract k v0) (extract k v1) (extract k v2) instance (C v0, C v1, C v2, Size v0 ~ Size v1, Size v1 ~ Size v2) => C (v0, v1, v2) where insert k (a0,a1,a2) (v0,v1,v2) = liftM3 (,,) (insert k a0 v0) (insert k a1 v1) (insert k a2 v2) newtype Constant n a = Constant a constant :: (TypeNum.Positive n) => a -> Constant n a constant = Constant instance Functor (Constant n) where {-# INLINE fmap #-} fmap f (Constant a) = Constant (f a) instance App.Applicative (Constant n) where {-# INLINE pure #-} pure = Constant {-# INLINE (<*>) #-} Constant f <*> Constant a = Constant (f a) instance Fold.Foldable (Constant n) where {-# INLINE foldMap #-} foldMap = Trav.foldMapDefault instance Trav.Traversable (Constant n) where {-# INLINE sequenceA #-} sequenceA (Constant a) = fmap Constant a instance (Tuple.Phi a) => Tuple.Phi (Constant n a) where phi = Tuple.phiTraversable addPhi = Tuple.addPhiFoldable instance (Tuple.Undefined a) => Tuple.Undefined (Constant n a) where undef = Tuple.undefPointed instance (TypeNum.Positive n, Tuple.Phi a, Tuple.Undefined a) => Simple (Constant n a) where type Element (Constant n a) = a type Size (Constant n a) = n shuffleMatch _ = return extract _ (Constant a) = return a class (n ~ Size (Construct n a), a ~ Element (Construct n a), C (Construct n a)) => Canonical n a where type Construct n a :: * instance (TypeNum.Positive n, LLVM.IsPrimitive a) => Canonical n (Value a) where type Construct n (Value a) = Value (Vector n a) instance (Canonical n a0, Canonical n a1) => Canonical n (a0, a1) where type Construct n (a0, a1) = (Construct n a0, Construct n a1) instance (Canonical n a0, Canonical n a1, Canonical n a2) => Canonical n (a0, a1, a2) where type Construct n (a0, a1, a2) = (Construct n a0, Construct n a1, Construct n a2) size :: (TypeNum.Positive n) => Value (Vector n a) -> Int size = let sz :: (TypeNum.Positive n) => TypeNum.Singleton n -> Value (Vector n a) -> Int sz n _ = TypeNum.integralFromSingleton n in sz TypeNum.singleton {- | Manually assemble a vector of equal values. Better use ScalarOrVector.replicate. -} replicate :: (C v) => Element v -> CodeGenFunction r v replicate = replicateCore TypeNum.singleton replicateCore :: (C v) => TypeNum.Singleton (Size v) -> Element v -> CodeGenFunction r v replicateCore n = assemble . List.replicate (TypeNum.integralFromSingleton n) {- | construct a vector out of single elements You must assert that the length of the list matches the vector size. This can be considered the inverse of 'extractAll'. -} assemble :: (C v) => [Element v] -> CodeGenFunction r v assemble = foldM (\v (k,x) -> insert (valueOf k) x v) Tuple.undef . List.zip [0..] {- sends GHC into an infinite loop foldM (\(k,x) -> insert (valueOf k) x) Tuple.undef . List.zip [0..] -} insertChunk :: (C c, C v, Element c ~ Element v) => Int -> c -> v -> CodeGenFunction r v insertChunk k x = M.chain $ List.zipWith (\i j -> \v -> extract (valueOf i) x >>= \e -> insert (valueOf j) e v) (take (sizeInTuple x) [0..]) [fromIntegral k ..] iterate :: (C v) => (Element v -> CodeGenFunction r (Element v)) -> Element v -> CodeGenFunction r v iterate f x = fmap snd $ iterateCore f x Tuple.undef iterateCore :: (C v) => (Element v -> CodeGenFunction r (Element v)) -> Element v -> v -> CodeGenFunction r (Element v, v) iterateCore f x0 v0 = foldM (\(x,v) k -> liftM2 (,) (f x) (insert (valueOf k) x v)) (x0,v0) (take (sizeInTuple v0) [0..]) {- | Manually implement vector shuffling using insertelement and extractelement. In contrast to LLVM's built-in instruction it supports distinct vector sizes, but it allows only one input vector (or a tuple of vectors, but we cannot shuffle between them). For more complex shuffling we recommend 'extractAll' and 'assemble'. -} shuffle :: (C v, C w, Element v ~ Element w) => v -> ConstValue (Vector (Size w) Word32) -> CodeGenFunction r w shuffle x i = assemble =<< mapM (flip extract x <=< extractelement (value i) . valueOf) (take (size (value i)) [0..]) sizeInTuple :: Simple v => v -> Int sizeInTuple = let sz :: Simple v => TypeNum.Singleton (Size v) -> v -> Int sz n _ = TypeNum.integralFromSingleton n in sz TypeNum.singleton constCyclicVector :: (IsConst a, TypeNum.Positive n) => NonEmpty.T [] a -> ConstValue (Vector n a) constCyclicVector = LLVM.constCyclicVector . fmap constOf {- | Rotate one element towards the higher elements. I don't want to call it rotateLeft or rotateRight, because there is no prefered layout for the vector elements. In Intel's instruction manual vector elements are indexed like the bits, that is from right to left. However, when working with Haskell list and enumeration syntax, the start index is left. -} rotateUp :: (Simple v) => v -> CodeGenFunction r v rotateUp x = shuffleMatch (constCyclicVector $ (fromIntegral (sizeInTuple x) - 1) !: [0..]) x rotateDown :: (Simple v) => v -> CodeGenFunction r v rotateDown x = shuffleMatch (constCyclicVector $ NonEmpty.snoc (List.take (sizeInTuple x - 1) [1..]) 0) x reverse :: (Simple v) => v -> CodeGenFunction r v reverse x = shuffleMatch (constCyclicVector $ maybe (error "vector size must be positive") NonEmpty.reverse $ NonEmpty.fetch $ List.take (sizeInTuple x) [0..]) x shiftUp :: (C v) => Element v -> v -> CodeGenFunction r (Element v, v) shiftUp x0 x = do y <- shuffleMatch (LLVM.constCyclicVector $ undef !: List.map constOf [0..]) x liftM2 (,) (extract (LLVM.valueOf (fromIntegral (sizeInTuple x) - 1)) x) (insert (value LLVM.zero) x0 y) shiftDown :: (C v) => Element v -> v -> CodeGenFunction r (Element v, v) shiftDown x0 x = do y <- shuffleMatch (LLVM.constCyclicVector $ NonEmpty.snoc (List.map constOf $ List.take (sizeInTuple x - 1) [1..]) undef) x liftM2 (,) (extract (value LLVM.zero) x) (insert (LLVM.valueOf (fromIntegral (sizeInTuple x) - 1)) x0 y) shiftUpMultiZero :: (C v, Tuple.Zero (Element v)) => Int -> v -> LLVM.CodeGenFunction r v shiftUpMultiZero n v = assemble . take (sizeInTuple v) . (List.replicate n Tuple.zero ++) =<< extractAll v shiftDownMultiZero :: (C v, Tuple.Zero (Element v)) => Int -> v -> LLVM.CodeGenFunction r v shiftDownMultiZero n v = assemble . take (sizeInTuple v) . (++ List.repeat Tuple.zero) . List.drop n =<< extractAll v shuffleMatchTraversable :: (Simple v, Trav.Traversable f) => ConstValue (Vector (Size v) Word32) -> f v -> CodeGenFunction r (f v) shuffleMatchTraversable is v = Trav.mapM (shuffleMatch is) v {- | Implement the 'shuffleMatch' method using the methods of the 'C' class. -} shuffleMatchAccess :: (C v) => ConstValue (Vector (Size v) Word32) -> v -> CodeGenFunction r v shuffleMatchAccess is v = assemble =<< mapM (flip extract v <=< flip extract (value is) . valueOf) (take (size (value is)) [0..]) shuffleMatchPlain1 :: (TypeNum.Positive n, IsPrimitive a) => Value (Vector n a) -> ConstValue (Vector n Word32) -> CodeGenFunction r (Value (Vector n a)) shuffleMatchPlain1 x = shuffleMatchPlain2 x (value undef) shuffleMatchPlain2 :: (TypeNum.Positive n, IsPrimitive a) => Value (Vector n a) -> Value (Vector n a) -> ConstValue (Vector n Word32) -> CodeGenFunction r (Value (Vector n a)) shuffleMatchPlain2 = LLVM.shufflevector insertTraversable :: (C v, Trav.Traversable f, App.Applicative f) => Value Word32 -> f (Element v) -> f v -> CodeGenFunction r (f v) insertTraversable n a v = Trav.sequence (liftA2 (insert n) a v) extractTraversable :: (Simple v, Trav.Traversable f) => Value Word32 -> f v -> CodeGenFunction r (f (Element v)) extractTraversable n v = Trav.mapM (extract n) v {- | provide the elements of a vector as a list of individual virtual registers This can be considered the inverse of 'assemble'. -} extractAll :: (Simple v) => v -> LLVM.CodeGenFunction r [Element v] extractAll = sequence . extractList extractList :: (Simple v) => v -> [LLVM.CodeGenFunction r (Element v)] extractList x = List.map (flip extract x . LLVM.valueOf) (take (sizeInTuple x) [0..]) modify :: (C v) => Value Word32 -> (Element v -> CodeGenFunction r (Element v)) -> (v -> CodeGenFunction r v) modify k f v = flip (insert k) v =<< f =<< extract k v {- | Like LLVM.Util.Loop.mapVector but the loop is unrolled, which is faster since it can be packed by the code generator. -} map, _mapByFold :: (C v, C w, Size v ~ Size w) => (Element v -> CodeGenFunction r (Element w)) -> (v -> CodeGenFunction r w) map f = assemble <=< mapM f <=< extractAll _mapByFold f a = foldM (\b n -> extract (valueOf n) a >>= f >>= flip (insert (valueOf n)) b) Tuple.undef (take (sizeInTuple a) [0..]) mapChunks :: (C ca, C cb, Size ca ~ Size cb, C va, C vb, Size va ~ Size vb, Element ca ~ Element va, Element cb ~ Element vb) => (ca -> CodeGenFunction r cb) -> (va -> CodeGenFunction r vb) mapChunks f a = foldM (\b (am,k) -> am >>= \ac -> f ac >>= \bc -> insertChunk (k * sizeInTuple ac) bc b) Tuple.undef $ List.zip (chop a) [0..] zipChunksWith :: (C ca, C cb, C cc, Size ca ~ Size cb, Size cb ~ Size cc, C va, C vb, C vc, Size va ~ Size vb, Size vb ~ Size vc, Element ca ~ Element va, Element cb ~ Element vb, Element cc ~ Element vc) => (ca -> cb -> CodeGenFunction r cc) -> (va -> vb -> CodeGenFunction r vc) zipChunksWith f a b = mapChunks (uncurry f) (a,b) mapChunks2 :: (C ca, C cb, Size ca ~ Size cb, C la, C lb, Size la ~ Size lb, C va, C vb, Size va ~ Size vb, Element ca ~ Element va, Element la ~ Element va, Element cb ~ Element vb, Element lb ~ Element vb) => (ca -> CodeGenFunction r cb) -> (la -> CodeGenFunction r lb) -> (va -> CodeGenFunction r vb) mapChunks2 f g a = do let chunkSize :: C ca => (ca -> cgf) -> TypeNum.Singleton (Size ca) -> Int chunkSize _ = TypeNum.integralFromSingleton xs <- extractAll a case ListHT.viewR $ ListHT.sliceVertical (chunkSize g TypeNum.singleton) xs of Nothing -> assemble [] Just (cs,c) -> do ds <- mapM (extractAll <=< g <=< assemble) cs d <- if List.length c <= chunkSize f TypeNum.singleton then fmap List.concat $ mapM (extractAll <=< f <=< assemble) $ ListHT.sliceVertical (chunkSize f TypeNum.singleton) c else extractAll =<< g =<< assemble c assemble $ List.concat ds ++ d _zipChunks2With :: (C ca, C cb, C cc, Size ca ~ Size cb, Size cb ~ Size cc, C la, C lb, C lc, Size la ~ Size lb, Size lb ~ Size lc, C va, C vb, C vc, Size va ~ Size vb, Size vb ~ Size vc, Element ca ~ Element va, Element la ~ Element va, Element cb ~ Element vb, Element lb ~ Element vb, Element cc ~ Element vc, Element lc ~ Element vc) => (ca -> cb -> CodeGenFunction r cc) -> (la -> lb -> CodeGenFunction r lc) -> (va -> vb -> CodeGenFunction r vc) _zipChunks2With f g a b = mapChunks2 (uncurry f) (uncurry g) (a,b) {- | Ideally on ix86 with SSE41 this would be translated to 'dpps'. -} dotProductPartial :: (TypeNum.Positive n, LLVM.IsPrimitive a, LLVM.IsArithmetic a) => Int -> Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value a) dotProductPartial n x y = sumPartial n =<< A.mul x y sumPartial :: (TypeNum.Positive n, LLVM.IsPrimitive a, LLVM.IsArithmetic a) => Int -> Value (Vector n a) -> CodeGenFunction r (Value a) sumPartial n x = foldl1 {- quite the same as (+) using LLVM.Arithmetic instances, but requires less type constraints -} (M.liftJoin2 A.add) (List.map (LLVM.extractelement x . valueOf) $ take n $ [0..]) {- | If the target vector type is a native type then the chop operation produces no actual machine instruction. (nop) If the vector cannot be evenly divided into chunks the last chunk will be padded with undefined values. -} chop :: (C c, C v, Element c ~ Element v) => v -> [CodeGenFunction r c] chop = chopCore TypeNum.singleton chopCore :: (C c, C v, Element c ~ Element v) => TypeNum.Singleton (Size c) -> v -> [CodeGenFunction r c] chopCore m x = List.map (assemble <=< sequence) $ ListHT.sliceVertical (TypeNum.integralFromSingleton m) $ extractList x {- | The target size is determined by the type. If the chunk list provides more data, the exceeding data is dropped. If the chunk list provides too few data, the target vector is filled with undefined elements. -} concat :: (C c, C v, Element c ~ Element v) => [c] -> CodeGenFunction r v concat xs = foldM (\v0 (js,c) -> foldM (\v (i,j) -> do x <- extract (valueOf i) c insert (valueOf j) x v) v0 $ List.zip [0..] js) Tuple.undef $ List.zip (ListHT.sliceVertical (sizeInTuple (head xs)) [0..]) xs getLowestPair :: (TypeNum.Positive n, IsPrimitive a) => Value (Vector n a) -> CodeGenFunction r (Value a, Value a) getLowestPair x = liftM2 (,) (extractelement x (valueOf 0)) (extractelement x (valueOf 1)) _reduceAddInterleaved :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n, TypeNum.Positive m, (m :+: m) ~ n) => TypeNum.Singleton m -> Value (Vector n a) -> CodeGenFunction r (Value (Vector m a)) _reduceAddInterleaved tm v = do let m = TypeNum.integralFromSingleton tm x <- shuffle v (constCyclicVector $ NonEmptyC.iterate succ 0) y <- shuffle v (constCyclicVector $ NonEmptyC.iterate succ m) A.add x y sumGeneric :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value a) sumGeneric = flip extractelement (valueOf 0) <=< reduceSumInterleaved 1 sumToPairGeneric :: (Arithmetic a, TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value a, Value a) sumToPairGeneric v = let n2 = div (size v) 2 in sumInterleavedToPair =<< shuffleMatchPlain1 v (maybe (error "vector size must be positive") LLVM.constCyclicVector $ NonEmpty.fetch $ List.map (constOf . fromIntegral) $ concatMap (\k -> [k, k+n2]) [0..]) {- | We partition a vector of size n into chunks of size m and add these chunks using vector additions. We do this by repeated halving of the vector, since this way we do not need assumptions about the native vector size. We reduce the vector size only virtually, that is we maintain the vector size and fill with undefined values. This is reasonable since LLVM-2.5 and LLVM-2.6 does not allow shuffling between vectors of different size and because it likes to do computations on Vector D2 Float in MMX registers on ix86 CPU's, which interacts badly with FPU usage. Since we fill the vector with undefined values, LLVM actually treats the vectors like vectors of smaller size. -} reduceSumInterleaved :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => Int -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) reduceSumInterleaved m x0 = let go :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => Int -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) go n x = if m==n then return x else let n2 = div n 2 in go n2 =<< A.add x =<< shuffleMatchPlain1 x (LLVM.constCyclicVector $ NonEmpty.appendLeft (List.map constOf $ take n2 [fromIntegral n2 ..]) (NonEmptyC.repeat undef)) in go (size x0) x0 cumulateGeneric, _cumulateSimple :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => Value a -> Value (Vector n a) -> CodeGenFunction r (Value a, Value (Vector n a)) _cumulateSimple a x = foldM (\(a0,y0) k -> do a1 <- A.add a0 =<< extract (valueOf k) x y1 <- insert (valueOf k) a0 y0 return (a1,y1)) (a, Tuple.undef) (take (sizeInTuple x) $ [0..]) cumulateGeneric = cumulateFrom1 cumulate1 cumulateFrom1 :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => (Value (Vector n a) -> CodeGenFunction r (Value (Vector n a))) -> Value a -> Value (Vector n a) -> CodeGenFunction r (Value a, Value (Vector n a)) cumulateFrom1 cum a x0 = do (b,x1) <- shiftUp a x0 y <- cum x1 z <- A.add b =<< extract (valueOf (fromIntegral (sizeInTuple x0) - 1)) y return (z,y) {- | Needs (log n) vector additions -} cumulate1 :: (IsArithmetic a, IsPrimitive a, TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) cumulate1 x = foldM (\y k -> A.add y =<< shiftUpMultiZero k y) x (takeWhile ( Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumInt x = do let zero = LLVM.value LLVM.zero negative <- A.cmp LLVM.CmpLT x zero positive <- A.cmp LLVM.CmpGT x zero map (\(n,p) -> LLVM.select n (valueOf (-1)) =<< LLVM.select p (valueOf 1) (LLVM.value LLVM.zero)) (negative, positive) signumWord :: (TypeNum.Positive n, IsPrimitive a, IsArithmetic a, IsConst a, Num a, LLVM.CmpRet a, LLVM.CmpResult a ~ b, IsPrimitive b, LLVM.IsInteger b) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumWord x = do positive <- A.cmp LLVM.CmpGT x (LLVM.value LLVM.zero) map (\p -> LLVM.select p (valueOf 1) (LLVM.value LLVM.zero)) positive -} signumIntGeneric :: (TypeNum.Positive n, {- TypeNum.Positive (n :*: LLVM.SizeOf a), -} IsPrimitive a, LLVM.IsInteger a, LLVM.CmpRet a, LLVM.CmpResult a ~ b, IsPrimitive b, LLVM.IsInteger b) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumIntGeneric x = do let zero = LLVM.value LLVM.zero negative <- LLVM.sadapt =<< A.cmp LLVM.CmpLT x zero positive <- LLVM.sadapt =<< A.cmp LLVM.CmpGT x zero A.sub negative positive signumWordGeneric :: (TypeNum.Positive n, IsPrimitive a, LLVM.IsInteger a, LLVM.CmpRet a, LLVM.CmpResult a ~ b, IsPrimitive b, LLVM.IsInteger b) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumWordGeneric x = LLVM.zadapt =<< A.cmp LLVM.CmpGT x (LLVM.value LLVM.zero) signumFloatGeneric :: (TypeNum.Positive n, IsPrimitive a, IsArithmetic a, IsFloating a, LLVM.CmpRet a, LLVM.CmpResult a ~ b, IsPrimitive b, LLVM.IsInteger b) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumFloatGeneric x = do let zero = LLVM.value LLVM.zero negative <- LLVM.sitofp =<< A.cmp LLVM.CmpLT x zero positive <- LLVM.sitofp =<< A.cmp LLVM.CmpGT x zero A.sub negative positive signedFraction :: (IsFloating a, IsConst a, Real a, TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signedFraction x = A.sub x =<< truncate x -- * target independent functions with target dependent optimizations {- | The order of addition is chosen for maximum efficiency. We do not try to prevent cancelations. -} class (IsArithmetic a, IsPrimitive a) => Arithmetic a where sum :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value a) sum = sumGeneric {- | The first result value is the sum of all vector elements from 0 to @div n 2 + 1@ and the second result value is the sum of vector elements from @div n 2@ to @n-1@. n must be at least D2. -} sumToPair :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value a, Value a) sumToPair = sumToPairGeneric {- | Treat the vector as concatenation of pairs and all these pairs are added. Useful for stereo signal processing. n must be at least D2. -} sumInterleavedToPair :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value a, Value a) sumInterleavedToPair v = getLowestPair =<< reduceSumInterleaved 2 v cumulate :: (TypeNum.Positive n) => Value a -> Value (Vector n a) -> CodeGenFunction r (Value a, Value (Vector n a)) cumulate = cumulateGeneric dotProduct :: (TypeNum.Positive n) => Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value a) dotProduct x y = dotProductPartial (size x) x y mul :: (TypeNum.Positive n) => Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) mul = A.mul instance Arithmetic Float where instance Arithmetic Double where instance Arithmetic Int where instance Arithmetic Int8 where instance Arithmetic Int16 where instance Arithmetic Int32 where instance Arithmetic Int64 where instance Arithmetic Word where instance Arithmetic Word8 where instance Arithmetic Word16 where instance Arithmetic Word32 where instance Arithmetic Word64 where class (Arithmetic a, LLVM.CmpRet a, LLVM.IsPrimitive a, IsConst a) => Real a where min, max :: (TypeNum.Positive n) => Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) abs :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signum :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) truncate, floor, fraction :: (TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) instance Real Float where min = Intrinsic.min max = Intrinsic.max abs = Intrinsic.abs signum = signumFloatGeneric truncate = Intrinsic.truncate floor = Intrinsic.floor fraction = A.fraction instance Real Double where min = Intrinsic.min max = Intrinsic.max abs = Intrinsic.abs signum = signumFloatGeneric truncate = Intrinsic.truncate floor = Intrinsic.floor fraction = A.fraction instance Real Int where min = A.min max = A.max abs = A.abs signum = signumIntGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Int8 where min = A.min max = A.max abs = A.abs signum = signumIntGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Int16 where min = A.min max = A.max abs = A.abs signum = signumIntGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Int32 where min = A.min max = A.max abs = A.abs signum = signumIntGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Int64 where min = A.min max = A.max abs = A.abs signum = signumIntGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Word where min = A.min max = A.max abs = return signum = signumWordGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Word8 where min = A.min max = A.max abs = return signum = signumWordGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Word16 where min = A.min max = A.max abs = return signum = signumWordGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Word32 where min = A.min max = A.max abs = return signum = signumWordGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero) instance Real Word64 where min = A.min max = A.max abs = return signum = signumWordGeneric truncate = return floor = return fraction = const $ return (value LLVM.zero)