{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE FlexibleContexts #-} {- | This maintains old code for LLVM-2.6 where vector comparison and select on X86 did not work or generated cumbersome assembly code. It may still be useful for testing. -} module LLVM.Extra.VectorAlt where import qualified LLVM.Extra.Vector as Vector import qualified LLVM.Extra.Arithmetic as A import qualified LLVM.Util.Intrinsic as Intrinsic import qualified LLVM.Core.Guided as Guided import qualified LLVM.Core as LLVM import LLVM.Core (CodeGenFunction, Value, valueOf, value, Vector, CmpRet, IsConst, IsArithmetic, IsFloating, IsPrimitive) import qualified Type.Data.Num.Decimal as TypeNum import Data.Tuple.HT (uncurry3, ) import Data.Int (Int8, Int16, Int32, Int64, ) import Data.Word (Word8, Word16, Word32, Word64, ) import Prelude hiding (max, min, abs, signum, floor, truncate) {- Can be used for both integer and float types, but we need it only for Float types, because LLVM produces ugly code for Float and even more ugly code for Double. -} signum :: (TypeNum.Positive n, IsPrimitive a, IsPrimitive b, IsArithmetic b) => (Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n b))) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n b)) signum gt x = do let zero = LLVM.value LLVM.zero negative <- gt zero x positive <- gt x zero A.sub negative positive ext2 :: (TypeNum.Positive n) => Value (Vector n Bool) -> CodeGenFunction r (Value (Vector n (LLVM.IntN TypeNum.D2))) ext2 = Guided.extBool Guided.vector {- | This has least instruction count for Vector D4 Float on X86. -} signumFloat :: (TypeNum.Positive n, IsPrimitive a, IsArithmetic a, IsFloating a, LLVM.CmpRet a, LLVM.CmpResult a ~ Bool) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) signumFloat x = do let zero = LLVM.value LLVM.zero negative <- ext2 =<< A.cmp LLVM.CmpLT x zero positive <- ext2 =<< A.cmp LLVM.CmpGT x zero LLVM.sitofp =<< A.sub negative positive select :: (TypeNum.Positive n, LLVM.IsFirstClass a, IsPrimitive a, LLVM.CmpRet a, LLVM.CmpResult a ~ Bool) => Value (Vector n Bool) -> Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) select b x y = Vector.map (uncurry3 LLVM.select) (b, x, y) floor :: (TypeNum.Positive n, IsFloating a, Vector.Real a) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) floor = floorLogical A.fcmp fraction :: (TypeNum.Positive n, IsFloating a, Vector.Real a) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) fraction = fractionLogical A.fcmp floorLogical :: (TypeNum.Positive n, IsFloating a, Vector.Real a, IsPrimitive i, LLVM.IsInteger i) => (LLVM.FPPredicate -> Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n i))) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) floorLogical cmp x = do xr <- Intrinsic.truncate x b <- cmp LLVM.FPOGT xr x A.add xr =<< LLVM.sitofp b fractionLogical :: (TypeNum.Positive n, IsFloating a, Vector.Real a, IsPrimitive i, LLVM.IsInteger i) => (LLVM.FPPredicate -> Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n i))) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) fractionLogical cmp x = do xf <- Vector.signedFraction x b <- cmp LLVM.FPOLT xf (value LLVM.zero) A.sub xf =<< LLVM.sitofp b {- | 'floor' implemented using 'select'. This will need jumps. -} floorSelect :: (TypeNum.Positive n, Num a, IsFloating a, Vector.Real a) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) floorSelect x = do xr <- Intrinsic.truncate x b <- A.fcmp LLVM.FPOLE xr x select b xr =<< A.sub xr =<< Vector.replicate (valueOf 1) {- | 'fraction' implemented using 'select'. This will need jumps. -} fractionSelect :: (TypeNum.Positive n, Num a, IsFloating a, Vector.Real a) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) fractionSelect x = do xf <- Vector.signedFraction x b <- A.fcmp LLVM.FPOGE xf (value LLVM.zero) select b xf =<< A.add xf =<< Vector.replicate (valueOf 1) class (LLVM.IsSized a, LLVM.IsSized (Mask a), LLVM.SizeOf a ~ LLVM.SizeOf (Mask a), LLVM.IsPrimitive a, LLVM.IsPrimitive (Mask a), LLVM.IsInteger (Mask a)) => Maskable a where type Mask a :: * instance Maskable Int8 where type Mask Int8 = Int8 instance Maskable Int16 where type Mask Int16 = Int16 instance Maskable Int32 where type Mask Int32 = Int32 instance Maskable Int64 where type Mask Int64 = Int64 instance Maskable Word8 where type Mask Word8 = Int8 instance Maskable Word16 where type Mask Word16 = Int16 instance Maskable Word32 where type Mask Word32 = Int32 instance Maskable Word64 where type Mask Word64 = Int64 instance Maskable Float where type Mask Float = Int32 instance Maskable Double where type Mask Double = Int64 makeMask :: (Maskable a, TypeNum.Positive n) => Value (Vector n a) -> Value (Vector n Bool) -> CodeGenFunction r (Value (Vector n (Mask a))) makeMask _ = Guided.extBool Guided.vector min, max :: (IsConst a, IsArithmetic a, CmpRet a, Maskable a, TypeNum.Positive n) => Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) min x y = do b <- makeMask x =<< A.cmp LLVM.CmpLT x y selectLogical b x y max x y = do b <- makeMask x =<< A.cmp LLVM.CmpGT x y selectLogical b x y abs :: (IsConst a, IsArithmetic a, CmpRet a, Maskable a, TypeNum.Positive n) => Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) abs x = max x =<< LLVM.neg x {- | Another implementation of 'select', this time in terms of binary logical operations. The selecting integers must be (-1) for selecting an element from the first operand and 0 for selecting an element from the second operand. This leads to optimal code. On SSE41 this could be done with blendvps or blendvpd. -} selectLogical :: (LLVM.IsFirstClass a, IsPrimitive a, LLVM.IsInteger i, IsPrimitive i, LLVM.IsSized a, LLVM.IsSized i, LLVM.SizeOf a ~ LLVM.SizeOf i, TypeNum.Positive n) => Value (Vector n i) -> Value (Vector n a) -> Value (Vector n a) -> CodeGenFunction r (Value (Vector n a)) selectLogical b x y = do bneg <- LLVM.inv b xm <- A.and b =<< Guided.bitcast Guided.vector x ym <- A.and bneg =<< Guided.bitcast Guided.vector y Guided.bitcast Guided.vector =<< A.or xm ym