{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE FlexibleContexts #-} module Main where import LLVM.Extra.Control (arrayLoop, ) import qualified LLVM.Extra.ScalarOrVector as SoV import qualified LLVM.Extra.Vector as Vector import qualified LLVM.Extra.Extension.X86 as X86 import qualified LLVM.Extra.Extension as Ext import qualified LLVM.Extra.Class as Class import qualified LLVM.Extra.Arithmetic as A import LLVM.Core import LLVM.ExecutionEngine (simpleFunction, ) import qualified System.IO as IO import Types.Data.Num(D4, ) import Data.Word (Word32, ) import qualified Foreign.Storable as St import Foreign.Marshal.Array (allocaArray, ) import Control.Monad.Trans.State (StateT(StateT), runStateT, ) import Control.Monad (liftM2, ) type Vec = ConstValue (Vector D4 Float) constVec :: Float -> CodeGenFunction r (Value (Vector D4 Float)) constVec x = return $ valueOf $ toVector (x,x,x,x) constVecInsert :: Float -> CodeGenFunction r (Value (Vector D4 Float)) constVecInsert x' = let x = valueOf x' in foldr (\n mv v -> insertelement v x (valueOf n) >>= mv) return [0..3] (value (undef :: Vec)) {- This implementation cannot make use of vector operations, because 'frem' is only available in the FPU. -} fractionVector0 :: (IsFloating c, ABinOp a (Value (Vector D4 Float)), ABinOpResult a (Value (Vector D4 Float)) ~ (v c)) => a -> CodeGenFunction r (v c) fractionVector0 x = frem x =<< constVec 1 {- This call fill (fromIntegral len) ptr (toVector (0.01003, 0.01001, 0.00999, 0.00997)) >> would not work, because Vector is not of type Generic. -} mChorusVectorArg :: CodeGenModule (Function (Word32 -> Ptr Float -> Vector D4 Float -> IO Float)) mChorusVectorArg = createFunction ExternalLinkage $ \ size ptr freq -> do const1 <- constVec 1 const2 <- constVec 2 s <- arrayLoop size ptr (value (zero :: Vec)) $ \ ptri phase -> do y <- sub const1 =<< mul const2 phase s0 <- extractelement y (valueOf 0) s1 <- extractelement y (valueOf 1) s2 <- extractelement y (valueOf 2) s3 <- extractelement y (valueOf 3) s01 <- add s0 s1 s23 <- add s2 s3 s0123 <- add s01 s23 flip store ptri =<< A.mul (valueOf 0.25) s0123 Vector.fraction =<< add phase freq ss <- extractelement s (valueOf 0) ret (ss :: Value Float) {- | differing vector sizes are allowed according to documentation, but not supported by C++ library of LLVM-2.5 mixReduceSize :: Value (Vector D4 Float) -> CodeGenFunction r (Value Float) mixReduceSize y = do y01 <- shufflevector y (value undef) (constVector [constOf 0, constOf 1]) y23 <- shufflevector y (value undef) (constVector [constOf 2, constOf 3]) z <- add (y01 :: Value (Vector D2 Float)) (y23 :: Value (Vector D2 Float)) s0 <- extractelement z (valueOf 0) s1 <- extractelement z (valueOf 1) mul (0.25 :: Float) =<< add s0 s1 -} {- Here we do use consistently Vectors of size 4. Since we declare the upper floats as undefined the code is efficient. -} mixGeneric :: Value (Vector D4 Float) -> CodeGenFunction r (Value Float) mixGeneric y = do -- that is translated to movhlps y23 <- shufflevector y (value undef) (constVector [constOf 2, constOf 3, undef, undef]) z <- A.add y y23 s0 <- extractelement z (valueOf 0) s1 <- extractelement z (valueOf 1) mul (0.25 :: Float) =<< add s0 s1 {- Needs the horizontal add instruction from the SSSE3 extension in ix86 CPUs. -} mixHorizontal :: Value (Vector D4 Float) -> CodeGenFunction r (Value Float) mixHorizontal y = do z <- Ext.runUnsafe X86.haddps (value undef) y s <- Ext.runUnsafe X86.haddps (value undef) z mul (0.25 :: Float) =<< extractelement s (valueOf 0) {- Needs the dot product instruction from the SSE4 extension in ix86 CPUs. -} mixDotProduct :: Value (Vector D4 Float) -> CodeGenFunction r (Value Float) mixDotProduct y = do x <- SoV.replicate (valueOf 0.25) z <- Ext.runUnsafe X86.dpps x y (valueOf 0xF1) extractelement z (valueOf 0) mChorusVector :: CodeGenModule (Function (Word32 -> Ptr Float -> Float -> Float -> Float -> Float -> IO Float)) mChorusVector = createFunction ExternalLinkage $ \ size ptr f0 f1 f2 f3 -> do freq <- Vector.assemble [f0,f1,f2,f3] const1 <- constVec 1 const2 <- constVec (-2) s <- arrayLoop size ptr (value (zero :: Vec)) $ \ ptri phase -> do flip store ptri =<< mixHorizontal =<< add const1 =<< mul const2 phase Vector.fraction =<< A.add phase freq ss <- extractelement s (valueOf 0) ret ss waveSaw :: Value Float -> CodeGenFunction r (Value Float) waveSaw t = sub (1 :: Float) =<< mul (2 :: Float) t osciSaw :: Value Float -> Value Float -> CodeGenFunction r (Value Float, Value Float) osciSaw freq phase = liftM2 (,) (waveSaw phase) (SoV.incPhase freq phase) mChorus :: CodeGenModule (Function (Word32 -> Ptr Float -> Float -> Float -> Float -> Float -> IO Float)) mChorus = createFunction ExternalLinkage $ \ size ptr f0 f1 f2 f3 -> do s <- arrayLoop size ptr Class.zeroTuple $ \ ptri ((phase0, phase1), (phase2, phase3)) -> do (y0, phase0') <- osciSaw f0 phase0 (y1, phase1') <- osciSaw f1 phase1 (y2, phase2') <- osciSaw f2 phase2 (y3, phase3') <- osciSaw f3 phase3 y01 <- A.add y0 y1 y23 <- A.add y2 y3 y0123 <- A.add y01 y23 flip store ptri =<< mul (0.25 :: Float) y0123 return ((phase0', phase1'), (phase2', phase3')) ret (fst (fst s) :: Value Float) sawOsciAction :: Value Float -> StateT (Value Float) (CodeGenFunction r) (Value Float) sawOsciAction freq = StateT $ osciSaw freq {- (***) :: StateT s m a -> StateT t m b -> StateT (s,t) m (a,b) (***) sta stb = StateT $ \(s0,t0) -> do (a,s1) <- runStateT sta s0 (b,t1) <- runStateT stb t0 return ((a,b), (s1,t1)) -} (=+=) :: StateT s (CodeGenFunction r) (Value Float) -> StateT t (CodeGenFunction r) (Value Float) -> StateT (s,t) (CodeGenFunction r) (Value Float) (=+=) sta stb = StateT $ \(s0,t0) -> do (a,s1) <- runStateT sta s0 (b,t1) <- runStateT stb t0 c <- add a b return (c, (s1,t1)) mChorusMonadic :: CodeGenModule (Function (Word32 -> Ptr Float -> Float -> Float -> Float -> Float -> IO Float)) mChorusMonadic = createFunction ExternalLinkage $ \ size ptr f0 f1 f2 f3 -> do s <- arrayLoop size ptr Class.zeroTuple $ \ ptri phases -> do (y, phases') <- flip runStateT phases $ (sawOsciAction f0 =+= sawOsciAction f1) =+= (sawOsciAction f2 =+= sawOsciAction f3) flip store ptri =<< mul (0.25 :: Float) y return phases' ret (fst (fst s)) renderChorus :: IO () renderChorus = do m <- newModule _f <- defineModule m mChorusVector writeBitcodeToFile "array.bc" m fill <- simpleFunction mChorusVector IO.withFile "speedtest.f32" IO.WriteMode $ \h -> let len = 10000000 in allocaArray len $ \ ptr -> fill (fromIntegral len) ptr 0.01003 0.01001 0.00999 0.00997 >> IO.hPutBuf h ptr (len*St.sizeOf(undefined::Float)) mSaw :: CodeGenModule (Function (Word32 -> Ptr Float -> Float -> IO Float)) mSaw = createFunction ExternalLinkage $ \ size ptr freq -> do s <- arrayLoop size ptr (valueOf 0) $ \ ptri phase -> do (y, phase') <- osciSaw freq phase store y ptri return phase' ret (s :: Value Float) renderSaw :: IO () renderSaw = do fill <- simpleFunction mSaw IO.withFile "speedtest.f32" IO.WriteMode $ \h -> let len = 10000000 in allocaArray len $ \ ptr -> fill (fromIntegral len) ptr 0.01 >> IO.hPutBuf h ptr (len*St.sizeOf(undefined::Float)) mRamp :: CodeGenModule (Function (Word32 -> Ptr Float -> Float -> IO Float)) mRamp = createFunction ExternalLinkage $ \ size ptr slope -> do s <- arrayLoop size ptr (valueOf 0) $ \ ptri y -> do store y ptri add slope y ret (s :: Value Float) renderRamp :: IO () renderRamp = do fill <- simpleFunction mRamp IO.withFile "speedtest.f32" IO.WriteMode $ \h -> let len = 10000000 in allocaArray len $ \ ptr -> fill (fromIntegral len) ptr (recip $ fromIntegral len) >> IO.hPutBuf h ptr (len*St.sizeOf(undefined::Float)) main :: IO () main = do initializeNativeTarget renderChorus