{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Synthesizer.LLVM.Parameterized.Signal (
   T,
   adjacentNodes02,
   adjacentNodes13,
   amplify,
   amplifyStereo,
   Sig.empty,
   append,
   cycle,
   drop,
   exponential2,
   exponentialCore,
   exponentialBounded2,
   exponentialBoundedCore,
   interpolateConstant,
   iterate,
   lazySize,
   map,
   mapSimple,
   mapAccum,
   Sig.mix,
   Sig.mixExt,
   noise,
   noiseCore,
   osci,
   osciCore,
   osciSaw,
   osciSimple,
   parabolaCore,
   parabolaFadeIn,
   parabolaFadeInInf,
   parabolaFadeInMap,
   parabolaFadeOut,
   parabolaFadeOutInf,
   parabolaFadeOutMap,
   piecewiseConstant,
   ramp,
   rampCore,
   rampInf,
   rampSlope,
   reparameterize,
   tail,
   constant,
   Sig.envelope,
   Sig.envelopeStereo,
   simple,
   zip,
   zipWith,
   zipWithSimple,

   fromStorableVector,
   fromStorableVectorLazy,

   render,
   renderChunky,
   run,
   runChunky,
   runChunkyPattern,
   runChunkyPlugged,

   -- for testing
   noiseCoreAlt,
   ) where

import Synthesizer.LLVM.Parameterized.SignalPrivate
import qualified Synthesizer.LLVM.Simple.SignalPrivate as SigPriv
import qualified Synthesizer.LLVM.Simple.Signal as Sig
import qualified Synthesizer.LLVM.CausalParameterized.ProcessPrivate as CausalP
import qualified Synthesizer.LLVM.Causal.Process as Causal
import qualified Synthesizer.LLVM.Plug.Output as POut
import qualified Synthesizer.LLVM.Interpolation as Interpolation
import qualified Synthesizer.LLVM.ConstantPiece as Const
import qualified Synthesizer.LLVM.Parameter as Param
import Synthesizer.Causal.Class (($*), ($<), )

import qualified Synthesizer.LLVM.Frame.Stereo as Stereo
import qualified Synthesizer.LLVM.Frame as Frame
import qualified Synthesizer.LLVM.Random as Rnd
import qualified Synthesizer.LLVM.Wave as Wave
import qualified Synthesizer.LLVM.Execution as Exec
import qualified Synthesizer.LLVM.ForeignPtr as ForeignPtr
import qualified Synthesizer.LLVM.Alloc as Alloc

import qualified Synthesizer.LLVM.Storable.ChunkIterator as ChunkIt
import qualified Synthesizer.LLVM.Storable.Vector as SVU
import qualified Data.StorableVector.Lazy.Pattern as SVP
import qualified Data.StorableVector.Lazy as SVL
import qualified Data.StorableVector as SV
import qualified Data.StorableVector.Base as SVB

import qualified Data.EventList.Relative.BodyTime as EventList
import qualified Numeric.NonNegative.Chunky as Chunky
import qualified Numeric.NonNegative.Wrapper as NonNeg

import qualified LLVM.Extra.Arithmetic as A
import qualified LLVM.Extra.ScalarOrVector as SoV
import qualified LLVM.Extra.MaybeContinuation as MaybeCont
import qualified LLVM.Extra.Maybe as Maybe
import qualified LLVM.Extra.Memory as Memory
import LLVM.Extra.Class (MakeValueTuple, ValueTuple, Undefined, undefTuple, )
import LLVM.Extra.Arithmetic (advanceArrayElementPtr, )
import LLVM.Extra.Control (whileLoop, )

import qualified LLVM.ExecutionEngine as EE
import qualified LLVM.Util.Loop as Loop
import qualified LLVM.Core as LLVM
import LLVM.Core
          (CodeGenFunction, ret, Value, value, valueOf,
           IsSized, IsConst, IsArithmetic, IsFloating,
           CodeGenModule, Function, )

import qualified Type.Data.Num.Decimal as TypeNum

import Control.Monad.HT ((<=<), )
import Control.Monad (when, )
import Control.Arrow ((^<<), )
import Control.Applicative (Applicative, liftA2, liftA3, pure, (<$>), )
import Control.Functor.HT (void, )

import qualified Algebra.Transcendental as Trans
import qualified Algebra.RealField as RealField
import qualified Algebra.Algebraic as Algebraic
import qualified Algebra.Field as Field
import qualified Algebra.Additive as Additive

import Data.Functor.Compose (Compose(Compose))
import Data.Tuple.HT (mapSnd, )
import Data.Word (Word8, Word32, )
import Data.Int (Int32, )
import Foreign.Storable.Tuple ()
import Foreign.Storable (Storable, )
import Foreign.ForeignPtr (touchForeignPtr, withForeignPtr, )
import Foreign.Ptr (Ptr, nullPtr, )
import Control.Exception (bracket, )
import qualified System.Unsafe as Unsafe

import qualified Synthesizer.LLVM.Debug.Storable as DebugSt
import qualified Synthesizer.LLVM.Debug.Counter as Counter

import NumericPrelude.Numeric
import NumericPrelude.Base hiding (and, tail, iterate, map, zip, zipWith, cycle, drop, )

-- for debugChunkyMain
import qualified Control.Monad.Trans.Reader as R


reparameterize :: Param.T q p -> T p a -> T q a
reparameterize p (Cons start alloca stop next create delete) =
   Cons start alloca stop next (create . Param.get p) delete


-- * timeline edit

{- |
@tail empty@ generates the empty signal.
-}
tail :: T p a -> T p a
tail (Cons next alloca start stop createIOContext deleteIOContext) = Cons
   next
   alloca
   (\parameter -> do
      local <- alloca
      (c,s0) <- start parameter
      MaybeCont.resolve (next c local s0)
         (return (c,s0))
         (\(_a,s1) -> return (c,s1)))
   stop
   createIOContext
   deleteIOContext

drop :: Param.T p Int -> T p a -> T p a
drop n (Cons next alloca start stop createIOContext deleteIOContext) =
   Param.with (Param.word32 n) $ \getN valueN -> Cons
   next
   alloca
   (\(parameter, i0) -> do
      local <- alloca
      (c,s0) <- start parameter
      (_, _, s3) <-
         whileLoop (valueOf True, valueN i0, s0)
            (\(cont,i1,_s1) ->
               A.and cont =<<
                  A.cmp LLVM.CmpGT i1 A.zero)
            (\(_cont,i1,s1) -> do
               (cont, s2) <-
                  MaybeCont.resolve (next c local s1)
                     (return (valueOf False, s1))
                     (\(_a,s) -> return (valueOf True, s))
               i2 <- A.dec i1
               return (cont, i2, s2))
      return (c, s3))
   stop
   (\p -> do
      (ioContext, param) <- createIOContext p
      return (ioContext, (param, getN p)))
   deleteIOContext


cycle ::
   (Loop.Phi a, Undefined a) =>
   T p a -> T p a
cycle (Cons next alloca start stop createIOContext deleteIOContext) =
   Cons
      (\parameter local (c0,s0) ->
          MaybeCont.alternative
             (fmap (mapSnd ((,) c0)) $ next c0 local s0)
             (do (c1,s1) <- MaybeCont.lift $ start parameter
                 (b0,s2) <- next c1 local s1
                 return (b0,(c1,s2))))
      alloca
      (\parameter -> do
         contextState <- start parameter
         return (parameter, contextState))
      (\_parameter contextState -> uncurry stop contextState)
      createIOContext
      deleteIOContext


-- * signal modifiers

{- |
Stretch signal in time by a certain factor.

This can be used for doing expensive computations
of filter parameters at a lower rate.
Alternatively, we could provide an adaptive @map@
that recomputes output values only if the input value changes,
or if the input value differs from the last processed one by a certain amount.
-}
interpolateConstant ::
   (Memory.C a,
    IsFloating b, SoV.IntegerConstant b, LLVM.CmpRet b, LLVM.CmpResult b ~ Bool,
    Storable b, MakeValueTuple b, ValueTuple b ~ (Value b),
    Memory.FirstClass b, IsSized (Memory.Stored b)) =>
   Param.T p b -> T p a -> T p a
interpolateConstant k sig =
   CausalP.toSignal
      (Causal.quantizeLift (CausalP.fromSignal sig) $< constant k)


amplify ::
   (A.PseudoRing al, Storable a,
    MakeValueTuple a, ValueTuple a ~ al, Memory.C al) =>
   Param.T p a -> T p al -> T p al
amplify =
   map Frame.amplifyMono

amplifyStereo ::
   (A.PseudoRing al, Storable a,
    MakeValueTuple a, ValueTuple a ~ al, Memory.C al) =>
   Param.T p a -> T p (Stereo.T al) -> T p (Stereo.T al)
amplifyStereo =
   map Frame.amplifyStereo


mapAccum ::
   (Storable pnh, MakeValueTuple pnh, ValueTuple pnh ~ pnl, Memory.C pnl,
    Storable psh, MakeValueTuple psh, ValueTuple psh ~ psl, Memory.C psl,
    Memory.C s) =>
   (forall r. pnl -> a -> s -> CodeGenFunction r (b,s)) ->
   (forall r. psl -> CodeGenFunction r s) ->
   Param.T p pnh ->
   Param.T p psh ->
   T p a -> T p b
mapAccum next start n s xs =
   CausalP.mapAccum next start n s $* xs

adjacentNodes02 ::
   (Memory.C a, Undefined a) =>
   T p a -> T p (Interpolation.Nodes02 a)
adjacentNodes02 =
   tail
   .
   Sig.mapAccum
      (\new old -> return (Interpolation.Nodes02 old new, new))
      (return undefTuple)

adjacentNodes13 ::
   (MakeValueTuple ah, Storable ah, ValueTuple ah ~ a,
    Memory.C a, Undefined a) =>
   Param.T p ah -> T p a -> T p (Interpolation.Nodes13 a)
adjacentNodes13 yp0 =
   tail .
   tail .
   mapAccum
      (\() new (x0, x1, x2) ->
         return (Interpolation.Nodes13 x0 x1 x2 new, (x1, x2, new)))
      (\y0 -> return (undefTuple, undefTuple, Param.value yp0 y0))
      (pure ()) yp0



-- * signal generators


exponentialCore ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.PseudoRing al) =>
   Param.T p a -> Param.T p a -> T p al
exponentialCore =
   iterate A.mul

exponential2 ::
   (Trans.C a, Storable a, MakeValueTuple a, ValueTuple a ~ (Value a),
    Memory.FirstClass a, IsSized (Memory.Stored a),
    IsArithmetic a, IsConst a) =>
   Param.T p a -> Param.T p a -> T p (Value a)
exponential2 halfLife =
   exponentialCore (0.5 ** recip halfLife)


exponentialBoundedCore ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.PseudoRing al, A.Real al) =>
   Param.T p a -> Param.T p a -> Param.T p a ->
   T p al
exponentialBoundedCore bound decay =
   iterate
      (\(b,k) y -> A.max b =<< A.mul k y)
      (liftA2 (,) bound decay)

{- |
Exponential curve that remains at the bound value
if it would fall below otherwise.
This way you can avoid extremal values, e.g. denormalized ones.
The initial value and the bound value must be positive.
-}
exponentialBounded2 ::
   (Trans.C a, Storable a, MakeValueTuple a, ValueTuple a ~ (Value a),
    Memory.FirstClass a, IsSized (Memory.Stored a),
    SoV.Real a, IsConst a) =>
   Param.T p a -> Param.T p a -> Param.T p a ->
   T p (Value a)
exponentialBounded2 bound halfLife =
   exponentialBoundedCore bound (0.5 ** recip halfLife)


osciCore ::
   (Storable t, MakeValueTuple t, ValueTuple t ~ tl,
    Memory.C tl, A.Fraction tl) =>
   Param.T p t -> Param.T p t -> T p tl
osciCore phase freq =
   iterate A.incPhase freq phase

osci ::
   (Storable t, MakeValueTuple t, ValueTuple t ~ tl,
    Storable c, MakeValueTuple c, ValueTuple c ~ cl,
    Memory.C cl,
    Memory.C tl, A.Fraction tl, A.IntegerConstant tl) =>
   (forall r. cl -> tl -> CodeGenFunction r y) ->
   Param.T p c ->
   Param.T p t -> Param.T p t -> T p y
osci wave waveParam phase freq =
   map wave waveParam $ osciCore phase freq

osciSimple ::
   (Storable t, MakeValueTuple t, ValueTuple t ~ tl,
    Memory.C tl, A.Fraction tl, A.IntegerConstant tl) =>
   (forall r. tl -> CodeGenFunction r y) ->
   Param.T p t -> Param.T p t -> T p y
osciSimple wave =
   osci (const wave) (return ())

osciSaw ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.PseudoRing al, A.Fraction al, A.IntegerConstant al) =>
   Param.T p a -> Param.T p a -> T p al
osciSaw =
   osciSimple Wave.saw



rampCore ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.Additive al, A.IntegerConstant al) =>
   Param.T p a -> Param.T p a -> T p al
rampCore = iterate A.add

parabolaCore ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.Additive al, A.IntegerConstant al) =>
   Param.T p a -> Param.T p a -> Param.T p a -> T p al
parabolaCore d2 d1 start =
   CausalP.apply (CausalP.integrate start) $
   rampCore d2 d1



rampInf, rampSlope,
 parabolaFadeInInf, parabolaFadeOutInf ::
   (Field.C a, Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.Additive al, A.IntegerConstant al) =>
   Param.T p a -> T p al
rampSlope slope  =  rampCore slope Additive.zero
rampInf dur  =  rampSlope (recip dur)

{-
t*(2-t) = 1 - (t-1)^2

(t+d)*(2-t-d) - t*(2-t)
   = d*(2-t) - d*t - d^2
   = 2*d*(1-t) - d^2
   = d*(2*(1-t) - d)

2*d*(1-t-d) + d^2  -  (2*d*(1-t) + d^2)
   = -2*d^2
-}
parabolaFadeInInf dur =
   parabolaCore
      (fmap (\d -> -2*d*d)  $ recip dur)
      (fmap (\d -> d*(2-d)) $ recip dur)
      Additive.zero

{-
1-t^2
-}
parabolaFadeOutInf dur =
   parabolaCore
      (fmap (\d -> -2*d*d) $ recip dur)
      (fmap (\d ->   -d*d) $ recip dur)
      one

ramp,
 parabolaFadeIn, parabolaFadeOut,
 parabolaFadeInMap, parabolaFadeOutMap ::
   (RealField.C a, Storable a, MakeValueTuple a, ValueTuple a ~ al,
    Memory.C al, A.PseudoRing al, A.IntegerConstant al) =>
   Param.T p a -> T p al

ramp dur =
   CausalP.apply (CausalP.take (fmap round dur)) $
   rampInf dur

parabolaFadeIn dur =
   CausalP.apply (CausalP.take (fmap round dur)) $
   parabolaFadeInInf dur

parabolaFadeOut dur =
   CausalP.apply (CausalP.take (fmap round dur)) $
   parabolaFadeOutInf dur

parabolaFadeInMap dur =
   -- t*(2-t)
   CausalP.apply (CausalP.mapSimple (\t -> A.mul t =<< A.sub (A.fromInteger' 2) t)) $
   ramp dur

parabolaFadeOutMap dur =
   -- 1-t^2
   CausalP.apply (CausalP.mapSimple (\t -> A.sub (A.fromInteger' 1) =<< A.mul t t)) $
   ramp dur


{- |
@noise seed rate@

The @rate@ parameter is for adjusting the amplitude
such that it is uniform across different sample rates
and after frequency filters.
The @rate@ is the ratio of the current sample rate to the default sample rate,
where the variance of the samples would be one.
If you want that at sample rate 22050 the variance is 1,
then in order to get a consistent volume at sample rate 44100
you have to set @rate = 2@.

I use the variance as quantity and not the amplitude,
because the amplitude makes only sense for uniformly distributed samples.
However, frequency filters transform the probabilistic density of the samples
towards the normal distribution according to the central limit theorem.
-}
noise ::
   (Algebraic.C a, IsFloating a, IsConst a,
    LLVM.ShapeOf a ~ LLVM.ScalarShape,
    Memory.C (Value a),
    MakeValueTuple a, ValueTuple a ~ (Value a), Storable a) =>
   Param.T p Word32 ->
   Param.T p a ->
   T p (Value a)
noise seed rate =
   let m2 = fromInteger $ div Rnd.modulus 2
   in  map (\r y ->
          A.mul r
           =<< flip A.sub (valueOf $ m2+1)
           =<< int31tofp y)
          (sqrt (3 * rate) / return m2) $
       noiseCore seed

{-
sitofp is a single instruction on x86
and thus we use it, since the arguments are below 2^31.
-}
int31tofp ::
   (IsFloating a, LLVM.ShapeOf a ~ LLVM.ScalarShape) =>
   Value Word32 -> CodeGenFunction r (Value a)
int31tofp =
   LLVM.inttofp <=<
   (LLVM.bitcast ::
       Value Word32 -> CodeGenFunction r (Value Int32))

noiseCore, noiseCoreAlt ::
   Param.T p Word32 ->
   T p (Value Word32)
noiseCore seed =
   iterate (const Rnd.nextCG)
      (return ()) ((+1) . flip mod (Rnd.modulus-1) ^<< seed)

noiseCoreAlt seed =
   iterate (const Rnd.nextCG32)
      (return ()) ((+1) . flip mod (Rnd.modulus-1) ^<< seed)


-- * conversion from and to storable vectors

fromStorableVector ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   Param.T p (SV.Vector a) ->
   T p value
fromStorableVector selectVec =
   Cons
      (\() () (p0,l0) -> do
         cont <- MaybeCont.lift $ A.cmp LLVM.CmpGT l0 A.zero
         MaybeCont.withBool cont $ do
            y1 <- Memory.load p0
            p1 <- advanceArrayElementPtr p0
            l1 <- A.dec l0
            return (y1,(p1,l1)))
      (return ())
      (return . (,) ())
      (\() _ -> return ())
      (\p ->
         let (fp,ptr,l) = SVU.unsafeToPointers $ Param.get selectVec p
         in  return (fp, (ptr, fromIntegral l :: Word32)))
      -- keep the foreign ptr alive
      touchForeignPtr

{-
This function calls back into the Haskell function 'ChunkIt.next'
that returns a pointer to the data of the next chunk
and advances to the next chunk in the sequence.
-}
fromStorableVectorLazy ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   Param.T p (SVL.Vector a) ->
   T p value
fromStorableVectorLazy = SigPriv.flattenChunks . storableVectorChunks

storableVectorChunks ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value,
    Memory.Struct value ~ struct) =>
   Param.T p (SVL.Vector a) ->
   T p (Value (Ptr struct), Value Word32)
storableVectorChunks sig =
   Cons
      (SigPriv.storableVectorNextChunk
         "Parameterized.Signal.fromStorableVectorLazy.nextChunk")
      LLVM.alloca
      (\s -> return (s, ()))
      (\ _s _ -> return ())
      (\p -> do
          s <- ChunkIt.new (Param.get sig p)
          return (s, s))
      ChunkIt.dispose



piecewiseConstant ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   Param.T p (EventList.T NonNeg.Int a) ->
   T p value
piecewiseConstant =
   Const.flatten . Const.piecewiseConstant



{- |
Turns a lazy chunky size into a signal generator with unit element type.
The signal length is the only information that the generator provides.
Using 'zipWith' you can use this signal as a lazy 'take'.
-}
lazySize ::
   Param.T p SVP.LazySize ->
   T p ()
lazySize =
   Const.flatten . Const.lazySize



createFunction ::
   (Functor genMod, EE.ExecutionFunction fun) =>
   Exec.Importer fun -> genMod (Function fun) ->
   Compose genMod EE.EngineAccess fun
createFunction importer modul =
   Compose $ EE.getExecutionFunction importer <$> modul

createFinalizer ::
   (Applicative genMod, EE.ExecutionFunction fun) =>
   Exec.Importer fun -> genMod (Function fun) ->
   Compose genMod EE.EngineAccess (EE.ExecutionEngine, fun)
createFinalizer importer modul =
   liftA2 (,)
      (Compose $ pure EE.getEngine)
      (createFunction importer modul)


foreign import ccall safe "dynamic" derefFillPtr ::
   Exec.Importer (Ptr param -> Word32 -> Ptr a -> IO Word32)


moduleFill ::
   (Memory.C value, Memory.Struct value ~ struct,
    Memory.C parameters, Memory.Struct parameters ~ paramStruct,
    Loop.Phi state, Undefined state) =>
   (forall r z.
    (Loop.Phi z) =>
    context -> local -> state -> MaybeCont.T r z (value, state)) ->
   (forall r. CodeGenFunction r local) ->
   (forall r.
    parameters -> CodeGenFunction r (context, state)) ->
   (forall r.
    context -> state -> CodeGenFunction r ()) ->
   CodeGenModule
      (Function (Ptr paramStruct -> Word32 -> Ptr struct -> IO Word32))
moduleFill next alloca start stop =
   Exec.createLLVMFunction "fillsignalblock" $
   \paramPtr size bPtr -> do
      param <- Memory.load paramPtr
      (c,s) <- start param
      local <- alloca
      (pos,se) <- MaybeCont.arrayLoop size bPtr s $ \ ptri s0 -> do
         (y,s1) <- next c local s0
         MaybeCont.lift $ Memory.store y ptri
         return s1
      Maybe.for se $ stop c
      ret pos

debugMain ::
   forall parameters struct paramStruct.
   (Storable parameters,
    LLVM.IsType struct,
    LLVM.IsType paramStruct, IsSized paramStruct) =>
   CodeGenModule
      (Function (Ptr paramStruct -> Word32 -> Ptr struct -> IO Word32)) ->
   parameters ->
   IO (Function (Word32 -> Ptr (Ptr Word8) -> IO Word32))
debugMain sigModule params = do
   paramArray <-
      DebugSt.withConstArray params (\arr -> do
         ptr <- LLVM.alloca
         LLVM.store (value arr) =<< LLVM.bitcast ptr
         return ptr)

   m <- LLVM.newModule

   mainFunc <- LLVM.defineModule m (do
      LLVM.setTarget LLVM.hostTriple
      mallocBytes <- LLVM.newNamedFunction LLVM.ExternalLinkage "malloc" ::
         LLVM.TFunction (Ptr Word8 -> IO (Ptr struct))
      fill <- sigModule
      Exec.createLLVMFunction "main" $ \ _argc _argv -> do
         paramPtr <- paramArray
         let chunkSize = LLVM.valueOf 100000
             basePtr = LLVM.valueOf nullPtr
         buffer <-
            LLVM.call mallocBytes =<<
            LLVM.bitcast =<<
            LLVM.getElementPtr basePtr (chunkSize, ())
         _done <-
            LLVM.call fill paramPtr chunkSize (asTypeOf buffer basePtr)
         ret (A.zero :: LLVM.Value Word32))

   Counter.with Exec.counter $ R.ReaderT $ \cnt -> do
      LLVM.writeBitcodeToFile ("main" ++ Counter.format 3 cnt ++ ".bc") m

   return mainFunc


run ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   T p value ->
   IO (Int -> p -> SV.Vector a)
run (Cons next alloca start stop createIOContext deleteIOContext) =
   do -- this compiles once and is much faster than simpleFunction
      let modul = moduleFill next alloca start stop
      fill <- Exec.compileModule $ createFunction derefFillPtr modul

      return $ \len p ->
         Unsafe.performIO $
         bracket (createIOContext p) (deleteIOContext . fst) $
         \ (_,params) -> do
            when False $ void $ debugMain modul params

            SVB.createAndTrim len $ \ ptr ->
               Alloc.with params $ \paramPtr ->
               fmap fromIntegral $
                  fill (Memory.castTuplePtr paramPtr)
                     (fromIntegral len) (Memory.castTuplePtr ptr)

{- |
This is not really a function, see 'renderChunky'.
-}
render ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   T p value -> Int -> p -> SV.Vector a
render gen = Unsafe.performIO $ run gen


foreign import ccall safe "dynamic" derefStartPtr ::
   Exec.Importer (Ptr b -> IO (Ptr a))

foreign import ccall safe "dynamic" derefStopPtr ::
   Exec.Importer (Ptr a -> IO ())

foreign import ccall safe "dynamic" derefChunkPtr ::
   Exec.Importer (Ptr contextStateStruct -> Word32 -> Ptr struct -> IO Word32)


moduleStart ::
   (Memory.C parameters, Memory.Struct parameters ~ paramStruct,
    Memory.C context, Memory.C state,
    Memory.Struct (context, Maybe.T state) ~ contextStateStruct) =>
   (forall r. parameters -> CodeGenFunction r (context, state)) ->
   CodeGenModule (Function (Ptr paramStruct -> IO (Ptr contextStateStruct)))
moduleStart start =
   Exec.createLLVMFunction "startsignal" $
   \paramPtr -> do
      pptr <- LLVM.malloc
      flip Memory.store pptr . mapSnd Maybe.just
         =<< start =<< Memory.load paramPtr
      ret pptr

moduleStop ::
   (Memory.C context, Memory.C state,
    Memory.Struct (context, Maybe.T state) ~ contextStateStruct) =>
   (forall r. context -> state -> CodeGenFunction r ()) ->
   CodeGenModule (Function (Ptr contextStateStruct -> IO ()))
moduleStop stop =
   Exec.createLLVMFunction "stopsignal" $
   \contextStatePtr -> do
      (c,ms) <- Memory.load contextStatePtr
      Maybe.for ms $ stop c
      LLVM.free contextStatePtr
      ret ()

moduleNext ::
   (Memory.C value, Memory.Struct value ~ struct,
    Memory.C context, Memory.C state,
    Memory.Struct (context, Maybe.T state) ~ contextStateStruct) =>
   (forall r z.
    (Loop.Phi z) =>
    context -> local -> state -> MaybeCont.T r z (value, state)) ->
   (forall r. CodeGenFunction r local) ->
   CodeGenModule
      (Function (Ptr contextStateStruct -> Word32 -> Ptr struct -> IO Word32))
moduleNext next alloca =
   Exec.createLLVMFunction "fillsignal" $
   \contextStatePtr loopLen ptr -> do
      (context, msInit) <- Memory.load contextStatePtr
      local <- alloca
      (pos,msExit) <-
         Maybe.run msInit (return (A.zero, Maybe.nothing)) $ \sInit ->
            MaybeCont.arrayLoop loopLen ptr sInit $ \ ptri s0 -> do
         (y,s1) <- next context local s0
         MaybeCont.lift $ Memory.store y ptri
         return s1
      sptr <- LLVM.getElementPtr0 contextStatePtr (TypeNum.d1, ())
      Memory.store msExit sptr
      ret pos


moduleNextPlugged ::
   (Memory.C context, Memory.C state,
    Memory.Struct (context, Maybe.T state) ~ contextStateStruct,
    Undefined stateOut, Loop.Phi stateOut,
    Memory.C paramValueOut, Memory.Struct paramValueOut ~ paramStructOut) =>
   (forall r z.
    (Loop.Phi z) =>
    context -> local -> state -> MaybeCont.T r z (value, state)) ->
   (forall r. CodeGenFunction r local) ->
   (forall r.
    paramValueOut ->
    value -> stateOut -> LLVM.CodeGenFunction r stateOut) ->
   (forall r.
    paramValueOut ->
    LLVM.CodeGenFunction r stateOut) ->
   CodeGenModule
      (Function
         (Ptr contextStateStruct -> Word32 -> Ptr paramStructOut -> IO Word32))
moduleNextPlugged next alloca nextOut startOut =
   Exec.createLLVMFunction "fillsignal" $
   \contextStatePtr loopLen outPtr -> do
      (context, msInit) <- Memory.load contextStatePtr
      outParam <- Memory.load outPtr
      outInit <- startOut outParam
      local <- alloca
      (pos,msExit) <-
         Maybe.run msInit (return (A.zero, Maybe.nothing)) $ \sInit ->
            MaybeCont.fixedLengthLoop loopLen (sInit, outInit) $
               \ (s0,out0) -> do
         (y,s1) <- next context local s0
         out1 <- MaybeCont.lift $ nextOut outParam y out0
         return (s1, out1)
      sptr <- LLVM.getElementPtr0 contextStatePtr (TypeNum.d1, ())
      Memory.store (fmap fst msExit) sptr
      ret pos

debugChunkyMain ::
   forall parameters struct paramStruct contextStateStruct.
   (Storable parameters,
    LLVM.IsType struct,
    LLVM.IsType contextStateStruct,
    LLVM.IsType paramStruct, IsSized paramStruct) =>
   CodeGenModule
      (Function (Ptr paramStruct -> IO (Ptr contextStateStruct)),
       Function (Ptr contextStateStruct -> IO ()),
       Function (Ptr contextStateStruct ->
                 Word32 -> Ptr struct -> IO Word32)) ->
   parameters ->
   IO (Function (Word32 -> Ptr (Ptr Word8) -> IO Word32))
debugChunkyMain sigModule params = do
{-
This does not work, since we cannot add (Mul n D32 s) constraint
to the function argument in reifyIntegral.
   nextArray <-
      DebugSt.withConstArray nextParam (\arr -> do
         ptr <- LLVM.alloca
         LLVM.store (value arr) ptr
         LLVM.bitcast ptr)
-}
   paramArray <-
      DebugSt.withConstArray params (\arr -> do
         ptr <- LLVM.alloca
         LLVM.store (value arr) =<< LLVM.bitcast ptr
         return ptr)

   m <- LLVM.newModule

   mainFunc <- LLVM.defineModule m (do
      LLVM.setTarget LLVM.hostTriple
      mallocBytes <- LLVM.newNamedFunction LLVM.ExternalLinkage "malloc" ::
         LLVM.TFunction (Ptr Word8 -> IO (Ptr struct))
      (start, stop, fill) <- sigModule
      Exec.createLLVMFunction "main" $ \ _argc _argv -> do
         contextState <- LLVM.call start =<< paramArray
         let chunkSize = LLVM.valueOf 100000
             basePtr = LLVM.valueOf nullPtr
         buffer <-
            LLVM.call mallocBytes =<<
            LLVM.bitcast =<<
            LLVM.getElementPtr basePtr (chunkSize, ())
         _done <-
            LLVM.call fill contextState chunkSize (asTypeOf buffer basePtr)
         _ <- LLVM.call stop contextState
         ret (A.zero :: LLVM.Value Word32))

   Counter.with Exec.counter $ R.ReaderT $ \cnt -> do
      LLVM.writeBitcodeToFile ("main" ++ Counter.format 3 cnt ++ ".bc") m

   return mainFunc



{- |
Renders a signal generator to a chunky storable vector with given pattern.
If the pattern is shorter than the generated signal
this means that the signal is shortened.
-}
runChunkyPattern, _runChunkyPattern ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   T p value ->
   IO (SVP.LazySize -> p -> SVL.Vector a)
_runChunkyPattern =
   fmap (\f size -> SVL.fromChunks . f size) .
   flip runChunkyPatternPlugged POut.deflt

runChunkyPattern
      (Cons next alloca start stop createIOContext deleteIOContext) = do

   let startF = moduleStart start
   let stopF = moduleStop stop
   let nextF = moduleNext next alloca

   (startFunc, stopFunc, fill) <-
      Exec.compileModule $
      liftA3 (,,)
         (createFunction derefStartPtr startF)
         (createFinalizer derefStopPtr stopF)
         (createFunction derefChunkPtr nextF)

   return $
      \ lazysize p -> SVL.fromChunks $ Unsafe.performIO $ do
         (ioContext, param) <- createIOContext p

{-
         putStr "nextParam: "
         DebugSt.format nextParam >>= putStrLn
-}
         when False $ Counter.with DebugSt.dumpCounter $ do
            DebugSt.dump "param" param

         when False $ void $
            debugChunkyMain (liftA3 (,,) startF stopF nextF) param

         statePtr <- ForeignPtr.newParam stopFunc startFunc param
         ioContextPtr <- ForeignPtr.newAux (deleteIOContext ioContext)

         let go cs =
                Unsafe.interleaveIO $
                case cs of
                   [] -> return []
                   SVL.ChunkSize size : rest -> do
                      v <-
                         withForeignPtr statePtr $ \sptr ->
                         SVB.createAndTrim size $
                         fmap fromIntegral .
                         fill sptr (fromIntegral size) .
                         Memory.castTuplePtr
                      touchForeignPtr ioContextPtr
                      (if SV.length v > 0
                         then fmap (v:)
                         else id) $
                         (if SV.length v < size
                            then return []
                            else go rest)
         go (Chunky.toChunks lazysize)

runChunkyPatternPlugged ::
   T p value ->
   POut.T value chunk ->
   IO (SVP.LazySize -> p -> [chunk])
runChunkyPatternPlugged
      (Cons next alloca start stop createIOContext deleteIOContext)
      (POut.Cons nextOut startOut createOut deleteOut) = do

   (startFunc, stopFunc, fill) <-
      Exec.compileModule $
      liftA3 (,,)
         (createFunction derefStartPtr $ moduleStart start)
         (createFinalizer derefStopPtr $ moduleStop stop)
         (createFunction derefChunkPtr $
          moduleNextPlugged next alloca nextOut startOut)

   return $
      \ lazysize p -> Unsafe.performIO $ do
         (ioContext, param) <- createIOContext p

         statePtr <- ForeignPtr.newParam stopFunc startFunc param
         ioContextPtr <- ForeignPtr.newAux (deleteIOContext ioContext)

         let go cs =
                Unsafe.interleaveIO $
                case cs of
                   [] -> return []
                   SVL.ChunkSize maximumSize : rest -> do
                      (contextOut,paramOut) <- createOut maximumSize
                      actualSize <-
                         fmap fromIntegral $
                         Alloc.with paramOut $ \outptr ->
                         withForeignPtr statePtr $ \sptr ->
                         fill sptr
                            (fromIntegral maximumSize)
                            (Memory.castTuplePtr outptr)
                      when (fromIntegral actualSize > maximumSize) $
                         error $ "Parametrized.Signal: " ++
                                 "output size " ++ show actualSize ++
                                 " > input size " ++ show maximumSize
                      v <- deleteOut actualSize contextOut
                      touchForeignPtr ioContextPtr
                      (if actualSize > 0
                         then fmap (v:)
                         else id) $
                         (if actualSize < maximumSize
                            then return []
                            else go rest)
         go (Chunky.toChunks lazysize)

runChunky, _runChunky ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   T p value ->
   IO (SVL.ChunkSize -> p -> SVL.Vector a)
runChunky sig =
   flip fmap (runChunkyPattern sig) $ \f size p ->
      f (Chunky.fromChunks (repeat size)) p

_runChunky =
   fmap (\f size -> SVL.fromChunks . f size) .
   flip runChunkyPlugged POut.deflt

runChunkyPlugged ::
   T p value ->
   POut.T value chunk ->
   IO (SVL.ChunkSize -> p -> [chunk])
runChunkyPlugged sig plug =
   flip fmap (runChunkyPatternPlugged sig plug) $ \f size p ->
      f (Chunky.fromChunks (repeat size)) p

{- |
This looks like a function,
but it is not a function since it depends on LLVM being initialized
with LLVM.initializeNativeTarget before.
It is also problematic since you cannot control when and how often
the underlying LLVM code is compiled.
The compilation cannot be observed, thus it is referential transparent.
But this influences performance considerably
and I assume that you use this package exclusively for performance reasons.
-}
renderChunky ::
   (Storable a, MakeValueTuple a, ValueTuple a ~ value, Memory.C value) =>
   SVL.ChunkSize -> T p value ->
   p -> SVL.Vector a
renderChunky size gen =
   Unsafe.performIO (runChunky gen) size