{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Synthesizer.LLVM.Parameterized.Signal (
   T(Cons), simple, map, mapSimple, iterate,
   module Synthesizer.LLVM.Parameterized.Signal
   ) where

import Synthesizer.LLVM.Parameterized.SignalPrivate
import qualified Synthesizer.LLVM.CausalParameterized.ProcessPrivate as Causal
import qualified Synthesizer.LLVM.Parameter as Param

import qualified Synthesizer.LLVM.Random as Rnd
import qualified Synthesizer.LLVM.Wave as Wave
import qualified Synthesizer.LLVM.Sample as Sample
import qualified Synthesizer.LLVM.Execution as Exec
import qualified LLVM.Extra.ScalarOrVector as SoV
import qualified LLVM.Extra.MaybeContinuation as Maybe
import qualified LLVM.Extra.Representation as Rep
import LLVM.Extra.Control (whileLoop, ifThen, )

import qualified Synthesizer.LLVM.Storable.ChunkIterator as ChunkIt
import qualified Synthesizer.LLVM.Storable.LazySizeIterator as SizeIt
import qualified Data.StorableVector.Lazy.Pattern as SVP
import qualified Data.StorableVector.Lazy as SVL
import qualified Data.StorableVector as SV
import qualified Data.StorableVector.Base as SVB

import qualified Synthesizer.LLVM.EventIterator as EventIt
import qualified Data.EventList.Relative.BodyTime as EventList
import qualified Numeric.NonNegative.Chunky as Chunky
import qualified Numeric.NonNegative.Wrapper as NonNeg

import qualified Synthesizer.LLVM.Frame.Stereo as Stereo

import qualified LLVM.Extra.Arithmetic as A
import qualified LLVM.Extra.ScalarOrVector as SoV
import LLVM.Extra.Arithmetic (advanceArrayElementPtr, )

import LLVM.Core as LLVM
import qualified LLVM.Util.Loop as Loop
import qualified Data.TypeLevel.Num as TypeNum

import Control.Monad (liftM2, liftM3, )
import Control.Arrow ((^<<), )
import Control.Applicative (liftA2, )

import qualified Algebra.Transcendental as Trans
import qualified Algebra.RealField as RealField
import qualified Algebra.Algebraic as Algebraic
import qualified Algebra.Field as Field
import qualified Algebra.Ring as Ring
import qualified Algebra.Additive as Additive

import Data.Word (Word32, )
import Foreign.Storable.Tuple ()
import Foreign.Storable (Storable, poke, )
import Foreign.Marshal.Array (advancePtr, )
import qualified Foreign.Marshal.Array as Array
import qualified Foreign.Marshal.Alloc as Alloc
import Foreign.ForeignPtr
          (unsafeForeignPtrToPtr, touchForeignPtr, withForeignPtr, )
import Foreign.Ptr (FunPtr, nullPtr, )
import Control.Exception (bracket, )
import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO, )

import Data.Tuple.HT (swap, )

import NumericPrelude.Numeric
import NumericPrelude.Base hiding (and, iterate, map, zip, zipWith, )


infixl 0 $#

($#) :: (Param.T p a -> b) -> (a -> b)
($#) f a = f (return a)


mapAccum ::
   (Storable pnh, MakeValueTuple pnh pnl, Rep.Memory pnl pnp, IsSized pnp pns,
    Storable psh, MakeValueTuple psh psl, Rep.Memory psl psp, IsSized psp pss,
    Rep.Memory s struct, IsSized struct sa) =>
   (forall r. pnl -> a -> s -> CodeGenFunction r (b,s)) ->
   (forall r. psl -> CodeGenFunction r s) ->
   Param.T p pnh ->
   Param.T p psh ->
   T p a -> T p b
mapAccum f startS selectParamF selectParamS
      (Cons next start createIOContext deleteIOContext) =
   Cons
      (\(parameterF, parameter) (sa0,ss0) -> do
         (a,sa1) <- next parameter sa0
         (b,ss1) <- Maybe.lift $ f (Param.value selectParamF parameterF) a ss0
         return (b, (sa1,ss1)))
      (\(parameterF, parameter) ->
         liftM2 (,) (start parameter) (startS (Param.value selectParamS parameterF)))
      (\p -> do
         (ioContext, (nextParam, startParam)) <- createIOContext p
         return (ioContext, ((Param.get selectParamF p, nextParam),
                             (Param.get selectParamS p, startParam))))
      deleteIOContext


zipWith ::
   (Storable ph, MakeValueTuple ph pl, Rep.Memory pl pp, IsSized pp ps) =>
   (forall r. pl -> a -> b -> CodeGenFunction r c) ->
   Param.T p ph ->
   T p a -> T p b -> T p c
zipWith f selectParamF
      (Cons nextA startA createIOContextA deleteIOContextA)
      (Cons nextB startB createIOContextB deleteIOContextB) =
   Cons
      (\(parameterF, (parameterA, parameterB)) (sa0,sb0) -> do
         (a,sa1) <- nextA parameterA sa0
         (b,sb1) <- nextB parameterB sb0
         c <- Maybe.lift $ f (Param.value selectParamF parameterF) a b
         return (c, (sa1,sb1)))
      (\(parameterA, parameterB) ->
         liftM2 (,)
            (startA parameterA)
            (startB parameterB))
      (\p -> do
         (ca,(nextParamA,startParamA)) <- createIOContextA p
         (cb,(nextParamB,startParamB)) <- createIOContextB p
         return ((ca,cb),
            ((Param.get selectParamF p, (nextParamA,  nextParamB)),
             (startParamA, startParamB))))
      (\(ca,cb) ->
         deleteIOContextA ca >>
         deleteIOContextB cb)

zipWithSimple ::
   (forall r. a -> b -> CodeGenFunction r c) ->
   T p a -> T p b -> T p c
zipWithSimple f =
   zipWith (const f) (return ())

zip :: T p a -> T p b -> T p (a,b)
zip = zipWithSimple (\a b -> return (a,b))


-- * timeline edit

{- |
@tail empty@ generates the empty signal.
-}
tail ::
   T p a -> T p a
tail (Cons next start createIOContext deleteIOContext) = Cons
   next
   (\(nextParameter, startParameter) -> do
      s0 <- start startParameter
      Maybe.resolve (next nextParameter s0)
         (return s0)
         (\(_a,s1) -> return s1))
   (\p -> do
      (ioContext, (nextParam, startParam)) <- createIOContext p
      return (ioContext, (nextParam, (nextParam, startParam))))
   deleteIOContext

drop ::
   Param.T p Int ->
   T p a -> T p a
drop n (Cons next start createIOContext deleteIOContext) =
      let n32 = fmap (fromIntegral :: Int -> Word32) n in Cons
   next
   (\(nextParameter, i0, startParameter) -> do
      s0 <- start startParameter
      (_, _, s3) <-
         whileLoop (valueOf True, Param.value n32 i0, s0)
            (\(cont,i1,_s1) ->
               A.and cont =<<
                  A.icmp IntUGT i1 (value LLVM.zero))
            (\(_cont,i1,s1) -> do
               (cont, s2) <-
                  Maybe.resolve (next nextParameter s1)
                     (return (valueOf False, s1))
                     (\(_a,s) -> return (valueOf True, s))
               i2 <- A.dec i1
               return (cont, i2, s2))
      return s3)
   (\p -> do
      (ioContext, (nextParam, startParam)) <- createIOContext p
      return (ioContext, (nextParam,
         (nextParam, Param.get n32 p, startParam))))
   deleteIOContext

{- |
Appending many signals is inefficient,
since in cascadingly appended signals the parts are counted in an unary way.
Concatenating infinitely many signals is impossible.
If you want to concatenate a lot of signals,
please render them to lazy storable vectors first.
-}
{-
We might save a little space by using a union
for the states of the first and the second signal generator.
-}
append ::
   (Loop.Phi a) =>
   T p a -> T p a -> T p a
append
      (Cons nextA startA createIOContextA deleteIOContextA)
      (Cons nextB startB createIOContextB deleteIOContextB) =
   Cons
      (\(parameterA, parameterB) (firstPart,(sa0,sb0)) ->
            Maybe.fromBool $ do
         (contA, (a,sa1)) <-
            ifThen firstPart (valueOf False, (undefTuple,sa0))
               (Maybe.toBool $ nextA parameterA sa0)
         secondPart <- inv contA
         (contB, (b,sb1)) <-
            ifThen secondPart (valueOf True, (a,sb0))
               (Maybe.toBool $ nextB parameterB sb0)
         return (contB, (b, (contA, (sa1,sb1)))))
      (\(parameterA, parameterB) ->
         fmap ((,) (valueOf True)) $
         liftM2 (,)
            (startA parameterA)
            (startB parameterB))
      (\p -> do
         (ca,(nextParamA,startParamA)) <- createIOContextA p
         (cb,(nextParamB,startParamB)) <- createIOContextB p
         return ((ca,cb),
            ((nextParamA, nextParamB),
             (startParamA, startParamB))))
      (\(ca,cb) ->
         deleteIOContextA ca >>
         deleteIOContextB cb)


-- * signal modifiers

{- |
Stretch signal in time by a certain factor.

This can be used for doing expensive computations
of filter parameters at a lower rate.
Alternatively, we could provide an adaptive @map@
that recomputes output values only if the input value changes,
or if the input value differs from the last processed one by a certain amount.
-}
interpolateConstant ::
   (Rep.Memory a struct, IsSized struct size,
    Ring.C b,
    IsFloating b, CmpRet b Bool,
    Storable b, MakeValueTuple b (Value b),
    IsConst b, IsFirstClass b, IsSized b sb) =>
   Param.T p b -> T p a -> T p a
interpolateConstant k
      (Cons next start createIOContext deleteIOContext) =
   Cons
      (\(kl,parameter) yState0 -> do
         ((y1,state1), ss1) <-
            Maybe.fromBool $
            whileLoop
               (valueOf True, yState0)
               (\(cont1, (_, ss1)) ->
                  and cont1 =<< A.fcmp FPOLE ss1 (value LLVM.zero))
               (\(_,((_,state01), ss1)) ->
                  Maybe.toBool $ liftM2 (,)
                     (next parameter state01)
                     (Maybe.lift $ A.add ss1 (Param.value k kl)))

         ss2 <- Maybe.lift $ A.sub ss1 (valueOf Ring.one)
         return (y1, ((y1,state1),ss2)))

{- using this initialization code we would not need undefined values
      (do sa <- start
          (a,_) <- next sa
          return (sa, a, valueOf 0))
-}
      (fmap (\sa -> ((undefTuple, sa), value LLVM.zero)) . start)
      (\p -> do
         (ioContext, (nextParam, startParam)) <- createIOContext p
         return (ioContext, ((Param.get k p, nextParam), startParam)))
      deleteIOContext



mix ::
   (IsArithmetic a) =>
   T p (Value a) -> T p (Value a) -> T p (Value a)
mix =
   zipWithSimple Sample.mixMono

mixStereo ::
   (IsArithmetic a) =>
   T p (Stereo.T (Value a)) -> T p (Stereo.T (Value a)) -> T p (Stereo.T (Value a))
mixStereo =
   zipWithSimple Sample.mixStereo


envelope ::
   (IsArithmetic a) =>
   T p (Value a) -> T p (Value a) -> T p (Value a)
envelope =
   zipWithSimple Sample.amplifyMono

envelopeStereo ::
   (IsArithmetic a) =>
   T p (Value a) -> T p (Stereo.T (Value a)) -> T p (Stereo.T (Value a))
envelopeStereo =
   zipWithSimple Sample.amplifyStereo

amplify ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> T p (Value a) -> T p (Value a)
amplify =
   map Sample.amplifyMono

amplifyStereo ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> T p (Stereo.T (Value a)) -> T p (Stereo.T (Value a))
amplifyStereo =
   map Sample.amplifyStereo


-- * signal generators

constant ::
   (Storable a, MakeValueTuple a al,
    Rep.Memory al packed, IsSized packed s) =>
   Param.T p a -> T p al
constant x =
   simple
      (\pl () -> return (pl, ()))
      return
      x
      (return ())


exponentialCore ::
   (Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> Param.T p a -> T p (Value a)
exponentialCore =
   iterate A.mul

exponential2 ::
   (Trans.C a, Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> Param.T p a -> T p (Value a)
exponential2 halfLife =
   exponentialCore (0.5 ** recip halfLife)


exponentialBoundedCore ::
   (Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, SoV.Real a, IsConst a) =>
   Param.T p a -> Param.T p a -> Param.T p a ->
   T p (Value a)
exponentialBoundedCore bound decay =
   iterate
      (\(b,k) y -> SoV.max b =<< A.mul k y)
      (liftA2 (,) bound decay)

{- |
Exponential curve that remains at the bound value
if it would fall below otherwise.
This way you can avoid extremal values, e.g. denormalized ones.
The initial value and the bound value must be positive.
-}
exponentialBounded2 ::
   (Trans.C a, Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, SoV.Real a, IsConst a) =>
   Param.T p a -> Param.T p a -> Param.T p a ->
   T p (Value a)
exponentialBounded2 bound halfLife =
   exponentialBoundedCore bound (0.5 ** recip halfLife)


osciCore ::
   (Storable t, MakeValueTuple t (Value t),
    IsFirstClass t, IsSized t size,
    SoV.Fraction t, IsConst t) =>
   Param.T p t -> Param.T p t -> T p (Value t)
osciCore phase freq =
   iterate SoV.incPhase freq phase

osci ::
   (Storable t, MakeValueTuple t (Value t),
    Storable c, MakeValueTuple c cl,
    IsFirstClass t, IsSized t size,
    Rep.Memory cl cp, IsSized cp cs,
    SoV.Fraction t, IsConst t) =>
   (forall r. cl -> Value t -> CodeGenFunction r y) ->
   Param.T p c ->
   Param.T p t -> Param.T p t -> T p y
osci wave waveParam phase freq =
   map wave waveParam $
   osciCore phase freq

osciSimple ::
   (Storable t, MakeValueTuple t (Value t),
    IsFirstClass t, IsSized t size,
    SoV.Fraction t, IsConst t) =>
   (forall r. Value t -> CodeGenFunction r y) ->
   Param.T p t -> Param.T p t -> T p y
osciSimple wave =
   osci (const wave) (return ())

osciSaw ::
   (Ring.C a0, IsConst a0, SoV.Replicate a0 a,
    Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a size,
    SoV.Fraction a, IsPrimitive a, IsConst a) =>
   Param.T p a -> Param.T p a -> T p (Value a)
osciSaw =
   osciSimple Wave.saw



rampCore ::
   (Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> Param.T p a -> T p (Value a)
rampCore = iterate A.add

parabolaCore ::
   (Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> Param.T p a -> Param.T p a -> T p (Value a)
parabolaCore d2 d1 start =
   Causal.apply (Causal.integrate start) $
   rampCore d2 d1



rampInf, rampSlope,
 parabolaFadeInInf, parabolaFadeOutInf ::
   (Field.C a, Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> T p (Value a)
rampSlope slope  =  rampCore slope Additive.zero
rampInf dur  =  rampSlope (recip dur)

{-
t*(2-t) = 1 - (t-1)^2

(t+d)*(2-t-d) - t*(2-t)
   = d*(2-t) - d*t - d^2
   = 2*d*(1-t) - d^2
   = d*(2*(1-t) - d)

2*d*(1-t-d) + d^2  -  (2*d*(1-t) + d^2)
   = -2*d^2
-}
parabolaFadeInInf dur =
   parabolaCore
      (fmap (\d -> -2*d*d)  $ recip dur)
      (fmap (\d -> d*(2-d)) $ recip dur)
      Additive.zero

{-
1-t^2
-}
parabolaFadeOutInf dur =
   parabolaCore
      (fmap (\d -> -2*d*d) $ recip dur)
      (fmap (\d ->   -d*d) $ recip dur)
      one

ramp,
 parabolaFadeIn, parabolaFadeOut,
 parabolaFadeInMap, parabolaFadeOutMap ::
   (RealField.C a, Storable a, MakeValueTuple a (Value a),
    IsFirstClass a, IsSized a s, IsArithmetic a, IsConst a) =>
   Param.T p a -> T p (Value a)

ramp dur =
   Causal.apply (Causal.take (fmap round dur)) $
   rampInf dur

parabolaFadeIn dur =
   Causal.apply (Causal.take (fmap round dur)) $
   parabolaFadeInInf dur

parabolaFadeOut dur =
   Causal.apply (Causal.take (fmap round dur)) $
   parabolaFadeOutInf dur

parabolaFadeInMap dur =
   -- t*(2-t)
   Causal.apply (Causal.mapSimple (\t -> A.mul t =<< A.sub (valueOf 2) t)) $
   ramp dur

parabolaFadeOutMap dur =
   -- 1-t^2
   Causal.apply (Causal.mapSimple (\t -> A.sub (valueOf 1) =<< A.mul t t)) $
   ramp dur


{- |
@noise seed rate@

The @rate@ parameter is for adjusting the amplitude
such that it is uniform across different sample rates
and after frequency filters.
The @rate@ is the ratio of the current sample rate to the default sample rate,
where the variance of the samples would be one.
If you want that at sample rate 22050 the variance is 1,
then in order to get a consistent volume at sample rate 44100
you have to set @rate = 2@.

I use the variance as quantity and not the amplitude,
because the amplitude makes only sense for uniformly distributed samples.
However, frequency filters transform the probabilistic density of the samples
towards the normal distribution according to the central limit theorem.
-}
noise ::
   (Algebraic.C a, IsFloating a, IsConst a,
    NumberOfElements TypeNum.D1 a,
    IsSized a ps, MakeValueTuple a (Value a), Storable a) =>
   Param.T p Word32 ->
   Param.T p a ->
   T p (Value a)
noise seed rate =
   let m2 = fromInteger $ div Rnd.modulus 2
   in  map (\r y ->
          A.mul r
           =<< flip A.sub (valueOf $ m2+1)
          {-
          In principle it must be uitofp,
          but sitofp is a single instruction on x86
          and our numbers are below 2^31.
          -}
           =<< sitofp y)
          (sqrt (3 * rate) / return m2) $
       noiseCore seed

noiseCore, noiseCoreAlt ::
   Param.T p Word32 ->
   T p (Value Word32)
noiseCore seed =
   iterate (const Rnd.nextCG)
      (return ()) ((+1) . flip mod (Rnd.modulus-1) ^<< seed)

noiseCoreAlt seed =
   iterate (const Rnd.nextCG32)
      (return ()) ((+1) . flip mod (Rnd.modulus-1) ^<< seed)


-- * conversion from and to storable vectors

fromStorableVector ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   Param.T p (SV.Vector a) ->
   T p value
fromStorableVector selectVec =
   Cons
      (\() (p0,l0) -> do
         cont <- Maybe.lift $ A.icmp IntUGT l0 (valueOf 0)
         Maybe.withBool cont $ do
            y1 <- Rep.load p0
            p1 <- advanceArrayElementPtr p0
            l1 <- A.dec l0
            return (y1,(p1,l1)))
      return
      (\p ->
         let (fp,s,l) = SVB.toForeignPtr $ Param.get selectVec p
         in  return (fp,
                ((),
                 (Rep.castStorablePtr $ unsafeForeignPtrToPtr fp `advancePtr` s,
                  fromIntegral l :: Word32))))
      -- keep the foreign ptr alive
      touchForeignPtr

{-
This function calls back into the Haskell function 'ChunkIt.next'
that returns a pointer to the data of the next chunk
and advances to the next chunk in the sequence.
-}
fromStorableVectorLazy ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   Param.T p (SVL.Vector a) ->
   T p value
fromStorableVectorLazy sig =
   Cons
      (\(stable, lenPtr) (buffer0,length0) -> do
         (buffer1,length1) <- Maybe.lift $ do
            nextChunkFn <- staticFunction ChunkIt.nextCallBack
            needNext <- A.icmp IntEQ length0 (valueOf 0)
            ifThen needNext (buffer0,length0)
               (liftM2 (,)
                   (call nextChunkFn stable lenPtr)
                   (load lenPtr))
         valid <- Maybe.lift $ A.icmp IntNE buffer1 (valueOf nullPtr)
         Maybe.withBool valid $ do
            x <- Rep.load buffer1
            buffer2 <- advanceArrayElementPtr buffer1
            length2 <- A.dec length1
            return (x, (buffer2,length2)))
      (\() -> return (valueOf nullPtr, valueOf 0))
      (\p -> do
          s <- liftM2 (,) (ChunkIt.new (Param.get sig p)) Alloc.malloc
          return (s, (s,())))
      (\(stable,lenPtr) -> do
          ChunkIt.dispose stable
          Alloc.free lenPtr)


piecewiseConstant ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct, IsSized struct size) =>
   Param.T p (EventList.T NonNeg.Int a) ->
   T p value
piecewiseConstant evs =
   Cons
      (\(stable, yPtr) (y0,length0) -> do
         (y1,length1) <- Maybe.lift $ do
            nextFn <- staticFunction EventIt.nextCallBack
            needNext <- A.icmp IntEQ length0 (valueOf 0)
            ifThen needNext (y0,length0)
               (fmap swap $
                liftM2 (,)
                   (call nextFn stable yPtr)
                   (Rep.load yPtr))
         Maybe.guard =<<
            Maybe.lift (A.icmp IntNE length1 (valueOf 0))
         length2 <- Maybe.lift (A.dec length1)
         return (y1, (y1,length2)))
      (\() -> return (undefTuple, valueOf 0))
      (\p -> do
         stable <- EventIt.new (Param.get evs p)
         yPtr <- Alloc.malloc
         return ((stable, asTypeOfEventListElement yPtr evs),
                 ((stable, Rep.castStorablePtr yPtr), ())))
      (\(stable,yPtr) -> do
         EventIt.dispose stable
         Alloc.free yPtr)

asTypeOfEventListElement ::
   Ptr a ->
   Param.T p (EventList.T NonNeg.Int a) ->
   Ptr a
asTypeOfEventListElement ptr _ = ptr



{- |
Turns a lazy chunky size into a signal generator with unit element type.
The signal length is the only information that the generator provides.
Using 'zipWith' you can use this signal as a lazy 'take'.
-}
lazySize ::
   Param.T p SVP.LazySize ->
   T p ()
lazySize size =
   Cons
      (\stable length0 -> do
         length1 <- Maybe.lift $ do
            nextFn <- staticFunction SizeIt.nextCallBack
            needNext <- A.icmp IntEQ length0 (valueOf 0)
            ifThen needNext length0
               (call nextFn stable)
         Maybe.guard =<<
            Maybe.lift (A.icmp IntNE length1 (valueOf 0))
         length2 <- Maybe.lift (A.dec length1)
         return ((), length2))
      (\() -> return (valueOf 0))
      (\p -> do
         stable <- SizeIt.new (Param.get size p)
         return (stable, (stable, ())))
      (\stable ->
         SizeIt.dispose stable)


foreign import ccall safe "dynamic" derefFillPtr ::
   Exec.Importer (Ptr param -> Word32 -> Ptr a -> IO Word32)

run ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   T p value ->
   IO (Int -> p -> SV.Vector a)
run (Cons next start createIOContext deleteIOContext) =
   do -- this compiles once and is much faster than simpleFunction
      fill <-
         fmap derefFillPtr .
         Exec.compileModule .
         createFunction ExternalLinkage $
         \paramPtr size bPtr -> do
            (nextParam,startParam) <- Rep.load paramPtr
            s <- start startParam
            (pos,_) <- Maybe.arrayLoop size bPtr s $ \ ptri s0 -> do
               (y,s1) <- next nextParam s0
               Maybe.lift $ Rep.store y ptri
               return s1
            ret (pos :: Value Word32)

      return $ \len p ->
         unsafePerformIO $
         bracket (createIOContext p) (deleteIOContext . fst) $
         \ (_,params) ->
            SVB.createAndTrim len $ \ ptr ->
            Alloc.alloca $ \paramPtr ->
               poke paramPtr params >>
               (fmap fromIntegral $
                  fill (Rep.castStorablePtr paramPtr)
                     (fromIntegral len) (Rep.castStorablePtr ptr))

{- |
This is not really a function, see 'renderChunky'.
-}
render ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   T p value -> Int -> p -> SV.Vector a
render gen = unsafePerformIO $ run gen


foreign import ccall safe "dynamic" derefChunkPtr ::
   Exec.Importer (Ptr nextParamStruct -> Ptr stateStruct -> Word32 -> Ptr struct -> IO Word32)


compileChunky ::
   (Rep.Memory value struct,
    Rep.Memory state stateStruct,
    IsSized stateStruct stateSize,
    Rep.Memory startParamValue startParamStruct,
    Rep.Memory nextParamValue  nextParamStruct,
    IsSized    startParamStruct startParamSize,
    IsSized    nextParamStruct  nextParamSize) =>
   (forall r.
    nextParamValue ->
    state -> Maybe.T r (Value Bool, state) (value, state)) ->
   (forall r.
    startParamValue ->
    CodeGenFunction r state) ->
   IO (FunPtr (Ptr startParamStruct -> IO (Ptr stateStruct)),
       FunPtr (Ptr stateStruct -> IO ()),
       FunPtr (Ptr nextParamStruct -> Ptr stateStruct -> Word32 -> Ptr struct -> IO Word32))
compileChunky next start =
   Exec.compileModule $
      liftM3 (,,)
         (createFunction ExternalLinkage $
          \paramPtr -> do
             -- danger: size computation in LLVM currently does not work for structs!
             pptr <- Rep.malloc
             flip Rep.store pptr =<< start =<< Rep.load paramPtr
             ret pptr)
         (createFunction ExternalLinkage $
          \ pptr -> Rep.free pptr >> ret ())
         (createFunction ExternalLinkage $
          \ paramPtr sptr loopLen ptr -> do
             param <- Rep.load paramPtr
             sInit <- Rep.load sptr
             (pos,sExit) <- Maybe.arrayLoop loopLen ptr sInit $ \ ptri s0 -> do
                (y,s1) <- next param s0
                Maybe.lift $ Rep.store y ptri
                return s1
             Rep.store sExit sptr
             ret (pos :: Value Word32))


{- |
Renders a signal generator to a chunky storable vector with given pattern.
If the pattern is shorter than the generated signal
this means that the signal is shortened.
-}
runChunkyPattern ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   T p value ->
   IO (SVP.LazySize -> p -> SVL.Vector a)
runChunkyPattern (Cons next start createIOContext deleteIOContext) = do
   (startFunc, stopFunc, fill) <- compileChunky next start
   return $
      \ lazysize p -> SVL.fromChunks $ unsafePerformIO $ do
         (ioContext, (nextParam, startParam)) <- createIOContext p

         statePtr <- Rep.newForeignPtrParam stopFunc startFunc startParam
         nextParamPtr <- Rep.newForeignPtr (deleteIOContext ioContext) nextParam

         let go cs =
                unsafeInterleaveIO $
                case cs of
                   [] -> return []
                   SVL.ChunkSize size : rest -> do
                      v <-
                         withForeignPtr statePtr $ \sptr ->
                         Rep.withForeignPtr nextParamPtr $ \nptr ->
                         SVB.createAndTrim size $
                         fmap fromIntegral .
                         derefChunkPtr fill nptr sptr (fromIntegral size) .
                         Rep.castStorablePtr
                      (if SV.length v > 0
                         then fmap (v:)
                         else id) $
                         (if SV.length v < size
                            then return []
                            else go rest)
         go (Chunky.toChunks lazysize)

runChunky ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   T p value ->
   IO (SVL.ChunkSize -> p -> SVL.Vector a)
runChunky sig =
   flip fmap (runChunkyPattern sig) $ \f size p ->
      f (Chunky.fromChunks (repeat size)) p

{- |
This looks like a function,
but it is not a function since it depends on LLVM being initialized
with LLVM.initializeNativeTarget before.
It is also problematic since you cannot control when and how often
the underlying LLVM code is compiled.
The compilation cannot be observed, thus it is referential transparent.
But this influences performance considerably
and I assume that you use this package exclusively for performance reasons.
-}
renderChunky ::
   (Storable a, MakeValueTuple a value, Rep.Memory value struct) =>
   SVL.ChunkSize -> T p value ->
   p -> SVL.Vector a
renderChunky size gen =
   unsafePerformIO (runChunky gen) size