{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Synthesizer.LLVM.CausalParameterized.Process (
   T(Cons), simple,
   mapAccum, map, mapSimple,
   apply, compose, first,
   feedFst, feedSnd,
   take, integrate,
   module Synthesizer.LLVM.CausalParameterized.Process
   ) where

import Synthesizer.LLVM.CausalParameterized.ProcessPrivate
import qualified Synthesizer.LLVM.Parameter as Param

import Synthesizer.LLVM.Parameterized.Signal (($#), )
import qualified Synthesizer.LLVM.Parameterized.Signal as Sig
import qualified Synthesizer.LLVM.Frame.Stereo as Stereo
import qualified Synthesizer.LLVM.Sample as Sample
import qualified Synthesizer.LLVM.Execution as Exec
import qualified Synthesizer.LLVM.Simple.Value as Value

import qualified Data.StorableVector.Lazy as SVL
import qualified Data.StorableVector as SV
import qualified Data.StorableVector.Base as SVB

import qualified Synthesizer.Plain.Modifier as Modifier

import qualified LLVM.Extra.ScalarOrVector as SoV
import qualified LLVM.Extra.Vector as Vector
import qualified LLVM.Extra.MaybeContinuation as Maybe
import qualified LLVM.Extra.Representation as Rep
import qualified LLVM.Extra.Control as C
import qualified LLVM.Extra.Class as Class
import qualified LLVM.Extra.Arithmetic as A

import LLVM.Core as LLVM
import Data.TypeLevel.Num (D2, )
import qualified Data.TypeLevel.Num as TypeNum
import qualified Data.TypeLevel.Num.Sets as Sets

import qualified Control.Monad.HT as M
import qualified Control.Arrow    as Arr
import qualified Control.Category as Cat
import Control.Monad.Trans.State (runState, state, evalState, )
import Control.Arrow ((<<<), (>>>), (&&&), )
import Control.Monad (liftM2, liftM3, )
import Control.Applicative (liftA2, )

import System.Random (Random, RandomGen, randomR, )

import qualified Algebra.Transcendental as Trans
import qualified Algebra.Field    as Field
import qualified Algebra.Ring     as Ring
import qualified Algebra.Additive as Additive

import Data.Function.HT (nest, )
import Data.Word (Word32, )
import Foreign.Storable.Tuple ()
import Foreign.Storable (Storable, poke, )
import qualified Foreign.Marshal.Array as Array
import qualified Foreign.Marshal.Alloc as Alloc
import Foreign.ForeignPtr (withForeignPtr, )
import Foreign.Ptr (FunPtr, )
import Control.Exception (bracket, )
import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO, )

import qualified Data.List as List

import NumericPrelude.Numeric
import NumericPrelude.Base hiding (and, iterate, map, zip, zipWith, take, )


infixl 0 $<, $>, $*, $<#, $>#, $*#
-- infixr 0 $:*   -- can be used together with $

applyFst, ($<) :: T p (a,b) c -> Sig.T p a -> T p b c
applyFst proc sig =
   proc <<< feedFst sig

applySnd, ($>) :: T p (a,b) c -> Sig.T p b -> T p a c
applySnd proc sig =
   proc <<< feedSnd sig

{-
These infix operators may become methods of a type class
that can also have synthesizer-core:Causal.Process as instance.
-}
($*) :: T p a b -> Sig.T p a -> Sig.T p b
($*) = apply
($<) = applyFst
($>) = applySnd

{- |
provide constant input in a comfortable way
-}
($*#) ::
   (Storable ah, MakeValueTuple ah a,
    Rep.Memory a am, IsSized am as) =>
   T p a b -> ah -> Sig.T p b
proc $*# x = proc $* (Sig.constant $# x)

($<#) ::
   (Storable ah, MakeValueTuple ah a,
    Rep.Memory a am, IsSized am as) =>
   T p (a,b) c -> ah -> T p b c
proc $<# x = proc $< (Sig.constant $# x)

($>#) ::
   (Storable bh, MakeValueTuple bh b,
    Rep.Memory b bm, IsSized bm bs) =>
   T p (a,b) c -> bh -> T p a c
proc $># x = proc $> (Sig.constant $# x)


mapAccumSimple ::
   (Rep.Memory s struct, IsSized struct sa) =>
   (forall r. a -> s -> CodeGenFunction r (b,s)) ->
   (forall r. CodeGenFunction r s) ->
   T p a b
mapAccumSimple f s =
   mapAccum (\() -> f) (\() -> s) (return ()) (return ())

{- |
Not quite the loop of ArrowLoop
because we need a delay of one time step
and thus an initialization value.

For a real ArrowLoop.loop, that is a zero-delay loop,
we would formally need a MonadFix instance of CodeGenFunction.
But this will not become reality, since LLVM is not able to re-order code
in a way that allows to access a result before creating the input.
-}
loop ::
   (Storable ch,
    MakeValueTuple ch c,
    Rep.Memory c cp,
    IsSized cp cs) =>
   Param.T p ch -> T p (a,c) (b,c) -> T p a b
loop initial (Cons next start createIOContext deleteIOContext) =
   Cons
      (\p a0 (c0,s0) -> do
         ((b1,c1), s1) <- next p (a0,c0) s0
         return (b1,(c1,s1)))
      (\(i,p) -> fmap ((,) (Param.value initial i)) $ start p)
      (\p -> do
         (ctx,(nextParam,startParam)) <- createIOContext p
         return (ctx,
            (nextParam, (Param.get initial p, startParam))))
      deleteIOContext


-- cf. synthesizer-core:Causal.Process, can be defined for any arrow
{-# INLINE replicateControlled #-}
replicateControlled :: Int -> T p (c,x) x -> T p (c,x) x
replicateControlled n p =
   nest n
      (Arr.arr fst &&& p  >>> )
      (Arr.arr snd)

-- cf. synthesizer-core:Causal.Process
{-# INLINE feedbackControlled #-}
feedbackControlled ::
   (Storable ch,
    MakeValueTuple ch c,
    Rep.Memory c cp,
    IsSized cp cs) =>
   Param.T p ch ->
   T p ((ctrl,a),c) b -> T p (ctrl,b) c -> T p (ctrl,a) b
feedbackControlled initial forth back =
   loop initial
      (Arr.arr (fst.fst) &&& forth  >>>  Arr.arr snd &&& back)


fromModifier ::
   (Value.Flatten ah al, Value.Flatten bh bl, Value.Flatten ch cl,
    Value.Flatten sh sl, Rep.Memory sl sp, IsSized sp ss) =>
   Modifier.Simple sh ch ah bh -> T p (cl,al) bl
fromModifier (Modifier.Simple initial step) =
   mapAccumSimple
      (\(c,a) s ->
         Value.flatten $
         runState
            (step (Value.unfold c) (Value.unfold a))
            (Value.unfold s))
      (Value.flatten initial)


{- |
Run a causal process independently on each stereo channel.
-}
stereoFromMono ::
   T p a b -> T p (Stereo.T a) (Stereo.T b)
stereoFromMono =
   Stereo.arrowFromMono

stereoFromMonoControlled ::
   T p (c,a) b -> T p (c, Stereo.T a) (Stereo.T b)
stereoFromMonoControlled =
   Stereo.arrowFromMonoControlled

stereoFromChannels ::
   T p a b -> T p a b -> T p (Stereo.T a) (Stereo.T b)
stereoFromChannels =
   Stereo.arrowFromChannels

{-
In order to let this work we have to give the disable-mmx option somewhere,
but where?
-}
stereoFromVector ::
   (IsPrimitive a, IsPrimitive b) =>
   T p (Value (Vector D2 a)) (Value (Vector D2 b)) ->
   T p (Stereo.T (Value a)) (Stereo.T (Value b))
stereoFromVector proc =
   mapSimple Sample.stereoFromVector <<<
   proc <<<
   mapSimple Sample.vectorFromStereo


vectorize ::
   (Vector.Access n a va, Vector.Access n b vb) =>
   T p a b -> T p va vb
vectorize = vectorizeSize undefined

{-
insert and extract instructions will be in opposite order,
no matter whether we use foldr or foldl
and independent from the order of proc and channel in replaceChannel.
However, LLVM neglects the order anyway.
-}
vectorizeSize ::
   (Vector.Access n a va, Vector.Access n b vb) =>
   n -> T p a b -> T p va vb
vectorizeSize n proc =
   foldl
      (\acc i -> replaceChannel i proc acc)
      (Arr.arr (const $ LLVM.undefTuple)) $
   List.take (TypeNum.toInt n) [0 ..]

{- |
Given a vector process, replace the i-th output by output
that is generated by a scalar process from the i-th input.
-}
replaceChannel ::
   (Vector.Access n a va, Vector.Access n b vb) =>
   Int -> T p a b -> T p va vb -> T p va vb
replaceChannel i channel proc =
   let li = valueOf $ fromIntegral i
   in  mapSimple (uncurry (Vector.insert li)) <<<
          (channel <<< mapSimple (Vector.extract li)) &&&
          proc


zipWithSimple ::
   (forall r. a -> b -> CodeGenFunction r c) ->
   T p (a,b) c
zipWithSimple f =
   mapSimple (uncurry f)

mix ::
   (IsArithmetic a) =>
   T p (Value a, Value a) (Value a)
mix =
   zipWithSimple Sample.mixMono

mixStereo ::
   (IsArithmetic a) =>
   T p (Stereo.T (Value a), Stereo.T (Value a)) (Stereo.T (Value a))
mixStereo =
   zipWithSimple Sample.mixStereo


raise ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsSized a size) =>
   Param.T p a -> T p (Value a) (Value a)
raise =
   map Sample.mixMono


envelope ::
   (IsArithmetic a) =>
   T p (Value a, Value a) (Value a)
envelope =
   zipWithSimple Sample.amplifyMono

envelopeStereo ::
   (IsArithmetic a) =>
   T p (Value a, Stereo.T (Value a)) (Stereo.T (Value a))
envelopeStereo =
   zipWithSimple Sample.amplifyStereo

amplify ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> T p (Value a) (Value a)
amplify =
   map Sample.amplifyMono

amplifyStereo ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> T p (Stereo.T (Value a)) (Stereo.T (Value a))
amplifyStereo =
   map Sample.amplifyStereo



mapLinear ::
   (IsArithmetic a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> Param.T p a -> T p (Value a) (Value a)
mapLinear depth center =
   map
      (\(d,c) x -> A.add c =<< A.mul d x)
      (depth&&&center)

mapExponential ::
   (Trans.C a, IsFloating a, IsConst a, Storable a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a size) =>
   Param.T p a -> Param.T p a -> T p (Value a) (Value a)
mapExponential depth center =
   map
      (\(d,c) x ->
         A.mul c =<< A.exp =<< A.mul d x)
      (log depth &&& center)


{- |
@quantizeLift k f@ applies the process @f@ to every @k@th sample
and repeats the result @k@ times.

Like 'SigP.interpolateConstant' this function can be used
for computation of filter parameters at a lower rate.
This can be useful, if you have a frequency control signal at sample rate
that shall be used both for an oscillator and a frequency filter.
-}
quantizeLift ::
   (Rep.Memory b struct, IsSized struct size,
    Ring.C c,
    IsFloating c, CmpRet c Bool,
    Storable c, MakeValueTuple c (Value c),
    IsConst c, IsFirstClass c, IsSized c sc) =>
   Param.T p c ->
   T p a b ->
   T p a b
quantizeLift k
      (Cons next start createIOContext deleteIOContext) = Cons
   (\(kl,parameter) a0 bState0 -> do
      ((b1,state1), ss1) <-
         Maybe.fromBool $
         C.whileLoop
            (valueOf True, bState0)
            (\(cont1, (_, ss1)) ->
               and cont1 =<< A.fcmp FPOLE ss1 (value LLVM.zero))
            (\(_,((_,state01), ss1)) ->
               Maybe.toBool $ liftM2 (,)
                  (next parameter a0 state01)
                  (Maybe.lift $ A.add ss1 (Param.value k kl)))

      ss2 <- Maybe.lift $ A.sub ss1 (valueOf Ring.one)
      return (b1, ((b1,state1),ss2)))
   (fmap (\sa -> ((undefTuple, sa), value LLVM.zero)) . start)
   (\p -> do
      (ioContext, (nextParam, startParam)) <- createIOContext p
      return (ioContext, ((Param.get k p, nextParam), startParam)))
   deleteIOContext


{- |
Compute the phases from phase distortions and frequencies.

It's like integrate but with wrap-around performed by @fraction@.
For FM synthesis we need also negative phase distortions,
thus we use 'SoV.addToPhase' which supports that.
-}
osciCore ::
   (IsFirstClass t, IsSized t size,
    SoV.Fraction t, IsConst t,
    Additive.C t) =>
   T p (Value t, Value t) (Value t)
osciCore =
   mapSimple (uncurry SoV.addToPhase) <<<
   Arr.second
      (mapAccumSimple
         (\a s -> do
            b <- SoV.incPhase a s
            return (s,b))
         (return (valueOf Additive.zero)))

osciSimple ::
   (IsFirstClass t, IsSized t size,
    SoV.Fraction t, IsConst t,
    Additive.C t) =>
   (forall r. Value t -> CodeGenFunction r y) ->
   T p (Value t, Value t) y
osciSimple wave =
   mapSimple wave <<< osciCore

shapeModOsci ::
   (IsFirstClass t, IsSized t size,
    SoV.Fraction t, IsConst t,
    Additive.C t) =>
   (forall r. c -> Value t -> CodeGenFunction r y) ->
   T p (c, (Value t, Value t)) y
shapeModOsci wave =
   mapSimple (uncurry wave) <<< Arr.second osciCore



{- |
Delay time must be non-negative.

The initial value is needed in order to determine the ring buffer element type.
-}
delay ::
   (Storable a,
    MakeValueTuple a al,
    Rep.Memory al ap,
    IsSized ap as) =>
   Param.T p a -> Param.T p Int -> T p al al
delay initial time =
   let time32 = fmap (fromIntegral :: Int -> Word32) time in
   Cons
      (\(size,ptr) a0 (remain0,ptri0) -> Maybe.lift $ do
         Rep.store a0 ptri0
         cont <- A.icmp IntNE remain0 (valueOf 0)
         (remain1,ptri1) <-
            C.ifThenSelect cont (Param.value time32 size, ptr)
               (liftM2 (,)
                  (A.dec remain0)
                  (A.advanceArrayElementPtr ptri0))
         a1 <- Rep.load ptri1
         return (a1, (remain1,ptri1)))
      (\(x, (size,ptr)) -> do
         size1 <- A.inc (Param.value time32 size)
         -- cf. LLVM.Storable.Signal.fill
         C.arrayLoop size1 ptr () $ \ ptri () ->
            Rep.store (Param.value initial x) ptri >> return ()
         return (size,ptr))
      (\p -> do
         let size = Param.get time p
             x = Param.get initial p
         {-
         We allocate one element more than necessary
         in order to simplify handling of delay time zero
         -}
         ptr <- Array.mallocArray (size+1)
         let param =
               (fromIntegral size :: Word32,
                Rep.castStorablePtr (ptrAsTypeOf ptr x))
         return (ptr, (param, (x, param))))
      Alloc.free

ptrAsTypeOf :: Ptr a -> a -> Ptr a
ptrAsTypeOf p _ = p

{- |
Delay by one sample.
For very small delay times (say up to 8)
it may be more efficient to apply 'delay1' several times
or to use a pipeline,
e.g. @pipeline (id :: T (Vector D4 Float) (Vector D4 Float))@
delays by 4 samples in an efficient way.
In principle it would be also possible to use
@unpack (delay1 (const $ toVector (0,0,0,0)))@
but 'unpack' causes an additional delay.
Thus @unpack (id :: T (Vector D4 Float) (Vector D4 Float))@ may do,
what you want.
-}
delay1 ::
   (Storable a,
    MakeValueTuple a al,
    Rep.Memory al ap,
    IsSized ap as) =>
   Param.T p a -> T p al al
delay1 initial = simple
   (\() a s -> return (s,a))
   return
   (return ())
   initial


{- |
Delay time must be greater than zero!
-}
comb ::
   (Ring.C a,
    Storable a,
    IsArithmetic a,
    MakeValueTuple a (Value a),
    IsFirstClass a,
    IsSized a as) =>
   Param.T p a -> Param.T p Int ->
   T p (Value a) (Value a)
comb gain time =
   let z = Additive.zero `asTypeOf` gain
   in  loop z (mix >>> (Cat.id &&&
          (delay z (subtract 1 time) >>> amplify gain)))

combStereo ::
   (Ring.C a,
    Storable a,
    IsArithmetic a,
    MakeValueTuple a (Value a),
    IsFirstClass a,
    IsSized a as) =>
   Param.T p a -> Param.T p Int ->
   T p (Stereo.T (Value a)) (Stereo.T (Value a))
combStereo gain time =
   let z = Additive.zero `asTypeOf` (liftA2 Stereo.cons gain gain)
   in  loop z (mixStereo >>> (Cat.id &&&
          (delay z (subtract 1 time) >>> amplifyStereo gain)))

reverb ::
   (Field.C a, Random a,
    Storable a, IsArithmetic a,
    MakeValueTuple a (Value a), IsFirstClass a, IsSized a as,
    RandomGen g) =>
   g -> Int -> (a,a) -> (Int,Int) ->
   T p (Value a) (Value a)
reverb rnd num gainRange timeRange =
   amplify (return (recip (fromIntegral num))) <<<
   (foldl (\proc chan -> mix <<< (proc &&& chan)) Cat.id $
    List.take num $
    List.map (\(g,t) -> comb $# g $# t) $
    flip evalState rnd $
    M.repeat $
    liftM2 (,)
       (state (randomR gainRange))
       (state (randomR timeRange)))


{- |
This allows to compute a chain of equal processes efficiently,
if all of these processes can be bundled in one vectorial process.
Applications are an allpass cascade or an FM operator cascade.

The function expects that the vectorial input process
works like parallel scalar processes.
The different pipeline stages may be controlled by different parameters,
but the structure of all pipeline stages must be equal.
Our function feeds the input of the pipelined process
to the zeroth element of the Vector.
The result of processing the i-th element (the i-th channel, so to speak)
is fed to the (i+1)-th element.
The (n-1)-th element of the vectorial process is emitted as output of pipelined process.

The pipeline necessarily introduces a delay of (n-1) values.
For simplification we extend this to n values delay.
If you need to combine the resulting signal from the pipeline
with another signal in a 'zip'-like way,
you may delay that signal with @pipeline id@.
The first input values in later stages of the pipeline
are initialized with zero.
If this is not appropriate for your application,
then we may add a more sensible initialization.
-}
pipeline ::
   (Vector.Access n a v, Class.Zero v,
    Rep.Memory v vp, IsSized vp s) =>
   T p v v -> T p a a
pipeline (Cons next start createIOContext deleteIOContext) = Cons
   (\param a0 (v0,s0) -> do
      (a1,v1) <- Maybe.lift $ Vector.shiftUp a0 v0
      (v2,s2) <- next param v1 s0
      return (a1, (v2,s2)))
   (\p -> do
      s <- start p
      return (Class.zeroTuple, s))
   createIOContext
   deleteIOContext


linearInterpolation ::
   (Ring.C a, IsArithmetic a, IsConst a) =>
   Value a -> (Value a, Value a) -> CodeGenFunction r (Value a)
linearInterpolation r (a,b) = do
   ra <- A.mul a =<< A.sub (valueOf one) r
   rb <- A.mul b r
   A.add ra rb


{- |
> frequencyModulationLinear signal

is a causal process mapping from a shrinking factor
to the modulated input @signal@.
Similar to 'Sig.interpolateConstant'
but the factor is reciprocal and controllable
and we use linear interpolation.
The shrinking factor must be non-negative.
-}
frequencyModulationLinear ::
   (-- Rep.Memory a struct, IsSized struct size,
    Ring.C a,
    IsFloating a, CmpRet a Bool,
    Storable a, MakeValueTuple a (Value a),
    IsConst a, IsFirstClass a, IsSized a sa) =>
   Sig.T p (Value a) -> T p (Value a) (Value a)
frequencyModulationLinear
      (Sig.Cons next start createIOContext deleteIOContext) =
   Cons
      (\parameter k yState0 -> do
         (((y02,y12),state2), ss2) <-
            Maybe.fromBool $
            C.whileLoop
               (valueOf True, yState0)
               (\(cont0, (_, ss0)) ->
                  and cont0 =<< A.fcmp FPOGE ss0 (valueOf Ring.one))
               (\(_,(((_,y01),state0), ss0)) ->
                  Maybe.toBool $ liftM2 (,)
                     (do (y11,state1) <- next parameter state0
                         return ((y01,y11),state1))
                     (Maybe.lift $ A.sub ss0 (valueOf Ring.one)))

         Maybe.lift $ do
            y <- linearInterpolation ss2 (y02,y12)
            ss3 <- A.add ss2 k
            return (y, (((y02,y12),state2),ss3)))
      (\p -> do
         sa <- start p
         return (((value undef, value undef), sa), valueOf 2))
      createIOContext
      deleteIOContext


{- |
@trigger fill signal@ send @signal@ to the output
and restart it whenever the Boolean process input is 'True'.
Before the first occurrence of 'True'
and between instances of the signal the output is filled with the @fill@ value.

Attention:
This function will crash if the input generator
uses fromStorableVectorLazy, piecewiseConstant or lazySize,
since these functions contain mutable references and in-place updates,
and thus they cannot read lazy Haskell data multiple times.
-}
trigger ::
   (Storable a, MakeValueTuple a al, C.Select al,
    Rep.Memory al as, IsSized as asize) =>
   Param.T p a ->
   Sig.T p al ->
   T p (Value Bool) al
trigger fill (Sig.Cons next start createIOContext deleteIOContext) = Cons
   (\(nextParam, startParam, f) b0 (active0, s0) -> Maybe.lift $ do
      (active1,s1) <-
         C.ifThen b0 (active0,s0)
            (fmap ((,) (valueOf False)) $ start startParam)
      (active2,(a2,s2)) <-
         Maybe.toBool $ Maybe.guard active1 >> next nextParam s1
      a3 <- C.select active2 a2 (Param.value fill f)
      return (a3,(active2,s2)))
   (\() -> return (valueOf False, undefTuple))
   (\p -> do
      (context, (nextParam, startParam)) <- createIOContext p
      return (context, ((nextParam, startParam, Param.get fill p), ())))
   deleteIOContext


{- |
On each restart the parameters of type @b@ are passed to the signal.

triggerParam ::
   (MakeValueTuple a al,
    MakeValueTuple b bl) =>
   Param.T p a ->
   (Param.T p b -> Sig.T p a) ->
   T p (Value Bool, bl) al
triggerParam fill sig =
-}



foreign import ccall safe "dynamic" derefFillPtr ::
   Exec.Importer (Ptr param -> Word32 -> Ptr a -> Ptr b -> IO Word32)

runStorable ::
   (Storable a, MakeValueTuple a valueA, Rep.Memory valueA structA,
    Storable b, MakeValueTuple b valueB, Rep.Memory valueB structB) =>
   T p valueA valueB ->
   IO (p -> SV.Vector a -> SV.Vector b)
runStorable (Cons next start createIOContext deleteIOContext) = do
   fill <-
      fmap derefFillPtr $
      Exec.compileModule $
      createFunction ExternalLinkage $
      \paramPtr size alPtr blPtr -> do
         (nextParam,startParam) <- Rep.load paramPtr
         s <- start startParam
         (pos,_) <- Maybe.arrayLoop2 size alPtr blPtr s $
               \ aPtri bPtri s0 -> do
            a <- Maybe.lift $ Rep.load aPtri
            (b,s1) <- next nextParam a s0
            Maybe.lift $ Rep.store b bPtri
            return s1
         ret (pos :: Value Word32)

   return $ \p as ->
      unsafePerformIO $
      bracket (createIOContext p) (deleteIOContext . fst) $
      \ (_,params) ->
         SVB.withStartPtr as $ \ aPtr len ->
         SVB.createAndTrim len $ \ bPtr ->
         Alloc.alloca $ \paramPtr ->
            poke paramPtr params >>
            (fmap fromIntegral $
               fill (Rep.castStorablePtr paramPtr)
                  (fromIntegral len)
                  (Rep.castStorablePtr aPtr)
                  (Rep.castStorablePtr bPtr))

applyStorable ::
   (Storable a, MakeValueTuple a valueA, Rep.Memory valueA structA,
    Storable b, MakeValueTuple b valueB, Rep.Memory valueB structB) =>
   T p valueA valueB ->
   p -> SV.Vector a -> SV.Vector b
applyStorable gen = unsafePerformIO $ runStorable gen



foreign import ccall safe "dynamic" derefChunkPtr ::
   Exec.Importer (Ptr nextParamStruct -> Ptr stateStruct -> Word32 ->
             Ptr structA -> Ptr structB -> IO Word32)


compileChunky ::
   (Rep.Memory valueA structA,
    Rep.Memory valueB structB,
    Rep.Memory state stateStruct,
    IsSized    stateStruct stateSize,
    Rep.Memory startParamValue startParamStruct,
    Rep.Memory nextParamValue  nextParamStruct,
    IsSized    startParamStruct startParamSize,
    IsSized    nextParamStruct  nextParamSize) =>
   (forall r.
    nextParamValue ->
    valueA -> state ->
    Maybe.T r (Value Bool, (Value (Ptr structB), state)) (valueB, state)) ->
   (forall r.
    startParamValue ->
    CodeGenFunction r state) ->
   IO (FunPtr (Ptr startParamStruct -> IO (Ptr stateStruct)),
       FunPtr (Ptr stateStruct -> IO ()),
       FunPtr (Ptr nextParamStruct -> Ptr stateStruct -> Word32 ->
               Ptr structA -> Ptr structB -> IO Word32))
compileChunky next start =
   Exec.compileModule $
      liftM3 (,,)
         (createFunction ExternalLinkage $
          \paramPtr -> do
             -- FIXME: size computation in LLVM currently does not work for structs!
             pptr <- Rep.malloc
             flip Rep.store pptr =<< start =<< Rep.load paramPtr
             ret pptr)
         (createFunction ExternalLinkage $
          \ pptr -> Rep.free pptr >> ret ())
         (createFunction ExternalLinkage $
          \ paramPtr sptr loopLen aPtr bPtr -> do
             param <- Rep.load paramPtr
             sInit <- Rep.load sptr
             (pos,sExit) <- Maybe.arrayLoop2 loopLen aPtr bPtr sInit $
                   \ aPtri bPtri s0 -> do
                a <- Maybe.lift $ Rep.load aPtri
                (b,s1) <- next param a s0
                Maybe.lift $ Rep.store b bPtri
                return s1
             Rep.store sExit sptr
             ret (pos :: Value Word32))


runStorableChunky ::
   (Storable a, MakeValueTuple a valueA, Rep.Memory valueA structA,
    Storable b, MakeValueTuple b valueB, Rep.Memory valueB structB) =>
   T p valueA valueB ->
   IO (p -> SVL.Vector a -> SVL.Vector b)
runStorableChunky proc =
   fmap ($ const SVL.empty) $
   runStorableChunkyCont proc

{-
I liked to write something with signature

> import qualified Synthesizer.Causal.Process as Causal
>
> liftStorableChunk ::
>    T p valueA valueB ->
>    IO (p -> Causal.T (SV.Vector a) (SV.Vector b))

This could be used to convert a LLVM causal process
to something that works on Haskell values (here: strict storable vectors).
In a second step we could convert this to a processor of lazy lists,
and thus to a processor of chunky storable vectors.
Unfortunately @Causal.T@ uses an immutable state internally,
whereas @T@ uses mutable states.
In principle the immutable state of @Causal.T@
could be used for breaking the processing of a stream
and continue it on two different streams in parallel.
I have no function that makes use of this feature,
and thus an @ST@ monad might be a way out.
-}

{- |
This function should be used
instead of @StorableVector.Lazy.Pattern.splitAt@ and subsequent @append@,
because it does not have the risk of a memory leak.
-}
runStorableChunkyCont ::
   (Storable a, MakeValueTuple a valueA, Rep.Memory valueA structA,
    Storable b, MakeValueTuple b valueB, Rep.Memory valueB structB) =>
   T p valueA valueB ->
   IO ((SVL.Vector a -> SVL.Vector b) ->
       p ->
       SVL.Vector a -> SVL.Vector b)
runStorableChunkyCont (Cons next start createIOContext deleteIOContext) = do
   (startFunc, stopFunc, fill) <- compileChunky next start
   return $
      \ procRest p sig ->
      SVL.fromChunks $ unsafePerformIO $ do
         (ioContext, (nextParam, startParam)) <- createIOContext p

         statePtr <- Rep.newForeignPtrParam stopFunc startFunc startParam
         nextParamPtr <-
            Rep.newForeignPtr (deleteIOContext ioContext) nextParam

         let go xt =
               unsafeInterleaveIO $
               case xt of
                  [] -> return []
                  x:xs -> SVB.withStartPtr x $ \aPtr size -> do
                     v <-
                        Rep.withForeignPtr nextParamPtr $ \nptr ->
                        withForeignPtr statePtr $ \sptr ->
                        SVB.createAndTrim size $
                        fmap fromIntegral .
                        derefChunkPtr fill nptr sptr
                           (fromIntegral size)
                           (Rep.castStorablePtr aPtr) .
                        Rep.castStorablePtr
                     (if SV.length v > 0
                        then fmap (v:)
                        else id) $
                        (if SV.length v < size
                           then return $ SVL.chunks $
                                procRest $ SVL.fromChunks $
                                SV.drop (SV.length v) x : xs
                           else go xs)
         go (SVL.chunks sig)

applyStorableChunky ::
   (Storable a, MakeValueTuple a valueA, Rep.Memory valueA structA,
    Storable b, MakeValueTuple b valueB, Rep.Memory valueB structB) =>
   T p valueA valueB ->
   p -> SVL.Vector a -> SVL.Vector b
applyStorableChunky gen =
   unsafePerformIO (runStorableChunky gen)