{-# LANGUAGE
        MultiParamTypeClasses,
        RankNTypes,
        FlexibleInstances, FlexibleContexts,
        RecordWildCards, BangPatterns
  #-}

-- |A generic \"ziggurat algorithm\" implementation.  Fairly rough right
--  now.
--  
--  There is a lot of room for improvement in 'findBin0' especially.
--  It needs a fair amount of cleanup and elimination of redundant
--  calculation, as well as either a justification for using the simple
--  'findMinFrom' or a proper root-finding algorithm. 
--  
--  It would also be nice to add (preferably by pulling in an 
--  external package) support for numerical integration and 
--  differentiation, so that tables can be derived from only a 
--  PDF (if the end user is willing to take the performance and 
--  accuracy hit for the convenience).
module Data.Random.Distribution.Ziggurat
    ( Ziggurat(..)
    , mkZigguratRec
    , mkZiggurat
    , mkZiggurat_
    , findBin0
    , runZiggurat
    ) where

import Data.Random.Internal.Find

import Data.Random.Distribution.Uniform
import Data.Random.Distribution
import Data.Random.RVar
import Data.Vector.Generic as Vec
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as UV

-- |A data structure containing all the data that is needed
-- to implement Marsaglia & Tang's \"ziggurat\" algorithm for
-- sampling certain kinds of random distributions.
--
-- The documentation here is probably not sufficient to tell a user exactly
-- how to build one of these from scratch, but it is not really intended to
-- be.  There are several helper functions that will build 'Ziggurat's.
-- The pathologically curious may wish to read the 'runZiggurat' source.
-- That is the ultimate specification of the semantics of all these fields.
data Ziggurat v t = Ziggurat {
        -- |The X locations of each bin in the distribution.  Bin 0 is the
        -- 'infinite' one.
        -- 
        -- In the case of bin 0, the value given is sort of magical - x[0] is
        -- defined to be V/f(R).  It's not actually the location of any bin, 
        -- but a value computed to make the algorithm more concise and slightly 
        -- faster by not needing to specially-handle bin 0 quite as often.
        -- If you really need to know why it works, see the 'runZiggurat'
        -- source or \"the literature\" - it's a fairly standard setup.
        zTable_xs         :: !(v t),
        -- |The ratio of each bin's Y value to the next bin's Y value
        zTable_y_ratios   :: !(v t),
        -- |The Y value (zFunc x) of each bin
        zTable_ys         :: !(v t),
        -- |An RVar providing a random tuple consisting of:
        --
        --  * a bin index, uniform over [0,c) :: Int (where @c@ is the
        --    number of bins in the tables)
        -- 
        --  * a uniformly distributed fractional value, from -1 to 1 
        --    if not mirrored, from 0 to 1 otherwise.
        --
        -- This is provided as a single 'RVar' because it can be implemented
        -- more efficiently than naively sampling 2 separate values - a
        -- single random word (64 bits) can be efficiently converted to
        -- a double (using 52 bits) and a bin number (using up to 12 bits),
        -- for example.
        zGetIU            :: !(forall m. RVarT m (Int, t)),

        -- |The distribution for the final \"virtual\" bin
        -- (the ziggurat algorithm does not handle distributions
        -- that wander off to infinity, so another distribution is needed
        -- to handle the last \"bin\" that stretches to infinity)
        zTailDist         :: (forall m. RVarT m t),

        -- |A copy of the uniform RVar generator for the base type,
        -- so that @Distribution Uniform t@ is not needed when sampling
        -- from a Ziggurat (makes it a bit more self-contained).
        zUniform          :: !(forall m. t -> t -> RVarT m t),

        -- |The (one-sided antitone) PDF, not necessarily normalized
        zFunc             :: !(t -> t),

        -- |A flag indicating whether the distribution should be
        -- mirrored about the origin (the ziggurat algorithm in
        -- its native form only samples from one-sided distributions.
        -- By mirroring, we can extend it to symmetric distributions
        -- such as the normal distribution)
        zMirror           :: !Bool
    }

-- |Sample from the distribution encoded in a 'Ziggurat' data structure.
{-# INLINE runZiggurat #-}
{-# SPECIALIZE runZiggurat :: Ziggurat UV.Vector Float  -> RVarT m Float #-}
{-# SPECIALIZE runZiggurat :: Ziggurat UV.Vector Double -> RVarT m Double #-}
{-# SPECIALIZE runZiggurat :: Ziggurat  V.Vector Float  -> RVarT m Float #-}
{-# SPECIALIZE runZiggurat :: Ziggurat  V.Vector Double -> RVarT m Double #-}
runZiggurat :: (Num a, Ord a, Vector v a) =>
               Ziggurat v a -> RVarT m a
runZiggurat !Ziggurat{..} = go
    where
        {-# NOINLINE go #-}
        go = do
            -- Select a bin (I) and a uniform value (U) from -1 to 1
            -- (or 0 to 1 if not mirroring the distribution).
            -- Let X be U scaled to the size of the selected bin.
            (!i,!u) <- zGetIU

            -- if the uniform value U falls in the area "clearly inside" the
            -- bin, accept X immediately.
            -- Otherwise, depending on the bin selected, use either the
            -- tail distribution or an accept/reject test.
            if abs u < zTable_y_ratios ! i
                then return $! (u * zTable_xs ! i)
                else if i == 0
                    then sampleTail u
                    else sampleGreyArea i $! (u * zTable_xs ! i)

        -- when the sample falls in the "grey area" (the area between
        -- the Y values of the selected bin and the bin after that one),
        -- use an accept/reject method based on the target PDF.
        {-# INLINE sampleGreyArea #-}
        sampleGreyArea i x = do
            !v <- zUniform (zTable_ys ! (i+1)) (zTable_ys ! i)
            if v < zFunc (abs x)
                then return $! x
                else go

        -- if the selected bin is the "infinite" one, call it quits and
        -- defer to the tail distribution (mirroring if needed to ensure
        -- the result has the sign already selected by zGetIU)
        {-# INLINE sampleTail #-}
        sampleTail x
            | zMirror && x < 0  = fmap negate zTailDist
            | otherwise         = zTailDist


-- |Build the tables to implement the \"ziggurat algorithm\" devised by 
-- Marsaglia & Tang, attempting to automatically compute the R and V
-- values.
-- 
-- Arguments:
-- 
--  * flag indicating whether to mirror the distribution
-- 
--  * the (one-sided antitone) PDF, not necessarily normalized
-- 
--  * the inverse of the PDF
-- 
--  * the number of bins
-- 
--  * R, the x value of the first bin
-- 
--  * V, the volume of each bin
-- 
--  * an RVar providing the 'zGetIU' random tuple
-- 
--  * an RVar sampling from the tail (the region where x > R)
-- 
{-# INLINE mkZiggurat_ #-}
{-# SPECIALIZE mkZiggurat_ :: Bool -> (Float  ->  Float) -> (Float  ->  Float) -> Int -> Float  -> Float  -> (forall m. RVarT m (Int,  Float)) -> (forall m. RVarT m Float ) -> Ziggurat UV.Vector Float #-}
{-# SPECIALIZE mkZiggurat_ :: Bool -> (Double -> Double) -> (Double -> Double) -> Int -> Double -> Double -> (forall m. RVarT m (Int, Double)) -> (forall m. RVarT m Double) -> Ziggurat UV.Vector Double #-}
{-# SPECIALIZE mkZiggurat_ :: Bool -> (Float  ->  Float) -> (Float  ->  Float) -> Int -> Float  -> Float  -> (forall m. RVarT m (Int,  Float)) -> (forall m. RVarT m Float ) -> Ziggurat V.Vector Float #-}
{-# SPECIALIZE mkZiggurat_ :: Bool -> (Double -> Double) -> (Double -> Double) -> Int -> Double -> Double -> (forall m. RVarT m (Int, Double)) -> (forall m. RVarT m Double) -> Ziggurat V.Vector Double #-}
mkZiggurat_ :: (RealFloat t, Vector v t,
               Distribution Uniform t) =>
              Bool
              -> (t -> t)
              -> (t -> t)
              -> Int
              -> t
              -> t
              -> (forall m. RVarT m (Int, t))
              -> (forall m. RVarT m t)
              -> Ziggurat v t
mkZiggurat_ m f fInv c r v getIU tailDist = Ziggurat
    { zTable_xs         = xs
    , zTable_y_ratios   = precomputeRatios xs
    , zTable_ys         = Vec.map f xs
    , zGetIU            = getIU
    , zUniform          = uniformT
    , zFunc             = f
    , zTailDist         = tailDist
    , zMirror           = m
    }
    where
        xs = zigguratTable f fInv c r v

-- |Build the tables to implement the \"ziggurat algorithm\" devised by 
-- Marsaglia & Tang, attempting to automatically compute the R and V
-- values.
-- 
-- Arguments are the same as for 'mkZigguratRec', with an additional
-- argument for the tail distribution as a function of the selected
-- R value.
mkZiggurat :: (RealFloat t, Vector v t,
               Distribution Uniform t) =>
              Bool
              -> (t -> t)
              -> (t -> t)
              -> (t -> t)
              -> t
              -> Int
              -> (forall m. RVarT m (Int, t))
              -> (forall m. t -> RVarT m t)
              -> Ziggurat v t
mkZiggurat m f fInv fInt fVol c getIU tailDist =
    mkZiggurat_ m f fInv c r v getIU (tailDist r)
        where
            (r,v) = findBin0 c f fInv fInt fVol

-- |Build a lazy recursive ziggurat.  Uses a lazily-constructed ziggurat
-- as its tail distribution (with another as its tail, ad nauseam).
-- 
-- Arguments:
-- 
--  * flag indicating whether to mirror the distribution
--
--  * the (one-sided antitone) PDF, not necessarily normalized
--
--  * the inverse of the PDF
--
--  * the integral of the PDF (definite, from 0)
--
--  * the estimated volume under the PDF (from 0 to +infinity)
--
--  * the chunk size (number of bins in each layer).  64 seems to
--    perform well in practice.
--
--  * an RVar providing the 'zGetIU' random tuple
--
mkZigguratRec ::
  (RealFloat t, Vector v t,
   Distribution Uniform t) =>
  Bool
  -> (t -> t)
  -> (t -> t)
  -> (t -> t)
  -> t
  -> Int
  -> (forall m. RVarT m (Int, t))
  -> Ziggurat v t
mkZigguratRec m f fInv fInt fVol c getIU = z
        where
            fix :: ((forall m. a -> RVarT m a) -> (forall m. a -> RVarT m a)) -> (forall m. a -> RVarT m a)
            fix g = g (fix g)
            z = mkZiggurat m f fInv fInt fVol c getIU (fix (mkTail m f fInv fInt fVol c getIU z))

mkTail ::
    (RealFloat a, Vector v a, Distribution Uniform a) =>
    Bool
    -> (a -> a) -> (a -> a) -> (a -> a)
    -> a
    -> Int
    -> (forall m. RVarT m (Int, a))
    -> Ziggurat v a
    -> (forall m. a -> RVarT m a)
    -> (forall m. a -> RVarT m a)
mkTail m f fInv fInt fVol c getIU typeRep nextTail r = do
     x <- rvarT (mkZiggurat m f' fInv' fInt' fVol' c getIU nextTail `asTypeOf` typeRep)
     return (x + r * signum x)
        where
            fIntR = fInt r

            f' x    | x < 0     = f r
                    | otherwise = f (x+r)
            fInv' = subtract r . fInv
            fInt' x | x < 0     = 0
                    | otherwise = fInt (x+r) - fIntR

            fVol' = fVol - fIntR


zigguratTable :: (Fractional a, Vector v a, Ord a) =>
                 (a -> a) -> (a -> a) -> Int -> a -> a -> v a
zigguratTable f fInv c r v = case zigguratXs f fInv c r v of
    (xs, _excess) -> fromList xs

zigguratExcess :: (Fractional a, Ord a) => (a -> a) -> (a -> a) -> Int -> a -> a -> a
zigguratExcess f fInv c r v = snd (zigguratXs f fInv c r v)

zigguratXs :: (Fractional a, Ord a) => (a -> a) -> (a -> a) -> Int -> a -> a -> ([a], a)
zigguratXs f fInv c r v = (xs, excess)
    where
        xs = Prelude.map x [0..c] -- sample c x
        ys = Prelude.map f xs

        x 0 = v / f r
        x 1 = r
        x i | i == c = 0
        x i | i >  1 = next (i-1)
        x _ = error "zigguratXs: programming error! this case should be impossible!"

        next i = let x_i = xs!!i
                  in if x_i <= 0 then -1 else fInv (ys!!i + (v / x_i))

        excess = xs!!(c-1) * (f 0 - ys !! (c-1)) - v


precomputeRatios :: (Vector v a, Fractional a) => v a -> v a
precomputeRatios zTable_xs = generate (c-1) $ \i -> zTable_xs!(i+1) / zTable_xs!i
    where
        c = Vec.length zTable_xs

-- |I suspect this isn't completely right, but it works well so far.
-- Search the distribution for an appropriate R and V.
--
-- Arguments:
-- 
--  * Number of bins
--
--  * target function (one-sided antitone PDF, not necessarily normalized)
--
--  * function inverse
--
--  * function definite integral (from 0 to _)
--
--  * estimate of total volume under function (integral from 0 to infinity)
--
-- Result: (R,V)
findBin0 :: (RealFloat b) =>
    Int -> (b -> b) -> (b -> b) -> (b -> b) -> b -> (b, b)
findBin0 cInt f fInv fInt fVol = (rMin,v rMin)
    where
        c = fromIntegral cInt
        v r = r * f r + fVol - fInt r

        -- initial R guess:
        r0 = findMin (\r -> v r <= fVol / c)
        -- find a better R:
        rMin = findMinFrom r0 1 $ \r ->
            let e = exc r
             in e >= 0 && not (isNaN e)

        exc x = zigguratExcess f fInv cInt x (v x)

instance (Num t, Ord t, Vector v t) => Distribution (Ziggurat v) t where
    rvar = runZiggurat