{-# OPTIONS_GHC -fno-warn-type-defaults #-}

{-| incremental folds of 'Histogram's from the <https://hackage.haskell.org/package/histogram-fill histogram-fill> library.

-}

module Control.Foldl.Incremental.Histogram  (
    -- * Incrementalize
    incrementalizeHist
  , incrementalizeHist2D
    -- * Common Histogram Folds
  , incHist
  , incHist2D
  , incAdaptiveHist
  ) where

import           Control.Foldl (Fold(..))
import qualified Control.Foldl as L
import           Data.Histogram
import           Data.Histogram.Adaptable
import           Data.Histogram.Bin.BinDU
import           Data.Histogram.Fill
import           Data.Vector.Unboxed ((//))
import qualified Data.Vector.Unboxed as V
import           GHC.Float (double2Int)

-- | FIXME: make Increment a class and IncrementHist an instance.
data IncrementHist a = IncrementHist
   { _adder   :: Histogram a Double
   , _counter :: {-# UNPACK #-} !Double
   , _rate    :: {-# UNPACK #-} !Double
   }

{-| incrementalizeHist takes a function governing the input to the histogram.

>>> incrementalizeHist (const 1)

is the usual boiler-plate meaning of histogram.

-}
incrementalizeHist :: BinD -> (Double -> Double) -> Double -> L.Fold Double (Histogram BinD Double)
incrementalizeHist b f r =  L.Fold step begin done
  where
    step (IncrementHist x' d r') a = IncrementHist
        (Data.Histogram.zip (\x0 y0 -> x0 * r' + y0) x' (mkOne b f a))
        (d * r' + 1)
        r'
    begin = IncrementHist (fillBuilderVec (mkSimple b) V.empty) 0 r
    done (IncrementHist a c _) =
      if c /= 0
         then Data.Histogram.map (/c) a
         else a
    mkOne :: BinD -> (Double -> Double) -> Double -> Histogram BinD Double
    mkOne b' f' a = histogram b' d
      where
        i  = toIndex b a
        d = V.replicate (nBins b) 0 // [(i,f' a)]

-- 2D histogram folding
data IncrementHist2D = IncrementHist2D
   { _adder2D   :: Histogram (Bin2D BinD BinD) Double
   , _counter2D :: {-# UNPACK #-} !Double
   , _rate2D    :: {-# UNPACK #-} !Double
   } deriving (Show)

-- | 2D version
incrementalizeHist2D :: Bin2D BinD BinD -> ((Double,Double) -> Double) -> Double -> L.Fold (Double,Double) (Histogram (Bin2D BinD BinD) Double)
incrementalizeHist2D b f r =  L.Fold step begin done
  where
    step (IncrementHist2D x' d r') a = IncrementHist2D
        (Data.Histogram.zip (\x0 y0 -> x0 * r' + y0) x' (mkOne2D b f a))
        (d * r' + 1)
        r'
    begin = IncrementHist2D (fillBuilderVec (mkSimple b) V.empty) 0 r
    done (IncrementHist2D a c _) =
      if c /= 0
         then Data.Histogram.map (/c) a
         else a
    mkOne2D :: Bin2D BinD BinD -> ((Double,Double) -> Double) -> (Double,Double) -> Histogram (Bin2D BinD BinD) Double
    mkOne2D b' f' a = histogram b' d
      where
        i  = toIndex b a
        d = V.replicate (nBins b) 0 // [(i,f' a)]

{-| incremental histogram with pre-defined bins

>>> import Control.Foldl.Incremental
>>> import qualified Control.Foldl as L

>>> let b = binDn 0 2 12
>>> L.fold (incHist b 0.9) [1..10]
-}
incHist :: BinD -> Double -> Fold Double (Histogram BinD Double)
incHist b = incrementalizeHist b (const 1)
{-# INLINABLE incHist #-}

-- | incremental 2D histogram
incHist2D :: Bin2D BinD BinD -> Double -> Fold (Double,Double) (Histogram (Bin2D BinD BinD) Double)
incHist2D b = incrementalizeHist2D b (const 1)

{-| adaptable histogram fold

TODO: integrate Histogram.Adaptable upstream

incHist requires a pre-specified bin, which in turn requires an initial pass over the stream to determine the data ranges.

For a one pass histogram fold, we require an incremental approach to bin creation, which, in turn, requires some way of creating a histogram from scratch.

'Data.Histogram.Adaptable' and 'Data.Histogram.Bin.BinDU' is a draft solution to enable a one-pass at histogram creation.


This function takes

- a maximum frequency (thresh) for a bin, which, when triggered causes a bin to be split (at the moment using a uniform distribution assumption which is pretty bad).
- a minimum bin size (grain). bins are further constrained to be multiples of this.

- a maximum number of bins, which, when triggered, causes bins to be merged.

>>> L.fold (incAdaptiveHist 0.2 1.0 10 1.0) [1..1000]

provides a histogram with no bin more than 20% frequency size, with a minimum bin size of 1, with at most 10 bins, and a decay rate of 1.0

-}
incAdaptiveHist :: Double -> Double -> Int -> Double -> Fold Double (Histogram BinDU Double)
incAdaptiveHist thresh grain maxBins rate = Fold step begin done
  where
    step (IncrementHist h d _) a =
        let h' = Data.Histogram.map (*rate) (step' h a) in
        IncrementHist h' ((d+1)*rate) rate
    step' x a =
        checkMaxBins maxBins
        (maybeSlice thresh grain (checkMaxBins maxBins (addOne grain x a)) a)
      where
        checkMaxBins max' x' =
            if nBins (bins x') > max'
            then checkMaxBins max' (mergeSmallest x')
            else x'
    begin = IncrementHist (histogram (unsafeBinDU V.empty) V.empty) 0 rate
    done (IncrementHist h c _) =
      if c /= 0
         then Data.Histogram.map (/c) h 
         else h

-- helpers
addOne :: Double -> Histogram BinDU Double -> Double -> Histogram BinDU Double
addOne grain x a
    | V.length (cuts (bins x)) == 0 = addFirst grain a
    | a < lowerLimit (bins x) = addLower grain x a
    | a >= upperLimit (bins x) = addUpper grain x a
    | otherwise = addMiddle x a

addMiddle :: (Bin bin, V.Unbox a, Num a) => Histogram bin a -> BinValue bin -> Histogram bin a
addMiddle h a = histogram (bins h) (histData h // [(i, 1 + h `atI` i)])
      where
        i = toIndex (bins h) a

addFirst :: Double -> Double -> Histogram BinDU Double
addFirst grain a = histogram bin (V.fromList [1.0])
      where
        a' = roundD grain a
        bin = binDU (V.fromList [a' - grain/2, a' + grain/2])

addLower :: Double -> Histogram BinDU Double-> Double -> Histogram BinDU Double
addLower grain x a = x1
      where
        a' = roundD grain a
        x0 = sliceAt x (a' - grain/2)
        x1 = addMiddle x0 a

addUpper :: Double -> Histogram BinDU Double-> Double -> Histogram BinDU Double
addUpper grain x a = x1
      where
        a' = roundD grain a
        x0 = sliceAt x (a' + grain/2)
        x1 = addMiddle x0 a

maybeSlice :: Double-> Double-> Histogram BinDU Double-> BinValue BinDU-> Histogram BinDU Double
maybeSlice thresh' grain' x' a' =
    let freq = x' `atV` a'
        i = toIndex (bins x') a'
        (l,u) = binInterval (bins x') i
        numGrains = round ((u-l) / grain') :: Int
        center = l + grain' * fromIntegral (round (fromIntegral numGrains / 2)) 
    in
    if binSizeN (bins x') i > grain' + 3e-11 &&
       freq / V.sum (histData x') > thresh'
    then sliceAt x' center
    else x'

roundD :: Double -> Double -> Double
roundD grain x = if frac > 0.5 then whole + grain else whole
    where
      whole = floorDD
      frac = (x - floorDD) / grain
      floorDD = fromIntegral (floorD (x / grain)) * grain
      floorD x' | x' < 0     = double2Int x' - 1
                | otherwise = double2Int x'
{-# INLINE roundD #-}