{-# OPTIONS_GHC -fno-warn-type-defaults #-} {-| incremental folds of 'Histogram's from the library. -} module Control.Foldl.Incremental.Histogram ( -- * Incrementalize incrementalizeHist , incrementalizeHist2D -- * Common Histogram Folds , incHist , incHist2D , incAdaptiveHist ) where import Control.Foldl (Fold(..)) import qualified Control.Foldl as L import Data.Histogram import Data.Histogram.Adaptable import Data.Histogram.Bin.BinDU import Data.Histogram.Fill import Data.Vector.Unboxed ((//)) import qualified Data.Vector.Unboxed as V import GHC.Float (double2Int) -- | FIXME: make Increment a class and IncrementHist an instance. data IncrementHist a = IncrementHist { _adder :: Histogram a Double , _counter :: {-# UNPACK #-} !Double , _rate :: {-# UNPACK #-} !Double } {-| incrementalizeHist takes a function governing the input to the histogram. >>> incrementalizeHist (const 1) is the usual boiler-plate meaning of histogram. -} incrementalizeHist :: BinD -> (Double -> Double) -> Double -> L.Fold Double (Histogram BinD Double) incrementalizeHist b f r = L.Fold step begin done where step (IncrementHist x' d r') a = IncrementHist (Data.Histogram.zip (\x0 y0 -> x0 * r' + y0) x' (mkOne b f a)) (d * r' + 1) r' begin = IncrementHist (fillBuilderVec (mkSimple b) V.empty) 0 r done (IncrementHist a c _) = if c /= 0 then Data.Histogram.map (/c) a else a mkOne :: BinD -> (Double -> Double) -> Double -> Histogram BinD Double mkOne b' f' a = histogram b' d where i = toIndex b a d = V.replicate (nBins b) 0 // [(i,f' a)] -- 2D histogram folding data IncrementHist2D = IncrementHist2D { _adder2D :: Histogram (Bin2D BinD BinD) Double , _counter2D :: {-# UNPACK #-} !Double , _rate2D :: {-# UNPACK #-} !Double } deriving (Show) -- | 2D version incrementalizeHist2D :: Bin2D BinD BinD -> ((Double,Double) -> Double) -> Double -> L.Fold (Double,Double) (Histogram (Bin2D BinD BinD) Double) incrementalizeHist2D b f r = L.Fold step begin done where step (IncrementHist2D x' d r') a = IncrementHist2D (Data.Histogram.zip (\x0 y0 -> x0 * r' + y0) x' (mkOne2D b f a)) (d * r' + 1) r' begin = IncrementHist2D (fillBuilderVec (mkSimple b) V.empty) 0 r done (IncrementHist2D a c _) = if c /= 0 then Data.Histogram.map (/c) a else a mkOne2D :: Bin2D BinD BinD -> ((Double,Double) -> Double) -> (Double,Double) -> Histogram (Bin2D BinD BinD) Double mkOne2D b' f' a = histogram b' d where i = toIndex b a d = V.replicate (nBins b) 0 // [(i,f' a)] {-| incremental histogram with pre-defined bins >>> import Control.Foldl.Incremental >>> import qualified Control.Foldl as L >>> let b = binDn 0 2 12 >>> L.fold (incHist b 0.9) [1..10] -} incHist :: BinD -> Double -> Fold Double (Histogram BinD Double) incHist b = incrementalizeHist b (const 1) {-# INLINABLE incHist #-} -- | incremental 2D histogram incHist2D :: Bin2D BinD BinD -> Double -> Fold (Double,Double) (Histogram (Bin2D BinD BinD) Double) incHist2D b = incrementalizeHist2D b (const 1) {-| adaptable histogram fold TODO: integrate Histogram.Adaptable upstream incHist requires a pre-specified bin, which in turn requires an initial pass over the stream to determine the data ranges. For a one pass histogram fold, we require an incremental approach to bin creation, which, in turn, requires some way of creating a histogram from scratch. 'Data.Histogram.Adaptable' and 'Data.Histogram.Bin.BinDU' is a draft solution to enable a one-pass at histogram creation. This function takes - a maximum frequency (thresh) for a bin, which, when triggered causes a bin to be split (at the moment using a uniform distribution assumption which is pretty bad). - a minimum bin size (grain). bins are further constrained to be multiples of this. - a maximum number of bins, which, when triggered, causes bins to be merged. >>> L.fold (incAdaptiveHist 0.2 1.0 10 1.0) [1..1000] provides a histogram with no bin more than 20% frequency size, with a minimum bin size of 1, with at most 10 bins, and a decay rate of 1.0 -} incAdaptiveHist :: Double -> Double -> Int -> Double -> Fold Double (Histogram BinDU Double) incAdaptiveHist thresh grain maxBins rate = Fold step begin done where step (IncrementHist h d _) a = let h' = Data.Histogram.map (*rate) (step' h a) in IncrementHist h' ((d+1)*rate) rate step' x a = checkMaxBins maxBins (maybeSlice thresh grain (checkMaxBins maxBins (addOne grain x a)) a) where checkMaxBins max' x' = if nBins (bins x') > max' then checkMaxBins max' (mergeSmallest x') else x' begin = IncrementHist (histogram (unsafeBinDU V.empty) V.empty) 0 rate done (IncrementHist h c _) = if c /= 0 then Data.Histogram.map (/c) h else h -- helpers addOne :: Double -> Histogram BinDU Double -> Double -> Histogram BinDU Double addOne grain x a | V.length (cuts (bins x)) == 0 = addFirst grain a | a < lowerLimit (bins x) = addLower grain x a | a >= upperLimit (bins x) = addUpper grain x a | otherwise = addMiddle x a addMiddle :: (Bin bin, V.Unbox a, Num a) => Histogram bin a -> BinValue bin -> Histogram bin a addMiddle h a = histogram (bins h) (histData h // [(i, 1 + h `atI` i)]) where i = toIndex (bins h) a addFirst :: Double -> Double -> Histogram BinDU Double addFirst grain a = histogram bin (V.fromList [1.0]) where a' = roundD grain a bin = binDU (V.fromList [a' - grain/2, a' + grain/2]) addLower :: Double -> Histogram BinDU Double-> Double -> Histogram BinDU Double addLower grain x a = x1 where a' = roundD grain a x0 = sliceAt x (a' - grain/2) x1 = addMiddle x0 a addUpper :: Double -> Histogram BinDU Double-> Double -> Histogram BinDU Double addUpper grain x a = x1 where a' = roundD grain a x0 = sliceAt x (a' + grain/2) x1 = addMiddle x0 a maybeSlice :: Double-> Double-> Histogram BinDU Double-> BinValue BinDU-> Histogram BinDU Double maybeSlice thresh' grain' x' a' = let freq = x' `atV` a' i = toIndex (bins x') a' (l,u) = binInterval (bins x') i numGrains = round ((u-l) / grain') :: Int center = l + grain' * fromIntegral (round (fromIntegral numGrains / 2)) in if binSizeN (bins x') i > grain' + 3e-11 && freq / V.sum (histData x') > thresh' then sliceAt x' center else x' roundD :: Double -> Double -> Double roundD grain x = if frac > 0.5 then whole + grain else whole where whole = floorDD frac = (x - floorDD) / grain floorDD = fromIntegral (floorD (x / grain)) * grain floorD x' | x' < 0 = double2Int x' - 1 | otherwise = double2Int x' {-# INLINE roundD #-}