{-# language DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# language BangPatterns #-}
{-# options_ghc -Wno-unused-imports #-}
{-# options_ghc -Wno-unused-top-binds #-}
{-# options_ghc -Wno-type-defaults #-}
module Data.RPTree.Conduit
  (
    tree,
  forest,
  RPTreeConfig(..),
  rpTreeCfg
  -- ** helpers
  , dataSource
  , liftC
  )
where

import Control.Monad (replicateM)
import Data.Functor (void)
import GHC.Word (Word64)
import GHC.Stack (HasCallStack)

-- conduit
import qualified Data.Conduit as C (ConduitT, runConduit, yield, await, transPipe)
import Data.Conduit ((.|))
import qualified Data.Conduit.Combinators as C (map, mapM, last, scanl, print, foldl)
import qualified Data.Conduit.List as C (chunksOf, unfold, unfoldM, mapAccum)
-- containers
import qualified Data.IntMap.Strict as IM (IntMap, fromList, insert, lookup, map, mapWithKey, traverseWithKey, foldlWithKey, foldrWithKey, intersectionWith)
-- splitmix-distributions
import System.Random.SplitMix.Distributions (Gen, sample, GenT, sampleT, normal, stdNormal, stdUniform, exponential, bernoulli, uniformR)
-- transformers
import Control.Monad.Trans.State (StateT(..), runStateT, evalStateT, State, runState, evalState)
import Control.Monad.Trans.Class (MonadTrans(..))
-- vector
import qualified Data.Vector as V (Vector, replicateM, fromList)
import qualified Data.Vector.Generic as VG (Vector(..), unfoldrM, length, replicateM, (!), map, freeze, thaw, take, drop, unzip)
import qualified Data.Vector.Unboxed as VU (Vector, Unbox, fromList)
import qualified Data.Vector.Storable as VS (Vector)

import Data.RPTree.Gen (sparse, dense)
import Data.RPTree.Internal (RPTree(..), RPForest, RPT(..), levels, points, Inner(..), innerSD, innerSS, metricSSL2, metricSDL2, SVector(..), fromListSv, DVector(..), fromListDv, partitionAtMedian, RPTError(..), Embed(..))


liftC :: (Monad m, MonadTrans t) => C.ConduitT i o m r -> C.ConduitT i o (t m) r
liftC :: ConduitT i o m r -> ConduitT i o (t m) r
liftC = (forall a. m a -> t m a)
-> ConduitT i o m r -> ConduitT i o (t m) r
forall (m :: * -> *) (n :: * -> *) i o r.
Monad m =>
(forall a. m a -> n a) -> ConduitT i o m r -> ConduitT i o n r
C.transPipe forall a. m a -> t m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift

-- | Populate a tree from a data stream
--
-- Assumptions on the data source:
--
-- * non-empty : contains at least one value
--
-- * stationary : each chunk is representative of the whole dataset
--
-- * bounded : we wait until the end of the stream to produce a result
tree :: (Monad m, Inner SVector v) =>
        Word64 -- ^ random seed
     -> Int -- ^ max tree depth
     -> Int -- ^ min leaf size
     -> Int -- ^ data chunk size
     -> Double -- ^ nonzero density of projection vectors
     -> Int -- ^ dimension of projection vectors
     -> C.ConduitT () (Embed v Double x) m () -- ^ data source
     -> m (RPTree Double () (V.Vector (Embed v Double x)))
tree :: Word64
-> Int
-> Int
-> Int
-> Double
-> Int
-> ConduitT () (Embed v Double x) m ()
-> m (RPTree Double () (Vector (Embed v Double x)))
tree Word64
seed Int
maxDepth Int
minLeaf Int
n Double
pnz Int
dim ConduitT () (Embed v Double x) m ()
src = do
  let
    rvs :: Vector (SVector Double)
rvs = Word64 -> Gen (Vector (SVector Double)) -> Vector (SVector Double)
forall a. Word64 -> Gen a -> a
sample Word64
seed (Gen (Vector (SVector Double)) -> Vector (SVector Double))
-> Gen (Vector (SVector Double)) -> Vector (SVector Double)
forall a b. (a -> b) -> a -> b
$ Int
-> GenT Identity (SVector Double) -> Gen (Vector (SVector Double))
forall (m :: * -> *) a. Monad m => Int -> m a -> m (Vector a)
V.replicateM Int
maxDepth (Double
-> Int -> GenT Identity Double -> GenT Identity (SVector Double)
forall (m :: * -> *) a.
(Monad m, Unbox a) =>
Double -> Int -> GenT m a -> GenT m (SVector a)
sparse Double
pnz Int
dim GenT Identity Double
forall (m :: * -> *). Monad m => GenT m Double
stdNormal)
  RPT Double () (Vector (Embed v Double x))
t <- ConduitT () Void m (RPT Double () (Vector (Embed v Double x)))
-> m (RPT Double () (Vector (Embed v Double x)))
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
C.runConduit (ConduitT () Void m (RPT Double () (Vector (Embed v Double x)))
 -> m (RPT Double () (Vector (Embed v Double x))))
-> ConduitT () Void m (RPT Double () (Vector (Embed v Double x)))
-> m (RPT Double () (Vector (Embed v Double x)))
forall a b. (a -> b) -> a -> b
$ ConduitT () (Embed v Double x) m ()
src ConduitT () (Embed v Double x) m ()
-> ConduitM
     (Embed v Double x)
     Void
     m
     (RPT Double () (Vector (Embed v Double x)))
-> ConduitT () Void m (RPT Double () (Vector (Embed v Double x)))
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
                      Int
-> Int
-> Int
-> Vector (SVector Double)
-> ConduitM
     (Embed v Double x)
     Void
     m
     (RPT Double () (Vector (Embed v Double x)))
forall (m :: * -> *) (u :: * -> *) (v :: * -> *) d x o.
(Monad m, Inner u v, Ord d, Unbox d, Fractional d) =>
Int
-> Int
-> Int
-> Vector (u d)
-> ConduitT (Embed v d x) o m (RPT d () (Vector (Embed v d x)))
insertC Int
maxDepth Int
minLeaf Int
n Vector (SVector Double)
rvs
  RPTree Double () (Vector (Embed v Double x))
-> m (RPTree Double () (Vector (Embed v Double x)))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (RPTree Double () (Vector (Embed v Double x))
 -> m (RPTree Double () (Vector (Embed v Double x))))
-> RPTree Double () (Vector (Embed v Double x))
-> m (RPTree Double () (Vector (Embed v Double x)))
forall a b. (a -> b) -> a -> b
$ Vector (SVector Double)
-> RPT Double () (Vector (Embed v Double x))
-> RPTree Double () (Vector (Embed v Double x))
forall d l a. Vector (SVector d) -> RPT d l a -> RPTree d l a
RPTree Vector (SVector Double)
rvs RPT Double () (Vector (Embed v Double x))
t





-- | Incrementally build a tree
insertC :: (Monad m, Inner u v, Ord d, VU.Unbox d, Fractional d) =>
           Int -- ^ max tree depth
        -> Int -- ^ min leaf size
        -> Int -- ^ data chunk size
        -> V.Vector (u d) -- ^ random projection vectors
        -> C.ConduitT
           (Embed v d x)
           o
           m
           (RPT d () (V.Vector (Embed v d x))) 
insertC :: Int
-> Int
-> Int
-> Vector (u d)
-> ConduitT (Embed v d x) o m (RPT d () (Vector (Embed v d x)))
insertC Int
maxDepth Int
minLeaf Int
n Vector (u d)
rvs = Int
-> RPT d () (Vector (Embed v d x))
-> (RPT d () (Vector (Embed v d x))
    -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x)))
-> ConduitT (Embed v d x) o m (RPT d () (Vector (Embed v d x)))
forall (m :: * -> *) t a o.
Monad m =>
Int -> t -> (t -> Vector a -> t) -> ConduitT a o m t
chunkedAccum Int
n RPT d () (Vector (Embed v d x))
forall d. RPT d () (Vector (Embed v d x))
z (Int
-> Int
-> Vector (u d)
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
forall (v1 :: * -> *) (u :: * -> *) d (v :: * -> *) x.
(Vector v1 (u d), Ord d, Inner u v, Unbox d, Fractional d) =>
Int
-> Int
-> v1 (u d)
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
insert Int
maxDepth Int
minLeaf Vector (u d)
rvs)
  where
    z :: RPT d () (Vector (Embed v d x))
z = () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
forall a. Monoid a => a
mempty



-- | Populate a forest from a data stream
--
-- Assumptions on the data source:
--
-- * non-empty : contains at least one value
--
-- * stationary : each chunk is representative of the whole dataset
--
-- * bounded : we wait until the end of the stream to produce a result
forest :: (Monad m, Inner SVector v) =>
          Word64 -- ^ random seed
       -> Int -- ^ max tree depth, \(l > 1\) 
       -> Int -- ^ min leaf size, \(m_{leaf} > 1\)
       -> Int -- ^ number of trees, \(n_t > 1\)
       -> Int -- ^ data chunk size, \(n_{chunk} > 3\)
       -> Double -- ^ nonzero density of projection vectors, \(p_{nz} \in (0, 1)\)
       -> Int -- ^ dimension of projection vectors, \(d > 1\)
       -> C.ConduitT () (Embed v Double x) m () -- ^ data source
       -> m (RPForest Double (V.Vector (Embed v Double x)))
forest :: Word64
-> Int
-> Int
-> Int
-> Int
-> Double
-> Int
-> ConduitT () (Embed v Double x) m ()
-> m (RPForest Double (Vector (Embed v Double x)))
forest Word64
seed Int
maxd Int
minl Int
ntrees Int
chunksize Double
pnz Int
dim ConduitT () (Embed v Double x) m ()
src = do
  let
    rvss :: IntMap (Vector (SVector Double))
rvss = Word64
-> Gen (IntMap (Vector (SVector Double)))
-> IntMap (Vector (SVector Double))
forall a. Word64 -> Gen a -> a
sample Word64
seed (Gen (IntMap (Vector (SVector Double)))
 -> IntMap (Vector (SVector Double)))
-> Gen (IntMap (Vector (SVector Double)))
-> IntMap (Vector (SVector Double))
forall a b. (a -> b) -> a -> b
$ do
      [Vector (SVector Double)]
rvs <- Int
-> Gen (Vector (SVector Double))
-> GenT Identity [Vector (SVector Double)]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
ntrees (Gen (Vector (SVector Double))
 -> GenT Identity [Vector (SVector Double)])
-> Gen (Vector (SVector Double))
-> GenT Identity [Vector (SVector Double)]
forall a b. (a -> b) -> a -> b
$ Int
-> GenT Identity (SVector Double) -> Gen (Vector (SVector Double))
forall (m :: * -> *) a. Monad m => Int -> m a -> m (Vector a)
V.replicateM Int
maxd (Double
-> Int -> GenT Identity Double -> GenT Identity (SVector Double)
forall (m :: * -> *) a.
(Monad m, Unbox a) =>
Double -> Int -> GenT m a -> GenT m (SVector a)
sparse Double
pnz Int
dim GenT Identity Double
forall (m :: * -> *). Monad m => GenT m Double
stdNormal)
      IntMap (Vector (SVector Double))
-> Gen (IntMap (Vector (SVector Double)))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (IntMap (Vector (SVector Double))
 -> Gen (IntMap (Vector (SVector Double))))
-> IntMap (Vector (SVector Double))
-> Gen (IntMap (Vector (SVector Double)))
forall a b. (a -> b) -> a -> b
$ [(Int, Vector (SVector Double))]
-> IntMap (Vector (SVector Double))
forall a. [(Int, a)] -> IntMap a
IM.fromList ([(Int, Vector (SVector Double))]
 -> IntMap (Vector (SVector Double)))
-> [(Int, Vector (SVector Double))]
-> IntMap (Vector (SVector Double))
forall a b. (a -> b) -> a -> b
$ [Int]
-> [Vector (SVector Double)] -> [(Int, Vector (SVector Double))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 .. ] [Vector (SVector Double)]
rvs
  IntMap (RPT Double () (Vector (Embed v Double x)))
ts <- ConduitT
  () Void m (IntMap (RPT Double () (Vector (Embed v Double x))))
-> m (IntMap (RPT Double () (Vector (Embed v Double x))))
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
C.runConduit (ConduitT
   () Void m (IntMap (RPT Double () (Vector (Embed v Double x))))
 -> m (IntMap (RPT Double () (Vector (Embed v Double x)))))
-> ConduitT
     () Void m (IntMap (RPT Double () (Vector (Embed v Double x))))
-> m (IntMap (RPT Double () (Vector (Embed v Double x))))
forall a b. (a -> b) -> a -> b
$ ConduitT () (Embed v Double x) m ()
src ConduitT () (Embed v Double x) m ()
-> ConduitM
     (Embed v Double x)
     Void
     m
     (IntMap (RPT Double () (Vector (Embed v Double x))))
-> ConduitT
     () Void m (IntMap (RPT Double () (Vector (Embed v Double x))))
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
                       Int
-> Int
-> Int
-> IntMap (Vector (SVector Double))
-> ConduitM
     (Embed v Double x)
     Void
     m
     (IntMap (RPT Double () (Vector (Embed v Double x))))
forall (m :: * -> *) d (u :: * -> *) (v :: * -> *) (v1 :: * -> *) x
       o.
(Monad m, Ord d, Inner u v, Unbox d, Fractional d,
 Vector v1 (u d)) =>
Int
-> Int
-> Int
-> IntMap (v1 (u d))
-> ConduitT
     (Embed v d x) o m (IntMap (RPT d () (Vector (Embed v d x))))
insertMultiC Int
maxd Int
minl Int
chunksize IntMap (Vector (SVector Double))
rvss
  RPForest Double (Vector (Embed v Double x))
-> m (RPForest Double (Vector (Embed v Double x)))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (RPForest Double (Vector (Embed v Double x))
 -> m (RPForest Double (Vector (Embed v Double x))))
-> RPForest Double (Vector (Embed v Double x))
-> m (RPForest Double (Vector (Embed v Double x)))
forall a b. (a -> b) -> a -> b
$ (Vector (SVector Double)
 -> RPT Double () (Vector (Embed v Double x))
 -> RPTree Double () (Vector (Embed v Double x)))
-> IntMap (Vector (SVector Double))
-> IntMap (RPT Double () (Vector (Embed v Double x)))
-> RPForest Double (Vector (Embed v Double x))
forall a b c. (a -> b -> c) -> IntMap a -> IntMap b -> IntMap c
IM.intersectionWith Vector (SVector Double)
-> RPT Double () (Vector (Embed v Double x))
-> RPTree Double () (Vector (Embed v Double x))
forall d l a. Vector (SVector d) -> RPT d l a -> RPTree d l a
RPTree IntMap (Vector (SVector Double))
rvss IntMap (RPT Double () (Vector (Embed v Double x)))
ts

data RPTreeConfig = RPCfg {
  RPTreeConfig -> Int
fpMaxTreeDepth :: Int -- ^ max tree depth \(l > 1\) 
  -- , fpMinLeafSize :: Int -- ^ min leaf size 
  , RPTreeConfig -> Int
fpDataChunkSize :: Int -- ^ data chunk size
  , RPTreeConfig -> Double
fpProjNzDensity :: Double -- ^ nonzero density of projection vectors \(p_{nz} \in (0, 1)\)
                          } deriving (Int -> RPTreeConfig -> ShowS
[RPTreeConfig] -> ShowS
RPTreeConfig -> String
(Int -> RPTreeConfig -> ShowS)
-> (RPTreeConfig -> String)
-> ([RPTreeConfig] -> ShowS)
-> Show RPTreeConfig
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RPTreeConfig] -> ShowS
$cshowList :: [RPTreeConfig] -> ShowS
show :: RPTreeConfig -> String
$cshow :: RPTreeConfig -> String
showsPrec :: Int -> RPTreeConfig -> ShowS
$cshowsPrec :: Int -> RPTreeConfig -> ShowS
Show)


-- | Configure the rp-tree tree construction process with some natural defaults
rpTreeCfg :: Int -- ^ min leaf size
          -> Int -- ^ number of points in the dataset
          -> Int -- ^ vector dimension of the data points
          -> RPTreeConfig
rpTreeCfg :: Int -> Int -> Int -> RPTreeConfig
rpTreeCfg Int
minl Int
n Int
d = Int -> Int -> Double -> RPTreeConfig
RPCfg Int
maxd Int
nchunk Double
pnz
  where
    maxd :: Int
maxd = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double -> Double -> Double
forall a. Floating a => a -> a -> a
logBase Double
2 (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
minl)
    nchunk :: Int
nchunk = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
100
    pnzMin :: Double
pnzMin = Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double -> Double
forall a. Floating a => a -> a -> a
logBase Double
10 (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
d)
    pnz :: Double
pnz = Double
pnzMin Double -> Double -> Double
forall a. Ord a => a -> a -> a
`min` Double
1.0





insertMultiC :: (Monad m, Ord d, Inner u v, VU.Unbox d, Fractional d, VG.Vector v1 (u d)) =>
                Int  -- ^ max tree depth
             -> Int -- ^ min leaf size
             -> Int -- ^ chunk size
             -> IM.IntMap (v1 (u d)) -- one entry per tree
             -> C.ConduitT
                (Embed v d x)
                o
                m
                (IM.IntMap (RPT d () (V.Vector (Embed v d x))))
insertMultiC :: Int
-> Int
-> Int
-> IntMap (v1 (u d))
-> ConduitT
     (Embed v d x) o m (IntMap (RPT d () (Vector (Embed v d x))))
insertMultiC Int
maxd Int
minl Int
n IntMap (v1 (u d))
rvss = Int
-> IntMap (RPT d () (Vector (Embed v d x)))
-> (IntMap (RPT d () (Vector (Embed v d x)))
    -> Vector (Embed v d x)
    -> IntMap (RPT d () (Vector (Embed v d x))))
-> ConduitT
     (Embed v d x) o m (IntMap (RPT d () (Vector (Embed v d x))))
forall (m :: * -> *) t a o.
Monad m =>
Int -> t -> (t -> Vector a -> t) -> ConduitT a o m t
chunkedAccum Int
n IntMap (RPT d () (Vector (Embed v d x)))
forall d. IntMap (RPT d () (Vector (Embed v d x)))
im0 (Int
-> Int
-> IntMap (v1 (u d))
-> IntMap (RPT d () (Vector (Embed v d x)))
-> Vector (Embed v d x)
-> IntMap (RPT d () (Vector (Embed v d x)))
forall d (u :: * -> *) (v :: * -> *) (v1 :: * -> *) x.
(Ord d, Inner u v, Unbox d, Fractional d, Vector v1 (u d)) =>
Int
-> Int
-> IntMap (v1 (u d))
-> IntMap (RPT d () (Vector (Embed v d x)))
-> Vector (Embed v d x)
-> IntMap (RPT d () (Vector (Embed v d x)))
insertMulti Int
maxd Int
minl IntMap (v1 (u d))
rvss)
  where
    im0 :: IntMap (RPT d () (Vector (Embed v d x)))
im0 = (v1 (u d) -> RPT d () (Vector (Embed v d x)))
-> IntMap (v1 (u d)) -> IntMap (RPT d () (Vector (Embed v d x)))
forall a b. (a -> b) -> IntMap a -> IntMap b
IM.map (RPT d () (Vector (Embed v d x))
-> v1 (u d) -> RPT d () (Vector (Embed v d x))
forall a b. a -> b -> a
const RPT d () (Vector (Embed v d x))
forall d. RPT d () (Vector (Embed v d x))
z) IntMap (v1 (u d))
rvss
    z :: RPT d () (Vector (Embed v d x))
z = () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
forall a. Monoid a => a
mempty


{-# SCC insertMulti #-}
insertMulti :: (Ord d, Inner u v, VU.Unbox d, Fractional d, VG.Vector v1 (u d)) =>
               Int
            -> Int
            -> IM.IntMap (v1 (u d)) -- ^ projection vectors
            -> IM.IntMap (RPT d () (V.Vector (Embed v d x))) -- ^ accumulator of subtrees
            -> V.Vector (Embed v d x) -- ^ data chunk
            -> IM.IntMap (RPT d () (V.Vector (Embed v d x)))
insertMulti :: Int
-> Int
-> IntMap (v1 (u d))
-> IntMap (RPT d () (Vector (Embed v d x)))
-> Vector (Embed v d x)
-> IntMap (RPT d () (Vector (Embed v d x)))
insertMulti Int
maxd Int
minl IntMap (v1 (u d))
rvss IntMap (RPT d () (Vector (Embed v d x)))
tacc Vector (Embed v d x)
xs =
  ((Int
  -> RPT d () (Vector (Embed v d x))
  -> RPT d () (Vector (Embed v d x)))
 -> IntMap (RPT d () (Vector (Embed v d x)))
 -> IntMap (RPT d () (Vector (Embed v d x))))
-> IntMap (RPT d () (Vector (Embed v d x)))
-> (Int
    -> RPT d () (Vector (Embed v d x))
    -> RPT d () (Vector (Embed v d x)))
-> IntMap (RPT d () (Vector (Embed v d x)))
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int
 -> RPT d () (Vector (Embed v d x))
 -> RPT d () (Vector (Embed v d x)))
-> IntMap (RPT d () (Vector (Embed v d x)))
-> IntMap (RPT d () (Vector (Embed v d x)))
forall a b. (Int -> a -> b) -> IntMap a -> IntMap b
IM.mapWithKey IntMap (RPT d () (Vector (Embed v d x)))
tacc ((Int
  -> RPT d () (Vector (Embed v d x))
  -> RPT d () (Vector (Embed v d x)))
 -> IntMap (RPT d () (Vector (Embed v d x))))
-> (Int
    -> RPT d () (Vector (Embed v d x))
    -> RPT d () (Vector (Embed v d x)))
-> IntMap (RPT d () (Vector (Embed v d x)))
forall a b. (a -> b) -> a -> b
$ \ !Int
i !RPT d () (Vector (Embed v d x))
t -> case Int -> IntMap (v1 (u d)) -> Maybe (v1 (u d))
forall a. Int -> IntMap a -> Maybe a
IM.lookup Int
i IntMap (v1 (u d))
rvss of
                                      Just !v1 (u d)
rvs -> Int
-> Int
-> v1 (u d)
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
forall (v1 :: * -> *) (u :: * -> *) d (v :: * -> *) x.
(Vector v1 (u d), Ord d, Inner u v, Unbox d, Fractional d) =>
Int
-> Int
-> v1 (u d)
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
insert Int
maxd Int
minl v1 (u d)
rvs RPT d () (Vector (Embed v d x))
t Vector (Embed v d x)
xs
                                      Maybe (v1 (u d))
_        -> RPT d () (Vector (Embed v d x))
t

{-# SCC insert #-}
insert :: (VG.Vector v1 (u d), Ord d, Inner u v, VU.Unbox d, Fractional d) =>
          Int -- ^ max tree depth
       -> Int -- ^ min leaf size
       -> v1 (u d) -- ^ projection vectors
       -> RPT d () (V.Vector (Embed v d x)) -- ^ accumulator
       -> V.Vector (Embed v d x) -- ^ data chunk
       -> RPT d () (V.Vector (Embed v d x))
insert :: Int
-> Int
-> v1 (u d)
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
insert Int
maxDepth Int
minLeaf v1 (u d)
rvs = Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop Int
0
  where
    z :: RPT d () (Vector (Embed v d x))
z = () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
forall a. Monoid a => a
mempty
    loop :: Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop Int
ixLev !RPT d () (Vector (Embed v d x))
tt Vector (Embed v d x)
xs =
      let
        r :: u d
r = v1 (u d)
rvs v1 (u d) -> Int -> u d
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
VG.! Int
ixLev -- proj vector for current level
      in
        case RPT d () (Vector (Embed v d x))
tt of

          b :: RPT d () (Vector (Embed v d x))
b@(Bin ()
_ d
thr0 Margin d
margin0 RPT d () (Vector (Embed v d x))
tl0 RPT d () (Vector (Embed v d x))
tr0) ->
            if Int
ixLev Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
maxDepth
              then RPT d () (Vector (Embed v d x))
b -- return current subtree
              else
              case u d
-> Vector (Embed v d x)
-> Maybe (d, Margin d, Vector (Embed v d x), Vector (Embed v d x))
forall a (u :: * -> *) (v :: * -> *) x.
(Ord a, Inner u v, Unbox a, Fractional a) =>
u a
-> Vector (Embed v a x)
-> Maybe (a, Margin a, Vector (Embed v a x), Vector (Embed v a x))
partitionAtMedian u d
r Vector (Embed v d x)
xs of
                Maybe (d, Margin d, Vector (Embed v d x), Vector (Embed v d x))
Nothing -> () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
forall a. Monoid a => a
mempty
                Just (d
thr, Margin d
margin, Vector (Embed v d x)
ll, Vector (Embed v d x)
rr) -> ()
-> d
-> Margin d
-> RPT d () (Vector (Embed v d x))
-> RPT d () (Vector (Embed v d x))
-> RPT d () (Vector (Embed v d x))
forall d l a.
l -> d -> Margin d -> RPT d l a -> RPT d l a -> RPT d l a
Bin () d
thr' Margin d
margin' RPT d () (Vector (Embed v d x))
tl RPT d () (Vector (Embed v d x))
tr
                  where
                    margin' :: Margin d
margin' = Margin d
margin0 Margin d -> Margin d -> Margin d
forall a. Semigroup a => a -> a -> a
<> Margin d
margin
                    thr' :: d
thr' = (d
thr0 d -> d -> d
forall a. Num a => a -> a -> a
+ d
thr) d -> d -> d
forall a. Fractional a => a -> a -> a
/ d
2
                    tl :: RPT d () (Vector (Embed v d x))
tl = Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop (Int
ixLev Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) RPT d () (Vector (Embed v d x))
tl0 Vector (Embed v d x)
ll
                    tr :: RPT d () (Vector (Embed v d x))
tr = Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop (Int
ixLev Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) RPT d () (Vector (Embed v d x))
tr0 Vector (Embed v d x)
rr

          Tip ()
_ Vector (Embed v d x)
xs0 -> do
            let xs' :: Vector (Embed v d x)
xs' = Vector (Embed v d x)
xs Vector (Embed v d x)
-> Vector (Embed v d x) -> Vector (Embed v d x)
forall a. Semigroup a => a -> a -> a
<> Vector (Embed v d x)
xs0
            if Int
ixLev Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
maxDepth Bool -> Bool -> Bool
|| Vector (Embed v d x) -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Vector (Embed v d x)
xs' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
minLeaf
              then () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
xs' -- concat data in leaf
              else
              case u d
-> Vector (Embed v d x)
-> Maybe (d, Margin d, Vector (Embed v d x), Vector (Embed v d x))
forall a (u :: * -> *) (v :: * -> *) x.
(Ord a, Inner u v, Unbox a, Fractional a) =>
u a
-> Vector (Embed v a x)
-> Maybe (a, Margin a, Vector (Embed v a x), Vector (Embed v a x))
partitionAtMedian u d
r Vector (Embed v d x)
xs' of
                Maybe (d, Margin d, Vector (Embed v d x), Vector (Embed v d x))
Nothing -> () -> Vector (Embed v d x) -> RPT d () (Vector (Embed v d x))
forall d l a. l -> a -> RPT d l a
Tip () Vector (Embed v d x)
forall a. Monoid a => a
mempty
                Just (d
thr, Margin d
margin, Vector (Embed v d x)
ll, Vector (Embed v d x)
rr) -> ()
-> d
-> Margin d
-> RPT d () (Vector (Embed v d x))
-> RPT d () (Vector (Embed v d x))
-> RPT d () (Vector (Embed v d x))
forall d l a.
l -> d -> Margin d -> RPT d l a -> RPT d l a -> RPT d l a
Bin () d
thr Margin d
margin RPT d () (Vector (Embed v d x))
tl RPT d () (Vector (Embed v d x))
tr
                  where
                    tl :: RPT d () (Vector (Embed v d x))
tl = Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop (Int
ixLev Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) RPT d () (Vector (Embed v d x))
forall d. RPT d () (Vector (Embed v d x))
z Vector (Embed v d x)
ll
                    tr :: RPT d () (Vector (Embed v d x))
tr = Int
-> RPT d () (Vector (Embed v d x))
-> Vector (Embed v d x)
-> RPT d () (Vector (Embed v d x))
loop (Int
ixLev Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) RPT d () (Vector (Embed v d x))
forall d. RPT d () (Vector (Embed v d x))
z Vector (Embed v d x)
rr




-- | Aggregate the input stream in chunks of a given size (semantics of 'C.chunksOf'), and fold over the resulting stream building up an accumulator structure (e.g. a tree)
chunkedAccum :: (Monad m) =>
                Int -- ^ chunk size
             -> t -- ^ initial accumulator state
             -> (t -> V.Vector a -> t)
             -> C.ConduitT a o m t
chunkedAccum :: Int -> t -> (t -> Vector a -> t) -> ConduitT a o m t
chunkedAccum Int
n t
z t -> Vector a -> t
f = Int -> ConduitT a [a] m ()
forall (m :: * -> *) a. Monad m => Int -> ConduitT a [a] m ()
C.chunksOf Int
n ConduitT a [a] m () -> ConduitM [a] o m t -> ConduitT a o m t
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
                     ([a] -> Vector a) -> ConduitT [a] (Vector a) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map [a] -> Vector a
forall a. [a] -> Vector a
V.fromList ConduitT [a] (Vector a) m ()
-> ConduitM (Vector a) o m t -> ConduitM [a] o m t
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
                     (t -> Vector a -> t) -> t -> ConduitM (Vector a) o m t
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> a) -> a -> ConduitT b o m a
C.foldl t -> Vector a -> t
f t
z

-- | Source of random data points
dataSource :: (Monad m) =>
              Int -- ^ number of vectors to generate
           -> GenT m a -- ^ random generator for the vector components
           -> C.ConduitT i a (GenT m) ()
dataSource :: Int -> GenT m a -> ConduitT i a (GenT m) ()
dataSource Int
n GenT m a
gg = ((Int -> GenT m (Maybe (a, Int)))
 -> Int -> ConduitT i a (GenT m) ())
-> Int
-> (Int -> GenT m (Maybe (a, Int)))
-> ConduitT i a (GenT m) ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int -> GenT m (Maybe (a, Int))) -> Int -> ConduitT i a (GenT m) ()
forall (m :: * -> *) b a i.
Monad m =>
(b -> m (Maybe (a, b))) -> b -> ConduitT i a m ()
C.unfoldM Int
0 ((Int -> GenT m (Maybe (a, Int))) -> ConduitT i a (GenT m) ())
-> (Int -> GenT m (Maybe (a, Int))) -> ConduitT i a (GenT m) ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
  if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n
    then Maybe (a, Int) -> GenT m (Maybe (a, Int))
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe (a, Int)
forall a. Maybe a
Nothing
    else do
      a
x <- GenT m a
gg
      Maybe (a, Int) -> GenT m (Maybe (a, Int))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe (a, Int) -> GenT m (Maybe (a, Int)))
-> Maybe (a, Int) -> GenT m (Maybe (a, Int))
forall a b. (a -> b) -> a -> b
$ (a, Int) -> Maybe (a, Int)
forall a. a -> Maybe a
Just (a
x, Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)




-- -- sinks

-- tree' :: (Monad m, Inner SVector v) =>
--         Word64 -- ^ random seed
--      -> Int -- ^ max tree depth
--      -> Int -- ^ min leaf size
--      -> Int -- ^ data chunk size
--      -> Double -- ^ nonzero density of projection vectors
--      -> Int -- ^ dimension of projection vectors
--       -> C.ConduitT (v Double) o m (RPTree Double (V.Vector (v Double)))
-- tree' seed maxDepth minLeaf n pnz dim = do
--   let
--     rvs = sample seed $ V.replicateM maxDepth (sparse pnz dim stdNormal)
--   t <- insertC maxDepth minLeaf n rvs
--   pure $ RPTree rvs t

-- forest' :: (Monad m, Inner SVector v) =>
--            Word64 -- ^ random seed
--         -> Int -- ^ max tree depth
--         -> Int -- ^ min leaf size
--         -> Int -- ^ number of trees
--         -> Int -- ^ data chunk size
--         -> Double -- ^ nonzero density of projection vectors
--         -> Int -- ^ dimension of projection vectors
--         -> C.ConduitT (v Double) o m (IM.IntMap (RPTree Double (V.Vector (v Double))))
-- forest' seed maxd minl ntrees chunksize pnz dim = do
--   let
--     rvss = sample seed $ do
--       rvs <- replicateM ntrees $ V.replicateM maxd (sparse pnz dim stdNormal)
--       pure $ IM.fromList $ zip [0 .. ] rvs
--   ts <- insertMultiC maxd minl chunksize rvss
--   pure $ IM.intersectionWith RPTree rvss ts