{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
module HaskellWorks.Data.EliasFano
( EliasFano(..)
, fromWord64s
, toWord64s
, empty
, size
) where
import Control.DeepSeq
import Data.Word
import GHC.Generics
import HaskellWorks.Data.AtIndex hiding (end)
import HaskellWorks.Data.Bits.BitWise
import HaskellWorks.Data.Bits.Log2
import HaskellWorks.Data.EliasFano.Internal
import HaskellWorks.Data.Positioning
import HaskellWorks.Data.RankSelect.Base.Select1
import HaskellWorks.Data.RankSelect.CsPoppy
import Prelude hiding (length, take)
import qualified Data.Vector.Storable as DVS
import qualified HaskellWorks.Data.PackedVector.PackedVector64 as PV
data EliasFano = EliasFano
{ efBucketBits :: !CsPoppy
, efLoSegments :: !PV.PackedVector64
, efLoBitCount :: !Count
, efCount :: !Count
} deriving (Eq, Show, Generic)
instance NFData EliasFano
size :: EliasFano -> Count
size = efCount
empty :: EliasFano
empty = EliasFano
{ efBucketBits = makeCsPoppy DVS.empty
, efLoSegments = PV.empty
, efLoBitCount = 0
, efCount = 0
}
fromWord64s :: [Word64] -> EliasFano
fromWord64s ws = case foldCountAndLast ws of
(Just end', _) -> EliasFano
{ efBucketBits = makeCsPoppy . DVS.fromList $ hiSegmentToWords his
, efLoSegments = PV.fromList loBits' los
, efLoBitCount = loBits'
, efCount = length'
}
where length' = length ws
loBits' = fromIntegral (log2 (end' `divup` length')) :: Count
hiMask = maxBound .<. loBits' :: Word64
loMask = comp hiMask :: Word64
his = (.>. loBits') . (.&. hiMask) <$> ws
los = (.&. loMask) <$> ws
(Nothing, _) -> empty
toWord64s :: EliasFano -> [Word64]
toWord64s ef = uncurry combine <$> zip (bucketBitsToHiSegment bucketBits) (PV.toList (efLoSegments ef))
where combine hi lo = (hi .<. efLoBitCount ef) .|. lo
bucketBits = bucketWordsToBucketBools (efCount ef) (csPoppyBits (efBucketBits ef))
instance Container EliasFano where
type Elem EliasFano = Word64
instance Length EliasFano where
length = efCount
{-# INLINE length #-}
instance AtIndex EliasFano where
(!!!) = atIndex
atIndex ef i = atIndex (efLoSegments ef) i .|. ((select1 (efBucketBits ef) j - j) .<. efLoBitCount ef)
where j = fromIntegral i + 1
{-# INLINE (!!!) #-}
{-# INLINE atIndex #-}