module DenseIntSet
(
DenseIntSet,
foldable,
topValueIndices,
filteredIndices,
intersection,
union,
size,
lookup,
presentElementsVector,
indexVector,
filterVector,
presentElementsUnfoldr,
absentElementsUnfoldr,
vectorElementsUnfoldr,
DenseIntSetComposition,
compose,
composeList,
)
where
import DenseIntSet.Prelude hiding (intersection, union, lookup)
import qualified DeferredFolds.Unfoldr as Unfoldr
import qualified Data.Vector as Vector
import qualified Data.Vector.Unboxed as UnboxedVector
import qualified Data.Vector.Generic as GenericVector
import qualified Data.Vector.Generic.Mutable as MutableGenericVector
import qualified Data.Vector.Algorithms.Intro as IntroVectorAlgorithm
newtype DenseIntSet = DenseIntSet (UnboxedVector Word64)
deriving instance Eq DenseIntSet
deriving instance Ord DenseIntSet
instance Show DenseIntSet where
show = show . toList
deriving instance Serialize DenseIntSet
instance Hashable DenseIntSet where
hashWithSalt salt (DenseIntSet vec) = hashWithSalt salt (GenericVector.toList vec)
instance IsList DenseIntSet where
type Item DenseIntSet = Int
fromList list = foldable (foldl' max 0 list) list
toList = toList . presentElementsUnfoldr
foldable :: Foldable foldable => Int -> foldable Int -> DenseIntSet
foldable size foldable = let
!wordsAmount = divCeiling size 64
in DenseIntSet $ runST $ do
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ foldable $ \ index -> let
(wordIndex, bitIndex) = divMod index 64
in MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
GenericVector.unsafeFreeze indexSetMVec
intersection :: DenseIntSetComposition -> DenseIntSet
intersection = zipWords (.&.) maxBound
union :: DenseIntSetComposition -> DenseIntSet
union = zipWords (.|.) 0
zipWords :: (Word64 -> Word64 -> Word64) -> Word64 -> DenseIntSetComposition -> DenseIntSet
zipWords append empty (DenseIntSetComposition minLength vecs) = DenseIntSet $ runST $ let
wordIndexUnfoldr = Unfoldr.intsInRange 0 (pred minLength)
vecVec = Vector.fromList vecs
vecUnfoldr = Unfoldr.foldable vecVec
wordUnfoldrAt wordIndex = fmap (flip GenericVector.unsafeIndex wordIndex) vecUnfoldr
finalWordAt = foldr append empty . wordUnfoldrAt
in do
indexSetMVec <- MutableGenericVector.new minLength
forM_ wordIndexUnfoldr $ \ index -> MutableGenericVector.unsafeWrite indexSetMVec index (finalWordAt index)
GenericVector.unsafeFreeze indexSetMVec
topValueIndices :: (GenericVector.Vector vector a, GenericVector.Vector vector (a, Int)) => (a -> a -> Ordering) -> Int -> vector a -> DenseIntSet
topValueIndices compare amount valueVec = let
valuesAmount = GenericVector.length valueVec
limitedAmount = min amount valuesAmount
wordsAmount = divCeiling valuesAmount 64
in runST $ do
pairMVec <- GenericVector.unsafeThaw (GenericVector.imap (\ index count -> (count, index)) valueVec)
IntroVectorAlgorithm.selectBy (\ a b -> compare (fst b) (fst a)) pairMVec limitedAmount
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ (Unfoldr.intsInRange 0 (pred limitedAmount)) $ \ pairIndex -> do
(_, index) <- MutableGenericVector.unsafeRead pairMVec pairIndex
let (wordIndex, bitIndex) = divMod index 64
MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
DenseIntSet <$> GenericVector.unsafeFreeze indexSetMVec
filteredIndices :: GenericVector.Vector vector a => (a -> Bool) -> vector a -> DenseIntSet
filteredIndices predicate valueVec = DenseIntSet $ let
valuesAmount = GenericVector.length valueVec
wordsAmount = divCeiling valuesAmount 64
indexUnfoldr = do
(index, a) <- Unfoldr.vectorWithIndices valueVec
guard (predicate a)
return (divMod index 64)
in runST $ do
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ indexUnfoldr $ \ (wordIndex, bitIndex) -> MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
GenericVector.unsafeFreeze indexSetMVec
size :: DenseIntSet -> Int
size (DenseIntSet vec) = getSum (foldMap (Sum . popCount) (Unfoldr.vector vec))
lookup :: Int -> DenseIntSet -> Bool
lookup index (DenseIntSet vec) = let
(wordIndex, bitIndex) = divMod index 64
in vec GenericVector.!? wordIndex & maybe False (flip testBit bitIndex)
presentElementsVector :: GenericVector.Vector vector element => DenseIntSet -> (Int -> element) -> vector element
presentElementsVector intSet intToIndex = let
sizeVal = size intSet
unfoldr = Unfoldr.zipWithIndex (presentElementsUnfoldr intSet)
in runST $ do
mv <- MutableGenericVector.unsafeNew sizeVal
forM_ unfoldr $ \ (index, element) -> MutableGenericVector.unsafeWrite mv index (intToIndex element)
GenericVector.unsafeFreeze mv
indexVector :: GenericVector.Vector vector (Maybe index) => DenseIntSet -> (Int -> index) -> vector (Maybe index)
indexVector set@(DenseIntSet setVec) intToIndex = let
indexVecSize = GenericVector.length setVec * 64
in runST $ do
v <- MutableGenericVector.replicate indexVecSize Nothing
forM_ (Unfoldr.zipWithIndex (presentElementsUnfoldr set)) $ \ (index, element) -> do
MutableGenericVector.unsafeWrite v element (Just (intToIndex index))
GenericVector.unsafeFreeze v
filterVector :: GenericVector.Vector vector a => DenseIntSet -> vector a -> vector a
filterVector set vector = let
!newVectorSize = size set
in runST $ do
newVector <- MutableGenericVector.unsafeNew newVectorSize
forM_ (Unfoldr.zipWithIndex (vectorElementsUnfoldr vector set)) $ \ (newIndex, a) -> do
MutableGenericVector.unsafeWrite newVector newIndex a
GenericVector.unsafeFreeze newVector
presentElementsUnfoldr :: DenseIntSet -> Unfoldr Int
presentElementsUnfoldr (DenseIntSet vec) = do
(wordIndex, word) <- Unfoldr.vectorWithIndices vec
bitIndex <- Unfoldr.setBitIndices word
return (wordIndex * 64 + bitIndex)
absentElementsUnfoldr :: DenseIntSet -> Unfoldr Int
absentElementsUnfoldr (DenseIntSet vec) = do
(wordIndex, word) <- Unfoldr.vectorWithIndices vec
bitIndex <- Unfoldr.unsetBitIndices word
return (wordIndex * 64 + bitIndex)
vectorElementsUnfoldr :: (GenericVector.Vector vector a) => vector a -> DenseIntSet -> Unfoldr a
vectorElementsUnfoldr vec = fmap (vec GenericVector.!) . presentElementsUnfoldr
data DenseIntSetComposition = DenseIntSetComposition !Int [UnboxedVector Word64]
instance Semigroup DenseIntSetComposition where
(<>) (DenseIntSetComposition leftMinLength leftVecs) =
if null leftVecs
then id
else \ (DenseIntSetComposition rightMinLength rightVecs) -> if null rightVecs
then DenseIntSetComposition leftMinLength leftVecs
else DenseIntSetComposition (min leftMinLength rightMinLength) (leftVecs <> rightVecs)
instance Monoid DenseIntSetComposition where
mempty = DenseIntSetComposition 0 []
mappend = (<>)
compose :: DenseIntSet -> DenseIntSetComposition
compose (DenseIntSet vec) = DenseIntSetComposition (UnboxedVector.length vec) (pure vec)
composeList :: [DenseIntSet] -> DenseIntSetComposition
composeList list = if null list
then mempty
else let
unboxedVec = fmap (\ (DenseIntSet x) -> x) list
in DenseIntSetComposition (foldr1 min (fmap UnboxedVector.length unboxedVec)) unboxedVec