module DenseIntSet
(
DenseIntSet,
foldable,
topValueIndices,
filteredIndices,
invert,
intersections,
unions,
capacity,
population,
lookup,
presentElementsVector,
indexVector,
filterVector,
presentElementsUnfoldr,
absentElementsUnfoldr,
vectorElementsUnfoldr,
)
where
import DenseIntSet.Prelude hiding (intersection, union, lookup)
import qualified DeferredFolds.Unfoldr as Unfoldr
import qualified Data.Vector as Vector
import qualified Data.Vector.Unboxed as UnboxedVector
import qualified Data.Vector.Generic as GenericVector
import qualified Data.Vector.Generic.Mutable as MutableGenericVector
import qualified Data.Vector.Algorithms.Intro as IntroVectorAlgorithm
data DenseIntSet = DenseIntSet {-# UNPACK #-} !Int {-# UNPACK #-} !(UnboxedVector Word64)
deriving instance Eq DenseIntSet
deriving instance Ord DenseIntSet
instance Show DenseIntSet where
show = show . toList
deriving instance Generic DenseIntSet
instance Serialize DenseIntSet
instance Hashable DenseIntSet where
hashWithSalt salt (DenseIntSet capacity vec) = hashWithSalt (hashWithSalt salt capacity) (GenericVector.toList vec)
instance IsList DenseIntSet where
type Item DenseIntSet = Int
fromList list = foldable (succ (foldl' max 0 list)) list
toList = toList . presentElementsUnfoldr
foldable :: Foldable foldable => Int -> foldable Int -> DenseIntSet
foldable capacity foldable = let
!wordsAmount = divCeiling capacity 64
in DenseIntSet capacity $ runST $ do
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ foldable $ \ index -> let
(wordIndex, bitIndex) = divMod index 64
in MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
GenericVector.unsafeFreeze indexSetMVec
intersections :: [DenseIntSet] -> DenseIntSet
intersections list = if null list
then DenseIntSet 0 mempty
else let
cap = foldl1' min (fmap capacity list)
vecs = fmap (\ (DenseIntSet _ vec) -> vec) list
in compositions (.&.) maxBound cap vecs
unions :: [DenseIntSet] -> DenseIntSet
unions list = let
cap = foldl' max 0 (fmap capacity list)
vecs = fmap (\ (DenseIntSet _ vec) -> vec) list
in compositions (.|.) 0 cap vecs
compositions :: (Word64 -> Word64 -> Word64) -> Word64 -> Int -> [UnboxedVector Word64] -> DenseIntSet
compositions append empty capacity vecs = DenseIntSet capacity $ runST $ let
wordsAmount = divCeiling capacity 64
wordIndexUnfoldr = Unfoldr.intsInRange 0 (pred wordsAmount)
vecVec = Vector.fromList vecs
vecUnfoldr = Unfoldr.foldable vecVec
wordUnfoldrAt wordIndex = vecUnfoldr >>= Unfoldr.foldable . flip (GenericVector.!?) wordIndex
finalWordAt = foldr append empty . wordUnfoldrAt
in do
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ wordIndexUnfoldr $ \ index -> MutableGenericVector.unsafeWrite indexSetMVec index (finalWordAt index)
GenericVector.unsafeFreeze indexSetMVec
topValueIndices :: (GenericVector.Vector vector a, GenericVector.Vector vector (a, Int)) => (a -> a -> Ordering) -> Int -> vector a -> DenseIntSet
topValueIndices compare amount valueVec = let
valuesAmount = GenericVector.length valueVec
limitedAmount = min amount valuesAmount
wordsAmount = divCeiling valuesAmount 64
in runST $ do
pairMVec <- GenericVector.unsafeThaw (GenericVector.imap (\ index count -> (count, index)) valueVec)
IntroVectorAlgorithm.selectBy (\ a b -> compare (fst b) (fst a)) pairMVec limitedAmount
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ (Unfoldr.intsInRange 0 (pred limitedAmount)) $ \ pairIndex -> do
(_, index) <- MutableGenericVector.unsafeRead pairMVec pairIndex
let (wordIndex, bitIndex) = divMod index 64
MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
DenseIntSet valuesAmount <$> GenericVector.unsafeFreeze indexSetMVec
filteredIndices :: GenericVector.Vector vector a => (a -> Bool) -> vector a -> DenseIntSet
filteredIndices predicate valueVec = let
valuesAmount = GenericVector.length valueVec
wordsAmount = divCeiling valuesAmount 64
indexUnfoldr = do
(index, a) <- Unfoldr.vectorWithIndices valueVec
guard (predicate a)
return (divMod index 64)
in DenseIntSet valuesAmount $ runST $ do
indexSetMVec <- MutableGenericVector.new wordsAmount
forM_ indexUnfoldr $ \ (wordIndex, bitIndex) -> MutableGenericVector.modify indexSetMVec (flip setBit bitIndex) wordIndex
GenericVector.unsafeFreeze indexSetMVec
invert :: DenseIntSet -> DenseIntSet
invert (DenseIntSet capacity vec) = DenseIntSet capacity $ let
invertedVec = GenericVector.map complement vec
(lastWordIndex, claimedBitsOfLastWord) = divMod capacity 64
unclaimedBitsOfLastWord = 64 - claimedBitsOfLastWord
in if claimedBitsOfLastWord == 0
then invertedVec
else runST $ do
mv <- GenericVector.unsafeThaw invertedVec
flip (MutableGenericVector.modify mv) lastWordIndex $ flip shiftR unclaimedBitsOfLastWord . flip shiftL unclaimedBitsOfLastWord
GenericVector.unsafeFreeze mv
capacity :: DenseIntSet -> Int
capacity (DenseIntSet x _) = x
population :: DenseIntSet -> Int
population (DenseIntSet _ vec) = getSum (foldMap (Sum . popCount) (Unfoldr.vector vec))
lookup :: Int -> DenseIntSet -> Bool
lookup index (DenseIntSet _ vec) = let
(wordIndex, bitIndex) = divMod index 64
in vec GenericVector.!? wordIndex & maybe False (flip testBit bitIndex)
presentElementsVector :: GenericVector.Vector vector element => DenseIntSet -> (Int -> element) -> vector element
presentElementsVector intSet intToIndex = let
vectorPop = population intSet
unfoldr = Unfoldr.zipWithIndex (presentElementsUnfoldr intSet)
in runST $ do
mv <- MutableGenericVector.unsafeNew vectorPop
forM_ unfoldr $ \ (index, element) -> MutableGenericVector.unsafeWrite mv index (intToIndex element)
GenericVector.unsafeFreeze mv
indexVector :: GenericVector.Vector vector (Maybe index) => DenseIntSet -> (Int -> index) -> vector (Maybe index)
indexVector set@(DenseIntSet capacity setVec) intToIndex = runST $ do
v <- MutableGenericVector.replicate capacity Nothing
forM_ (Unfoldr.zipWithIndex (presentElementsUnfoldr set)) $ \ (index, element) -> do
MutableGenericVector.unsafeWrite v element (Just (intToIndex index))
GenericVector.unsafeFreeze v
filterVector :: GenericVector.Vector vector a => DenseIntSet -> vector a -> vector a
filterVector set vector = let
!newVectorPop = population set
in runST $ do
newVector <- MutableGenericVector.unsafeNew newVectorPop
forM_ (Unfoldr.zipWithIndex (vectorElementsUnfoldr vector set)) $ \ (newIndex, a) -> do
MutableGenericVector.unsafeWrite newVector newIndex a
GenericVector.unsafeFreeze newVector
presentElementsUnfoldr :: DenseIntSet -> Unfoldr Int
presentElementsUnfoldr (DenseIntSet capacity vec) = do
(wordIndex, word) <- Unfoldr.vectorWithIndices vec
bitIndex <- Unfoldr.setBitIndices word
return (wordIndex * 64 + bitIndex)
absentElementsUnfoldr :: DenseIntSet -> Unfoldr Int
absentElementsUnfoldr (DenseIntSet capacity vec) = let
isLastWordIndex = let
!maxWordIndex = pred (divCeiling capacity 64)
in \ wordIndex -> wordIndex == maxWordIndex
in do
(wordIndex, word) <- Unfoldr.vectorWithIndices vec
let
wordElemIndex = wordIndex * 64
in if isLastWordIndex wordIndex
then let
!bitsLeft = capacity - wordElemIndex
predicate bitIndex = bitIndex < bitsLeft
in fmap (wordElemIndex +) (Unfoldr.takeWhile predicate (Unfoldr.unsetBitIndices word))
else fmap (wordElemIndex +) (Unfoldr.unsetBitIndices word)
vectorElementsUnfoldr :: (GenericVector.Vector vector a) => vector a -> DenseIntSet -> Unfoldr a
vectorElementsUnfoldr vec = fmap (vec GenericVector.!) . presentElementsUnfoldr