-- | Internal module whose primary exports are 'uniformPartition' -- and 'uniformPartitionThin'. Import @RandomCycle.List@ instead. module RandomCycle.List.Partition where import Control.Monad (guard) import Data.Bits import GHC.Natural (Natural) import System.Random.Stateful {- UTILITIES -} -- | Internal. Version of @Data.List.'span'@ that uses the supplied bits @bs@ -- as a grouping variable. 'switch` flips the booleans, so that the input bit's -- least significant digit determines the grouping. Note the case @bs == 0@ is -- not handled specially here, since termination is guaranteed whenever 'xs' is -- finite. Compare to @RandomCycle.Vector.Partitions.'commonSubseqBits'@. -- -- Note this would be simpler to implement with `countTrailingZeros`, but that -- would limit the input list to some length, e.g. 64 if using @Word@, which is -- too restrictive. spanBits :: (Bool -> Bool) -> Natural -> [a] -> ([a], (Natural, [a])) spanBits _ bs xs@[] = (xs, (bs, xs)) spanBits switch bs (x : xs) | switch (bs `testBit` 0) = let (zs, (bs', zzs)) = spanBits switch (bs `shiftR` 1) xs in (x : zs, (bs', zzs)) | otherwise = ([], (bs, x : xs)) -- | Utility to generate a list partition using the provided 'Natural' -- as grouping variable, viewed as 'Bits'. The choice of grouping variable is to -- improve performance since the number of partitions grows exponentially in the -- input list length. -- -- This can be used to generate a list of all possible partitions of the input list -- as shown in the example. See 'RandomCycle.Vector.partitionFromBits' for other examples. -- -- >>> import GHC.Natural -- >>> allPartitions n | n < 0 = [] -- >>> allPartitions n = map (`partitionFromBits` [0..n-1]) [0 .. 2^(n-1) - 1] -- >>> allPartitions 4 -- [[[0,1,2,3]],[[0],[1,2,3]],[[0],[1],[2,3]],[[0,1],[2,3]],[[0,1],[2],[3]],[[0],[1],[2],[3 -- ]],[[0],[1,2],[3]],[[0,1,2],[3]]] partitionFromBits :: Natural -> [a] -> [[a]] partitionFromBits _ [] = [] partitionFromBits bs xs = -- NOTE: Grouping is determined by the first bit. This is important for -- correctness of grouping based on spanBits implementation, but also to -- ensure uniformPartition is uniform over 2^(n-1) partitions. let switch = if bs `testBit` 0 then id else not (ys, (bs', yss)) = spanBits switch bs xs in ys : partitionFromBits bs' yss -- | Primarily a testing utility, to compute directly the lengths of each -- partition element for a list of size 'n', using 'countTrailingZeros'. Note -- this uses 'Word'. partitionLengths :: Word -> Int -> [Int] partitionLengths bs = op bs (countTrailingZeros bs) where op b 0 m = let b' = complement b in op b' (countTrailingZeros b') m op b z m = if z > m then [m | m > 0] else let b' = b `shiftR` z in z : op b' (countTrailingZeros b') (m - z) {- PARTITIONING WITH THINNING -} -- | Internal. Partition a list as determined by bits @bs@, but shortcircuit if -- the local condition 'r' is false for some partition element. partitionFromBitsThin :: ([a] -> Bool) -> Natural -> [a] -> Maybe [[a]] partitionFromBitsThin _ _ [] = Just [] partitionFromBitsThin r bs xs = let ps = partitionFromBits bs xs in guard (all r ps) >> pure ps -- | Internal. Inner logic of 'uniformPartitionThin' that carries around -- the input list length to avoid recomputation. It is the callers job to -- ensure @n == length xs@. uniformPartitionThinN :: (StatefulGen g m) => Int -> Int -> ([a] -> Bool) -> [a] -> g -> m (Maybe [[a]]) uniformPartitionThinN maxit _ _ _ _ | maxit <= 0 = pure Nothing uniformPartitionThinN maxit n r xs g = do bs <- uniformRM (0, 2 ^ n - 1) g case partitionFromBitsThin r bs xs of Nothing -> uniformPartitionThinN (maxit - 1) n r xs g Just ys -> pure $ Just ys {- RANDOM -} -- | Draw a random partition of the input list 'xs' from the uniform -- distribution on partitions. This proceeds by randomizing the placement of -- each breakpoint, in other words by walking a random path in a perfect binary -- tree. /O(n)/ for a vector length /n/. -- -- This function preserves the order of the input list. -- -- ==== __Examples__ -- -- >>> import System.Random.Stateful -- >>> pureGen = mkStdGen 0 -- >>> runStateGen_ pureGen $ uniformPartition [1..5::Int] -- [[1,2,3],[4],[5]] -- >>> runStateGen_ pureGen $ uniformPartition ([] :: [Int]) -- [] uniformPartition :: (StatefulGen g m) => [a] -> g -> m [[a]] uniformPartition xs g = do let d = length xs -- Drawing w.p. 1/2^d, but first bit determines grouping. bs <- uniformRM (0, 2 ^ d - 1) g pure $ partitionFromBits bs xs -- TODO: be more precise in the statement below about exponential growth in the -- length condition case. -- | Generate a partition with a local condition @r@ on each partition element. -- Construction of a partition shortcircuits to failure as soon as the local -- condition is false. -- -- Since this is a rejection sampling method, the user is asked to provide -- a counter for the maximum number of sampling attempts in order to guarantee -- termination in cases where the edge predicate has probability of success close -- to zero. -- -- Run time on average is /O(n\/p)/ where /p/ is the probability @all r yss -- == True@ for a uniformly generated partition @yss@, assuming @r@ has run -- time linear in the length of its argument. This can be highly non-linear -- because /p/ in general is a function of /n/. -- -- Some cases can perhaps be deceptively expensive: For example, the condition @r = ((>= -- 2) . length)@ leads to huge runtimes, since the number of partitions with at -- least one element of length 1 is exponential in /n/. -- -- ==== __Examples__ -- -- >>> import System.Random.Stateful -- >>> maxit = 1000 -- >>> pureGen = mkStdGen 0 -- >>> r = (>= 2) . length -- >>> runStateGen_ pureGen $ uniformPartitionThin maxit r [1..5::Int] -- Just [[1,2],[3, 4, 5]] -- >>> runStateGen_ pureGen $ uniformPartitionThin maxit (const False) ([] :: [Int]) -- Just [] -- >>> runStateGen_ pureGen $ uniformPartitionThin maxit r [1::Int] -- Nothing uniformPartitionThin :: (StatefulGen g m) => Int -> ([a] -> Bool) -> [a] -> g -> m (Maybe [[a]]) uniformPartitionThin maxit r xs = uniformPartitionThinN maxit (length xs) r xs