```
-- | Efficient combinatorial algorithms over multisets, including
--   generating all permutations, partitions, subsets, cycles, and
--   other combinatorial structures based on multisets.  Note that an
--   'Eq' or 'Ord' instance on the elements is /not/ required; the
--   algorithms are careful to keep track of which things are (by
--   construction) equal to which other things, so equality testing is
--   not needed.
module Math.Combinatorics.Multiset
( -- * The 'Multiset' type

Count
, Multiset(..)
, emptyMS, singletonMS
, consMS, (+:)

-- ** Conversions
, toList
, fromList
, fromListEq
, fromDistinctList
, fromCounts
, getCounts
, size

-- ** Operations
, disjUnion
, disjUnions

-- * Permutations

, permutations
, permutationsRLE

-- * Partitions

, Vec
, vPartitions
, partitions

-- * Submultisets

, splits
, kSubsets

-- * Cycles

, cycles

-- * Miscellaneous

, sequenceMS

) where

import Data.List (group, sort)
import Control.Arrow (first, second, (&&&), (***))
import Data.Maybe (catMaybes)

type Count = Int

-- | A multiset is represented as a list of (element, count) pairs.
--   We maintain the invariants that the counts are always positive,
--   and no element ever appears more than once.
newtype Multiset a = MS { toCounts :: [(a, Count)] }
deriving (Show)

-- | Construct a 'Multiset' from a list of (element, count) pairs.
--   Precondition: the counts must all be positive, and there must not
--   be any duplicate elements.
fromCounts :: [(a, Count)] -> Multiset a
fromCounts = MS

-- | Extract just the element counts from a multiset, forgetting the
--   elements.
getCounts :: Multiset a -> [Count]
getCounts (MS xs) = map snd xs

-- | Compute the total size of a multiset.
size :: Multiset a -> Int
size = sum . getCounts

liftMS :: ([(a, Count)] -> [(b, Count)]) -> Multiset a -> Multiset b
liftMS f (MS m) = MS (f m)

-- | A multiset with no values in it.
emptyMS :: Multiset a
emptyMS = MS []

-- | Create a multiset with only a single value in it.
singletonMS :: a -> Multiset a
singletonMS a = MS [(a,1)]

-- | Add an element with multiplicity to a multiset.  Precondition:
--   the new element is distinct from all elements already in the
--   multiset.
consMS :: (a, Count) -> Multiset a -> Multiset a
consMS e@(_,c) (MS m)
| c > 0     = MS (e:m)
| otherwise = MS m

-- | A convenient shorthand for 'consMS'.
(+:) :: (a, Count) -> Multiset a -> Multiset a
(+:) = consMS

instance Functor Multiset where
fmap f = fromCounts . (map . first \$ f) . toCounts

-- | Convert a multiset to a list.
toList :: Multiset a -> [a]
toList = expandCounts . toCounts

expandCounts :: [(a, Count)] -> [a]
expandCounts = concatMap (uncurry (flip replicate))

-- | Efficiently convert a list to a multiset, given an 'Ord' instance
--   for the elements.  This method is provided just for convenience.
--   you can also use 'fromListEq' with only an 'Eq' instance, or
--   construct 'Multiset's directly using 'fromCounts'.
fromList :: Ord a => [a] -> Multiset a
fromList = fromCounts . map (head &&& length) . group . sort

-- | Convert a list to a multiset, given an 'Eq' instance for the
--   elements.
fromListEq :: Eq a => [a] -> Multiset a
fromListEq = fromCounts . fromListEq'
where fromListEq' []     = []
fromListEq' (x:xs) = (x, 1 + count x xs) : fromListEq' (filter (/=x) xs)
count x = length . filter (==x)

-- | Make a multiset with one copy of each element from a list of
--   distinct elements.
fromDistinctList :: [a] -> Multiset a
fromDistinctList = fromCounts . map (\x -> (x,1))

-- | Form the disjoint union of two multisets; i.e. we assume the two
--   multisets share no elements in common.
disjUnion :: Multiset a -> Multiset a -> Multiset a
disjUnion (MS xs) (MS ys) = MS (xs ++ ys)

-- | Form the disjoint union of a collection of multisets.  We assume
--   that the multisets all have distinct elements.
disjUnions :: [Multiset a] -> Multiset a
disjUnions = foldr disjUnion (MS [])

-- | In order to generate permutations of a multiset, we need to keep
--   track of the most recently used element in the permutation being
--   built, so that we don't use it again immediately.  The
--   'RMultiset' type (for \"restricted multiset\") records this
--   information, consisting of a multiset possibly paired with an
--   element (with multiplicity) which is also part of the multiset,
--   but should not be used at the beginning of permutations.
data RMultiset a = RMS (Maybe (a, Count)) [(a,Count)]
deriving Show

-- | Convert a 'Multiset' to a 'RMultiset' (with no avoided element).
toRMS :: Multiset a -> RMultiset a
toRMS = RMS Nothing . toCounts

-- | Convert a 'RMultiset' to a 'Multiset'.
fromRMS :: RMultiset a -> Multiset a
fromRMS (RMS Nothing m)  = MS m
fromRMS (RMS (Just e) m) = MS (e:m)

-- | List all the distinct permutations of the elements of a
--   multiset.
--
--   For example, @permutations (fromList \"abb\") ==
--   [\"abb\",\"bba\",\"bab\"]@, whereas @Data.List.permutations
--   \"abb\" == [\"abb\",\"bab\",\"bba\",\"bba\",\"bab\",\"abb\"]@.
--   This function is equivalent to, but /much/ more efficient than,
--   @nub . Data.List.permutations@, and even works when the elements
--   have no 'Eq' instance.
--
--   Note that this is a specialized version of 'permutationsRLE',
--   where each run has been expanded via 'replicate'.
permutations :: Multiset a -> [[a]]
permutations = map expandCounts . permutationsRLE

-- | List all the distinct permutations of the elements of a multiset,
--   with each permutation run-length encoded. (Note that the
--   run-length encoding is a natural byproduct of the algorithm used,
--   not a separate postprocessing step.)
--
--   For example, @permutationsRLE [('a',1), ('b',2)] ==
--   [[('a',1),('b',2)],[('b',2),('a',1)],[('b',1),('a',1),('b',1)]]@.
--
--   (Note that although the output type is newtype-equivalent to
--   @[Multiset a]@, we don't call it that since the output may
--   violate the 'Multiset' invariant that no element should appear
--   more than once.  And indeed, morally this function does not
--   output multisets at all.)
permutationsRLE :: Multiset a -> [[(a,Count)]]
permutationsRLE (MS []) = [[]]
permutationsRLE m       = permutationsRLE' (toRMS m)

-- | List all the (run-length encoded) distinct permutations of the
--   elements of a multiset which do not start with the element to
--   avoid (if any).
permutationsRLE' :: RMultiset a -> [[(a,Count)]]

-- If only one element is left, there's only one permutation.
permutationsRLE' (RMS Nothing [(x,n)]) = [[(x,n)]]

-- Otherwise, select an element+multiplicity in all possible ways, and
-- concatenate the elements to all possible permutations of the
-- remaining multiset.
permutationsRLE' m  = [ e : p
| (e, m') <- selectRMS m
, p       <- permutationsRLE' m'
]

-- | Select an element + multiplicity from a multiset in all possible
--   ways, appropriately keeping track of elements to avoid at the
--   start of permutations.
selectRMS :: RMultiset a -> [((a, Count), RMultiset a)]

-- No elements to select.
selectRMS (RMS _ [])            = []

-- Selecting from a multiset with n copies of x, avoiding e:
selectRMS (RMS e ((x,n) : ms))  =

-- If we select all n copies of x, there are no copies of x left to avoid;
-- stick e (if it exists) back into the remaining multiset.
((x,n), RMS Nothing (maybe ms (:ms) e)) :

-- We can also select any number of copies of x from (n-1) down to 1; in each case,
-- we avoid the remaining copies of x and put e back into the returned multiset.
[ ( (x,k), RMS (Just (x,n-k))
(maybe ms (:ms) e) )
| k <- [n-1, n-2 .. 1]
] ++

-- Finally, we can recursively choose something other than x.
map (second (consRMS (x,n))) (selectRMS (RMS e ms))

consRMS :: (a, Count) -> RMultiset a -> RMultiset a
consRMS x (RMS e m) = RMS e (x:m)

-- Some QuickCheck properties.  Of course, due to combinatorial
-- explosion these are of limited utility!
-- newtype ArbCount = ArbCount Int
--   deriving (Eq, Show, Num, Real, Enum, Ord, Integral)

-- instance Arbitrary Count where
--   arbitrary = elements (map ArbCount [1..3])

-- prop_perms_distinct :: Multiset Char ArbCount -> Bool
-- prop_perms_distinct m = length ps == length (nub ps)
--   where ps = permutations m

-- prop_perms_are_perms :: Multiset Char ArbCount -> Bool
-- prop_perms_are_perms m = all ((==l') . sort) (permutations m)
--   where l' = sort (toList m)

---------------------
-- Partitions
---------------------

-- | Element count vector.
type Vec = [Count]

-- | Componentwise comparison of count vectors.
(<|=) :: Vec -> Vec -> Bool
xs <|= ys = and \$ zipWith (<=) xs ys

-- | 'vZero v' produces a zero vector of the same length as @v@.
vZero :: Vec -> Vec
vZero = map (const 0)

-- | Test for the zero vector.
vIsZero :: Vec -> Bool
vIsZero = all (==0)

-- | Do vector arithmetic componentwise.
(.+.), (.-.) :: Vec -> Vec -> Vec
(.+.) = zipWith (+)
(.-.) = zipWith (-)

-- | Multiply a count vector by a scalar.
(*.) :: Count -> Vec -> Vec
(*.) n = map (n*)

-- | 'v1 `vDiv` v2' is the largest scalar multiple of 'v2' which is
--   elementwise less than or equal to 'v1'.
vDiv :: Vec -> Vec -> Count
vDiv v1 v2 = minimum . catMaybes \$ zipWith zdiv v1 v2
where zdiv _ 0 = Nothing
zdiv x y = Just \$ x `div` y

-- | 'vInc within v' lexicographically increments 'v' with respect to
--   'within'.  For example, @vInc [2,3,5] [1,3,4] == [1,3,5]@, and
--   @vInc [2,3,5] [1,3,5] == [2,0,0]@.
vInc :: Vec -> Vec -> Vec
vInc lim v = reverse (vInc' (reverse lim) (reverse v))
where vInc' _ []          = []
vInc' [] (x:xs)     = x+1 : xs
vInc' (l:ls) (x:xs) | x < l     = x+1 : xs
| otherwise = 0 : vInc' ls xs

-- | Generate all vector partitions, representing each partition as a
--   multiset of vectors.
--
--   This code is a slight generalization of the code published in
--
--     Brent Yorgey. \"Generating Multiset Partitions\". In: The
--
--   See that article for a detailed discussion of the code and how it works.
vPartitions :: Vec -> [Multiset Vec]
vPartitions v = vPart v (vZero v) where
vPart v _ | vIsZero v = [MS []]
vPart v vL
| v <= vL   = []
| otherwise = MS [(v,1)]
: [ (v',k) +: p' | v' <- withinFromTo v (vHalf v) (vInc v vL)
, k  <- [1 .. (v `vDiv` v')]
, p' <- vPart (v .-. (k *. v')) v' ]

-- | 'vHalf v' computes the \"lexicographic half\" of 'v', that is,
--   the vector which is the middle element (biased towards the end)
--   in a lexicographically decreasing list of all the vectors
--   elementwise no greater than 'v'.
vHalf :: Vec -> Vec
vHalf [] = []
vHalf (x:xs) | (even x) = (x `div` 2) : vHalf xs
| otherwise = (x `div` 2) : xs

downFrom n = [n,(n-1)..0]

-- | 'within m' generates a lexicographically decreasing list of
--   vectors elementwise no greater than 'm'.
within :: Vec -> [Vec]
within = sequence . map downFrom

-- | Clip one vector against another.
clip :: Vec -> Vec -> Vec
clip = zipWith min

-- | 'withinFromTo m s e' efficiently generates a lexicographically
--   decreasing list of vectors which are elementwise no greater than
--   'm' and lexicographically between 's' and 'e'.
withinFromTo :: Vec -> Vec -> Vec -> [Vec]
withinFromTo m s e | not (s <|= m) = withinFromTo m (clip m s) e
withinFromTo m s e | e > s = []
withinFromTo m s e = wFT m s e True True
where
wFT [] _ _ _ _ = [[]]
wFT (m:ms) (s:ss) (e:es) useS useE =
let start = if useS then s else m
end   = if useE then e else 0
in
[x:xs | x <- [start,(start-1)..end],
let useS' = useS && x==s,
let useE' = useE && x==e,
xs <- wFT ms ss es useS' useE' ]

-- | Efficiently generate all distinct multiset partitions.  Note that
--   each partition is represented as a multiset of parts (each of
--   which is a multiset) in order to properly reflect the fact that
--   some parts may occur multiple times.
partitions :: Multiset a -> [Multiset (Multiset a)]
partitions (MS []) = [MS []]
partitions (MS m)  = (map . fmap) (combine elts) \$ vPartitions counts
where (elts, counts) = unzip m
combine es cs  = MS . filter ((/=0) . snd) \$ zip es cs

-- | Generate all splittings of a multiset into two submultisets,
--   i.e. all size-two partitions.
splits :: Multiset a -> [(Multiset a, Multiset a)]
splits (MS [])        = [(MS [], MS [])]
splits (MS ((x,n):m)) =
for [0..n] \$ \k ->

-- | Generate all size-k submultisets.
kSubsets :: Count -> Multiset a -> [Multiset a]
kSubsets 0 _              = [MS []]
kSubsets _ (MS [])        = []
kSubsets k (MS ((x,n):m)) =
for [0 .. min k n] \$ \j ->
map (addElt x j) (kSubsets (k - j) (MS m))

for = flip concatMap

addElt x k = ((x,k) +:)

-- | Generate all distinct cycles, aka necklaces, with elements taken
--   from a multiset.  See J. Sawada, \"A fast algorithm to generate
--   necklaces with fixed content\", J. Theor. Comput. Sci. 301 (2003)
--   pp. 477-489.
--
--   Given the ordering on the elements of the multiset based on their
--   position in the multiset representation (with \"smaller\"
--   elements first), in @map reverse (cycles m)@, each generated
--   cycle is lexicographically smallest among all its cyclic shifts,
--   and furthermore, the cycles occur in reverse lexicographic
--   order. (It's simply more convenient/efficient to generate the
--   cycles reversed in this way, and of course we get the same set of
--   cycles either way.)
--
--   For example, @cycles (fromList \"aabbc\") ==
--   [\"cabba\",\"bcaba\",\"cbaba\",\"bbcaa\",\"bcbaa\",\"cbbaa\"]@.
cycles :: Multiset a -> [[a]]
cycles (MS [])         = []   -- no such thing as an empty cycle
cycles m@(MS ((x1,n1):xs))
| n1 == 1    = (cycles' n 2 1 [(0,x1)] (reverse \$ zip [1..] xs))
| otherwise =  (cycles' n 2 1 [(0,x1)] (reverse \$ zip [0..] ((x1,n1-1):xs)))
where n = sum . getCounts \$ m

-- | The first parameter is the length of the necklaces being
--   generated.  The second parameter @p@ is the length of the longest
--   prefix of @pre@ which is a Lyndon word, i.e. an aperiodic
--   necklace.  @pre@ is the current (reversed) prefix of the
--   necklaces being generated.
cycles' :: Int -> Int -> Int -> [(Int, a)] -> [(Int, (a,Count))] -> [[a]]
cycles' n t p pre [] | n `mod` p == 0 = [map snd pre]
| otherwise      = []

cycles' n t p pre xs =
(takeWhile ((>=atp) . fst) xs) >>= \(j, (xj,nj)) ->
cycles' n (t+1) (if j == atp then p else t)
((j,xj):pre)
(remove j xs)
where atp = fst \$ pre !! (p - 1)

remove j [] = []
remove j (x@(j',(xj,nj)):xs)
| j == j' && nj == 1 = xs
| j == j'            = (j',(xj,nj-1)):xs
| otherwise          = x:remove j xs

-- | Take a multiset of lists, and select one element from each list
--   in every possible combination to form a list of multisets.  We
--   assume that all the list elements are distinct.
sequenceMS :: Multiset [a] -> [Multiset a]
sequenceMS = map disjUnions
. sequence
. map (\(xs, n) -> kSubsets n (MS \$ uncollate (xs, n)))
. toCounts

uncollate :: ([a], Count) -> [(a, Count)]
uncollate (xs, n) = map (\x -> (x,n)) xs

```