module SmartGroup (groupAll, groupNum, groupLog) where import Data.Heap as Heap import Data.Set as Set import Data.Map as Map import Data.Monoid import Data.Ord data StringL = StringL {str :: String, count :: Int} deriving (Eq, Show) instance Ord StringL where compare (StringL i a) (StringL x b) = compare a b `mappend` compare i x data SizeMap = Unsplittable (Set String) | Splittable (Map StringL (Set String)) deriving (Eq, Show) instance Ord SizeMap where compare (Splittable a) (Splittable b) = compare (Map.size a) (Map.size b) compare (Unsplittable a) (Unsplittable b) = compare a b compare (Unsplittable _) (Splittable _) = LT compare (Splittable _) (Unsplittable _) = GT type WordAssoc = MaxHeap SizeMap toSet (Unsplittable x) = x toSet (Splittable x) = Set.unions (Map.elems x) intLog :: Int -> Int intLog = truncate . logBase 2 . fromIntegral groupWith :: (Int -> WordAssoc -> WordAssoc) -> Int -> [String] -> [[String]] groupWith f i = mkList . f i . mkAssoc . mkMap -- | Divide list into as many groups as possible groupAll :: Int -> [String] -> [[String]] groupAll = groupWith $ \i x-> let cycleSplit n = case splitIt i n of (Just a) -> cycleSplit a Nothing -> n in cycleSplit x -- | Divide list into about n different groups groupNum :: Int -> Int -> [String] -> [[String]] groupNum i = groupWith $ \x-> let splitTimes n m = if Heap.size m >= n then m else case splitIt x m of (Just a) -> splitTimes n a Nothing -> m in splitTimes i -- | Divide list into groups such that the amount of groups -- equals the log of the number of elements groupLog :: Int -> [String] -> [[String]] groupLog i s = groupNum (intLog (length s)) i s mkMap :: [String] -> Map String (Set String) mkMap = foldl (\m x-> foldl (\m' i-> if length i > 3 then Map.alter (Just . maybe (Set.singleton x) (Set.insert x)) i m' else m') m (words x)) Map.empty mkAssoc :: Map String (Set String) -> MaxHeap SizeMap mkAssoc m = Heap.singleton . Splittable . Map.mapKeys (\k-> StringL k (Set.size (m Map.! k))) $ m splitIt :: Int -> WordAssoc -> Maybe WordAssoc splitIt i s = case Heap.view s of (Just ((Unsplittable _),_)) -> Nothing (Just ((Splittable x),xs)) -> let (Just (as,b)) = Map.maxView x x1 = sizeMap i $ flip Map.mapMaybe b $ \n-> let m = Set.difference n as in if Set.null m then Nothing else Just m x2 = sizeMap i $ flip Map.mapMaybe b $ \n-> let m = Set.intersection n as in if Set.null m then Nothing else Just m x3 = as Set.\\ (Set.unions (Map.elems b)) in Just $ (if Set.null x3 then id else Heap.insert (Unsplittable x3)) $ Heap.insert x1 $ Heap.insert x2 xs sizeMap :: Int -> (Map StringL (Set String)) -> SizeMap sizeMap i m = case Map.findMax m of (StringL _ c,_) -> if c >= i then Splittable m else Unsplittable (Set.unions (Map.elems m)) mkList :: WordAssoc -> [[String]] mkList = Prelude.map (Set.toList . toSet) . Heap.toList