-- {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TemplateHaskell #-} -- {-# LANGUAGE TupleSections #-} -- | Create clusters of non-overlapping things. module Agda.Utils.Cluster ( cluster , cluster' , tests ) where import Control.Monad -- An imperative union-find library: import Data.Equivalence.Monad import Data.Char import Data.Functor import qualified Data.IntMap as IntMap import Data.List import Test.QuickCheck import Test.QuickCheck.All import Test.QuickCheck.Function import Text.Show.Functions -- | Characteristic identifiers. type C = Int -- | Given a function @f :: a -> (C,[C])@ which returns a non-empty list of -- characteristics @C@ of @a@, partition a list of @a@s into groups -- such that each element in a group shares at least one characteristic -- with at least one other element of the group. cluster :: (a -> (C,[C])) -> [a] -> [[a]] cluster f as = cluster' $ map (\ a -> (a, f a)) as -- | Partition a list of @a@s paired with a non-empty list of -- characteristics $C$ into groups -- such that each element in a group shares at least one characteristic -- with at least one other element of the group. cluster' :: [(a,(C,[C]))] -> [[a]] cluster' acs = runEquivM id const $ do -- Construct the equivalence classes of characteristics. forM_ acs $ \ (_,(c,cs)) -> equateAll $ c:cs -- Pair each element with its class. cas <- forM acs $ \ (a,(c,_)) -> (`IntMap.singleton` [a]) <$> classDesc c -- Create a map from class to elements. let m = IntMap.unionsWith (++) cas -- Return the values of the map return $ IntMap.elems m ------------------------------------------------------------------------ -- * Properties ------------------------------------------------------------------------ -- instance Show (Int -> (C, [C])) where -- show f = "function " ++ show (map (\ x -> (x, f x)) [-10..10]) -- Fundamental properties: soundness and completeness -- | Not too many clusters. (Algorithm equated all it could.) -- -- Each element in a cluster shares a characteristic with at least one -- other element in the same cluster. prop_cluster_complete :: Fun Int (C, [C]) -> [Int] -> Bool prop_cluster_complete (Fun _ f) as = (`all` cluster f as) $ \ cl -> (`all` cl) $ \ a -> let csa = uncurry (:) $ f a in let cl' = delete a cl in -- Either a is the single element of the cluster, or it shares a characteristic c -- with some other element b of the same cluster. null cl' || not (null [ (b,c) | b <- cl', c <- uncurry (:) (f b), c `elem` csa ]) -- | Not too few clusters. (Algorithm did not equate too much.) -- -- Elements of different clusters share no characteristics. prop_cluster_sound :: Fun Int (C, [C]) -> [Int] -> Bool prop_cluster_sound (Fun _ f) as = (`all` [ (c, d) | let cs = cluster f as, c <- cs, d <- cs, c /= d]) $ \ (c, d) -> (`all` c) $ \ a -> (`all` d) $ \ b -> null $ (uncurry (:) $ f a) `intersect` (uncurry (:) $ f b) neToList :: (a, [a]) -> [a] neToList = uncurry (:) isSingleton, exactlyTwo, atLeastTwo :: [a] -> Bool isSingleton x = length x == 1 exactlyTwo x = length x == 2 atLeastTwo x = length x >= 2 prop_cluster_empty :: Bool prop_cluster_empty = null (cluster (const (0,[])) []) prop_cluster_permutation :: Fun Int (C, [C]) -> [Int] -> Bool prop_cluster_permutation (Fun _ f) as = sort as == sort (concat (cluster f as)) prop_cluster_single :: a -> [a] -> Bool prop_cluster_single a as = isSingleton $ cluster (const (0,[])) $ (a:as) prop_cluster_idem :: Fun a (C, [C]) -> a -> [a] -> Bool prop_cluster_idem (Fun _ f) a as = isSingleton $ cluster f $ head $ cluster f (a:as) prop_two_clusters :: [Int] -> Bool prop_two_clusters as = atLeastTwo $ cluster (\ x -> (x, [x])) (-1:1:as) -- | An example. -- -- "anabel" is related to "babel" (common letter 'a' in 2-letter prefix) -- which is related to "bond" (common letter 'b'). -- -- "hurz", "furz", and "kurz" are all related (common letter 'u'). test :: [[String]] test = cluster (\ (x:y:_) -> (ord x,[ord y])) ["anabel","bond","babel","hurz","furz","kurz"] prop_test :: Bool prop_test = test == [["anabel","bond","babel"],["hurz","furz","kurz"]] -- | Modified example (considering only the first letter). test1 :: [[String]] test1 = cluster (\ (x:_:_) -> (ord x,[])) ["anabel","bond","babel","hurz","furz","kurz"] prop_test1 :: Bool prop_test1 = test1 == [["anabel"],["bond","babel"],["furz"],["hurz"],["kurz"]] ------------------------------------------------------------------------ -- * All tests ------------------------------------------------------------------------ -- Template Haskell hack to make the following $quickCheckAll work -- under ghc-7.8. return [] -- KEEP! tests :: IO Bool tests = do putStrLn "Agda.Utils.Cluster" $quickCheckAll