{-# LANGUAGE CPP #-} {-# OPTIONS -Wall #-} ----------------------------------------------------------------------------- -- | -- Module : Data.HashSet -- Copyright : (c) Milan Straka 2010 -- License : BSD-style -- Maintainer : fox@ucw.cz -- Stability : provisional -- Portability : portable -- -- Persistent 'HashSet', which is defined as -- -- @ -- data 'HashSet' e = 'Data.IntMap.IntMap' ('Data.Set.Set' e) -- @ -- -- is an 'Data.IntMap.IntMap' indexed by hash values of elements, -- containing a set @'Data.Set.Set' e@ with elements of the same hash values. -- -- The interface of a 'HashSet' is a suitable subset of 'Data.IntSet.IntSet'. -- -- The complexity of operations is determined by the complexities of -- 'Data.IntMap.IntMap' and 'Data.Set.Set' operations. See the sources of -- 'HashSet' to see which operations from @containers@ package are used. ----------------------------------------------------------------------------- module Data.HashSet ( HashSet -- * Operators , (\\) -- * Query , null , size , member , notMember , isSubsetOf , isProperSubsetOf -- * Construction , empty , singleton , insert , delete -- * Combine , union , unions , difference , intersection -- * Filter , filter , partition -- * Map , map -- * Fold , fold -- * Conversion , elems , toList , fromList ) where import Prelude hiding (lookup,map,filter,null) import Data.Hashable import Data.List (foldl') import Data.Monoid (Monoid(..)) import Data.Typeable #if __GLASGOW_HASKELL__ import Text.Read import Data.Data (Data(..), mkNoRepType) #endif import qualified Data.IntMap as I import qualified Data.Set as S {-------------------------------------------------------------------- Operators --------------------------------------------------------------------} -- | Same as 'difference'. (\\) :: Ord a => HashSet a -> HashSet a -> HashSet a s1 \\ s2 = difference s1 s2 {-------------------------------------------------------------------- Types --------------------------------------------------------------------} data Some a = Only !a | More !(S.Set a) deriving (Eq, Ord) -- | The abstract type of a @HashSet@. Its interface is a suitable -- subset of 'Data.IntSet.IntSet'. newtype HashSet a = HashSet (I.IntMap (Some a)) deriving (Eq, Ord) instance Ord a => Monoid (HashSet a) where mempty = empty mappend = union mconcat = unions instance Show a => Show (HashSet a) where showsPrec d m = showParen (d > 10) $ showString "fromList " . shows (toList m) instance (Hashable a, Ord a, Read a) => Read (HashSet a) where #ifdef __GLASGOW_HASKELL__ readPrec = parens $ prec 10 $ do Ident "fromList" <- lexP xs <- readPrec return (fromList xs) readListPrec = readListPrecDefault #else readsPrec p = readParen (p > 10) $ \ r -> do ("fromList",s) <- lex r (xs,t) <- reads s return (fromList xs,t) #endif #include "Typeable.h" INSTANCE_TYPEABLE1(HashSet,hashSetTc,"HashSet") #if __GLASGOW_HASKELL__ {-------------------------------------------------------------------- A Data instance --------------------------------------------------------------------} -- This instance preserves data abstraction at the cost of inefficiency. -- We omit reflection services for the sake of data abstraction. instance (Hashable a, Ord a, Data a) => Data (HashSet a) where gfoldl f z m = z fromList `f` (toList m) toConstr _ = error "toConstr" gunfold _ _ = error "gunfold" dataTypeOf _ = mkNoRepType "Data.HashSet.HashSet" dataCast1 f = gcast1 f #endif {-------------------------------------------------------------------- Comparing elements --------------------------------------------------------------------} -- For ByteStrings, doing compare is usually faster than doing (==), -- according to benchmarks. A Set is using compare naturally. We therefore -- define eq :: Ord a => a -> a -> Bool, which serves as (==). {-# INLINE eq #-} eq :: Ord a => a -> a -> Bool eq x y = x `compare` y == EQ {-------------------------------------------------------------------- Query --------------------------------------------------------------------} -- | Is the set empty? null :: HashSet a -> Bool null (HashSet s) = I.null s -- | Number of elements in the set. size :: HashSet a -> Int size (HashSet s) = I.fold ((+) . some_size) 0 s where some_size (Only _) = 1 some_size (More t) = S.size t -- | Is the element a member of the set? member :: (Hashable a, Ord a) => a -> HashSet a -> Bool member a (HashSet s) = case I.lookup (hash a) s of Nothing -> False Just (Only a') -> a `eq` a' Just (More s') -> S.member a s' -- | Is the element not a member of the set? notMember :: (Hashable a, Ord a) => a -> HashSet a -> Bool notMember k s = not $ member k s -- | Is this a subset? isSubsetOf :: Ord a => HashSet a -> HashSet a -> Bool isSubsetOf (HashSet s1) (HashSet s2) = I.isSubmapOfBy (some_isSubsetOf) s1 s2 where some_isSubsetOf (Only a) (Only b) = a `eq` b some_isSubsetOf (Only a) (More s) = a `S.member` s some_isSubsetOf (More _) (Only _) = False some_isSubsetOf (More s) (More t) = s `S.isSubsetOf` t -- | Is this a proper subset? (ie. a subset but not equal). isProperSubsetOf :: Ord a => HashSet a -> HashSet a -> Bool isProperSubsetOf s1 s2 = isSubsetOf s1 s2 && size s1 < size s2 {-------------------------------------------------------------------- Construction --------------------------------------------------------------------} -- | The empty set. empty :: HashSet a empty = HashSet I.empty -- | A set of one element. singleton :: Hashable a => a -> HashSet a singleton a = HashSet $ I.singleton (hash a) $ Only a -- | Add a value to the set. When the value is already an element of the set, -- it is replaced by the new one, ie. 'insert' is left-biased. insert :: (Hashable a, Ord a) => a -> HashSet a -> HashSet a insert a (HashSet s) = HashSet $ I.insertWith some_insert (hash a) (Only a) s where some_insert _ v@(Only b) | a `eq` b = v | otherwise = More $ S.insert a (S.singleton b) some_insert _ (More t) = More $ S.insert a t some_norm :: S.Set a -> Maybe (Some a) some_norm s = case S.size s of 0 -> Nothing 1 -> Just $ Only $ S.findMin s _ -> Just $ More $ s some_norm' :: S.Set a -> Some a some_norm' s = case S.size s of 1 -> Only $ S.findMin s _ -> More $ s -- | Delete a value in the set. Returns the original set when the value was not -- present. delete :: (Hashable a, Ord a) => a -> HashSet a -> HashSet a delete a (HashSet s) = HashSet $ I.update some_delete (hash a) s where some_delete v@(Only b) | a `eq` b = Nothing | otherwise = Just v some_delete (More t) = some_norm $ S.delete a t {-------------------------------------------------------------------- Combine --------------------------------------------------------------------} -- | The union of two sets. union :: Ord a => HashSet a -> HashSet a -> HashSet a union (HashSet s1) (HashSet s2) = HashSet $ I.unionWith some_union s1 s2 where some_union v@(Only a) (Only b) | a `eq` b = v | otherwise = More (S.singleton a `S.union` S.singleton b) some_union (Only a) (More s) = More $ S.singleton a `S.union` s some_union (More s) (Only a) = More $ s `S.union` S.singleton a some_union (More s) (More t) = More $ s `S.union` t -- | The union of a list of sets. unions :: Ord a => [HashSet a] -> HashSet a unions xs = foldl' union empty xs -- | Difference between two sets. difference :: Ord a => HashSet a -> HashSet a -> HashSet a difference (HashSet s1) (HashSet s2) = HashSet $ I.differenceWith some_diff s1 s2 where some_diff v@(Only a) (Only b) | a `eq` b = Nothing | otherwise = Just v some_diff v@(Only a) (More s) | a `S.member` s = Nothing | otherwise = Just v some_diff (More s) (Only a) = some_norm $ S.delete a s some_diff (More s) (More t) = some_norm $ s `S.difference` t -- The I.intersectionWith does not have type general enough. We need the function -- given to I.intersectionWith to have type a -> b -> Maybe c, so the elements could -- be deleted from the IntMap. As it is only a -> b -> c, we allow empty sets to be -- in the resulting intersection and delete them with a filter afterwards. This is -- the function performing the deletions. delete_empty :: I.IntMap (Some a) -> I.IntMap (Some a) delete_empty = I.filter some_empty where some_empty (Only _) = True some_empty (More s) = not $ S.null s -- | The intersection of two sets. intersection :: Ord a => HashSet a -> HashSet a -> HashSet a intersection (HashSet s1) (HashSet s2) = HashSet $ delete_empty $ I.intersectionWith some_intersection s1 s2 where some_intersection v@(Only a) (Only b) | a `eq` b = v | otherwise = More (S.empty) some_intersection v@(Only a) (More s) | a `S.member` s = v | otherwise = More (S.empty) some_intersection (More s) (Only a) | a `S.member` s = Only (S.findMin $ s `S.intersection` (S.singleton a)) | otherwise = More (S.empty) some_intersection (More s) (More t) = some_norm' $ s `S.intersection` t {-------------------------------------------------------------------- Filter --------------------------------------------------------------------} -- | Filter all elements that satisfy some predicate. filter :: Ord a => (a -> Bool) -> HashSet a -> HashSet a filter p (HashSet s) = HashSet $ I.mapMaybe some_filter s where some_filter v@(Only a) | p a = Just v | otherwise = Nothing some_filter (More t) = some_norm (S.filter p t) -- | Partition the set according to some predicate. The first set contains all -- elements that satisfy the predicate, the second all elements that fail the -- predicate. partition :: Ord a => (a -> Bool) -> HashSet a -> (HashSet a, HashSet a) partition p s = (filter p s, filter (not . p) s) {-------------------------------------------------------------------- Map --------------------------------------------------------------------} -- | @'map' f s@ is the set obtained by applying @f@ to each element of @s@. -- -- It's worth noting that the size of the result may be smaller if, for some -- @(x,y)@, @x /= y && f x == f y@ map :: (Hashable b, Ord b) => (a -> b) -> HashSet a -> HashSet b map f = fromList . fold ((:) . f) [] {-------------------------------------------------------------------- Fold --------------------------------------------------------------------} -- | Fold over the elements of a set in an unspecified order. fold :: (a -> b -> b) -> b -> HashSet a -> b fold f z (HashSet s) = I.fold some_fold z s where some_fold (Only a) x = f a x some_fold (More t) x = S.fold f x t {-------------------------------------------------------------------- Conversions --------------------------------------------------------------------} -- | The elements of a set. (For sets, this is equivalent to toList). elems :: HashSet a -> [a] elems = toList -- | Convert the set to a list of elements. toList :: HashSet a -> [a] toList (HashSet s) = I.fold some_append [] s where some_append (Only a) acc = a : acc some_append (More t) acc = S.toList t ++ acc -- | Create a set from a list of elements. fromList :: (Hashable a, Ord a) => [a] -> HashSet a fromList xs = foldl' (flip insert) empty xs