Data/HashSet.hs

{-# LANGUAGE CPP #-}
{-# OPTIONS -Wall #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.HashSet
-- Copyright   :  (c) Milan Straka 2010
-- License     :  BSD-style
-- Maintainer  :  fox@ucw.cz
-- Stability   :  provisional
-- Portability :  portable
--
-- Persistent 'HashSet', which is defined as
--
-- @
--   data 'HashSet' e = 'Data.IntMap.IntMap' ('Data.Set.Set' e)
-- @
--
-- is an 'Data.IntMap.IntMap' indexed by hash values of elements,
-- containing a set @'Data.Set.Set' e@ with elements of the same hash values.
--
-- The interface of a 'HashSet' is a suitable subset of 'Data.IntSet.IntSet'.
--
-- The complexity of operations is determined by the complexities of
-- 'Data.IntMap.IntMap' and 'Data.Set.Set' operations. See the sources of
-- 'HashSet' to see which operations from @containers@ package are used.
-----------------------------------------------------------------------------

module Data.HashSet ( HashSet

                    -- * Operators
                    , (\\)

                    -- * Query
                    , null
                    , size
                    , member
                    , notMember
                    , isSubsetOf
                    , isProperSubsetOf

                    -- * Construction
                    , empty
                    , singleton
                    , insert
                    , delete

                    -- * Combine
                    , union
                    , unions
                    , difference
                    , intersection

                    -- * Filter
                    , filter
                    , partition

                    -- * Map
                    , map

                    -- * Fold
                    , fold

                    -- * Conversion
                    , elems
                    , toList
                    , fromList
                    ) where

import Prelude hiding (lookup,map,filter,null)

import Data.Hashable
import Data.List (foldl')
import Data.Monoid (Monoid(..))
import Data.Typeable

#if __GLASGOW_HASKELL__
import Text.Read
import Data.Data (Data(..), mkNoRepType)
#endif

import qualified Data.IntMap as I
import qualified Data.Set as S


{--------------------------------------------------------------------
  Operators
--------------------------------------------------------------------}

-- | Same as 'difference'.
(\\) :: Ord a => HashSet a -> HashSet a -> HashSet a
s1 \\ s2 = difference s1 s2


{--------------------------------------------------------------------
  Types
--------------------------------------------------------------------}

-- | The abstract type of a @HashSet@. Its interface is a suitable
-- subset of 'Data.IntSet.IntSet'.
newtype HashSet a = HashSet (I.IntMap (S.Set a)) deriving (Eq, Ord)

instance Ord a => Monoid (HashSet a) where
  mempty  = empty
  mappend = union
  mconcat = unions

instance Show a => Show (HashSet a) where
  showsPrec d m   = showParen (d > 10) $
    showString "fromList " . shows (toList m)

instance (Hashable a, Ord a, Read a) => Read (HashSet a) where
#ifdef __GLASGOW_HASKELL__
  readPrec = parens $ prec 10 $ do
    Ident "fromList" <- lexP
    xs <- readPrec
    return (fromList xs)

  readListPrec = readListPrecDefault
#else
  readsPrec p = readParen (p > 10) $ \ r -> do
    ("fromList",s) <- lex r
    (xs,t) <- reads s
    return (fromList xs,t)
#endif

#include "Typeable.h"
INSTANCE_TYPEABLE1(HashSet,hashSetTc,"HashSet")


#if __GLASGOW_HASKELL__
{--------------------------------------------------------------------
  A Data instance
--------------------------------------------------------------------}

-- This instance preserves data abstraction at the cost of inefficiency.
-- We omit reflection services for the sake of data abstraction.

instance (Hashable a, Ord a, Data a) => Data (HashSet a) where
  gfoldl f z m = z fromList `f` (toList m)
  toConstr _   = error "toConstr"
  gunfold _ _  = error "gunfold"
  dataTypeOf _ = mkNoRepType "Data.HashSet.HashSet"
  dataCast1 f  = gcast1 f
#endif


{--------------------------------------------------------------------
  Query
--------------------------------------------------------------------}
-- | Is the set empty?
null :: HashSet a -> Bool
null (HashSet s) = I.null s

-- | Number of elements in the set.
size :: HashSet a -> Int
size (HashSet s) = I.fold ((+) . S.size) 0 s

-- | Is the element a member of the set?
member :: (Hashable a, Ord a) => a -> HashSet a -> Bool
member a (HashSet s) =
  case I.lookup (hash a) s of
    Nothing -> False
    Just s' -> S.member a s'

-- | Is the element not a member of the set?
notMember :: (Hashable a, Ord a) => a -> HashSet a -> Bool
notMember k s = not $ member k s

-- | Is this a subset?
isSubsetOf :: Ord a => HashSet a -> HashSet a -> Bool
isSubsetOf (HashSet s1) (HashSet s2) =
  I.isSubmapOfBy (S.isSubsetOf) s1 s2

-- | Is this a proper subset? (ie. a subset but not equal).
isProperSubsetOf :: Ord a => HashSet a -> HashSet a -> Bool
isProperSubsetOf s1 s2 = isSubsetOf s1 s2 && size s1 < size s2


{--------------------------------------------------------------------
  Construction
--------------------------------------------------------------------}
-- | The empty set.
empty :: HashSet a
empty = HashSet I.empty

-- | A set of one element.
singleton :: Hashable a => a -> HashSet a
singleton a = HashSet $
  I.singleton (hash a) $ S.singleton a

-- | Add a value to the set. When the value is already an element of the set,
-- it is replaced by the new one, ie. 'insert' is left-biased.
insert :: (Hashable a, Ord a) => a -> HashSet a -> HashSet a
insert a (HashSet s) = HashSet $
  I.insertWith (\_ -> S.insert a) (hash a) (S.singleton a) s


nonempty :: S.Set a -> Maybe (S.Set a)
nonempty m | S.null m  = Nothing
           | otherwise = Just m

-- | Delete a value in the set. Returns the original set when the value was not
-- present.
delete :: (Hashable a, Ord a) => a -> HashSet a -> HashSet a
delete a (HashSet s) = HashSet $
  I.update (nonempty . S.delete a) (hash a) s


{--------------------------------------------------------------------
  Combine
--------------------------------------------------------------------}

-- | The union of two sets.
union :: Ord a => HashSet a -> HashSet a -> HashSet a
union (HashSet s1) (HashSet s2) = HashSet $ I.unionWith S.union s1 s2

-- | The union of a list of sets.
unions :: Ord a => [HashSet a] -> HashSet a
unions xs = foldl' union empty xs

-- | Difference between two sets.
difference :: Ord a => HashSet a -> HashSet a -> HashSet a
difference (HashSet s1) (HashSet s2) = HashSet $
  I.differenceWith (\t1 t2 -> nonempty $ S.difference t1 t2) s1 s2

delete_empty :: I.IntMap (S.Set a) -> I.IntMap (S.Set a)
delete_empty = I.filter (not . S.null)

-- | The intersection of two sets.
intersection :: Ord a => HashSet a -> HashSet a -> HashSet a
intersection (HashSet s1) (HashSet s2) = HashSet $ delete_empty $
  I.intersectionWith S.intersection s1 s2


{--------------------------------------------------------------------
  Filter
--------------------------------------------------------------------}
-- | Filter all elements that satisfy some predicate.
filter :: Ord a => (a -> Bool) -> HashSet a -> HashSet a
filter p (HashSet s) = HashSet $
  I.mapMaybe (nonempty . S.filter p) s

-- | Partition the set according to some predicate. The first set contains all
-- elements that satisfy the predicate, the second all elements that fail the
-- predicate.
partition :: Ord a => (a -> Bool) -> HashSet a -> (HashSet a, HashSet a)
partition p s = (filter p s, filter (not . p) s)


{--------------------------------------------------------------------
  Map
--------------------------------------------------------------------}
-- | @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
--
-- It's worth noting that the size of the result may be smaller if, for some
-- @(x,y)@, @x /= y && f x == f y@
map :: (Hashable b, Ord b) => (a -> b) -> HashSet a -> HashSet b
map f = fromList . fold ((:) . f) []


{--------------------------------------------------------------------
  Fold
--------------------------------------------------------------------}
-- | Fold over the elements of a set in an unspecified order.
fold :: (a -> b -> b) -> b -> HashSet a -> b
fold f z (HashSet s) = I.fold (flip $ S.fold f) z s


{--------------------------------------------------------------------
  Conversions
--------------------------------------------------------------------}
-- | The elements of a set. (For sets, this is equivalent to toList).
elems :: HashSet a -> [a]
elems = toList

-- | Convert the set to a list of elements.
toList :: HashSet a -> [a]
toList (HashSet s) = I.fold ((++) . S.toList) [] s

-- | Create a set from a list of elements.
fromList :: (Hashable a, Ord a) => [a] -> HashSet a
fromList xs = foldl' (flip insert) empty xs