#if !defined(TESTING) && __GLASGOW_HASKELL__ >= 703
{-# LANGUAGE Safe #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module : Data.Set
-- Copyright : (c) Daan Leijen 2002
-- License : BSD-style
-- Maintainer : libraries@haskell.org
-- Stability : provisional
-- Portability : portable
--
-- An efficient implementation of sets.
--
-- Since many function names (but not the type name) clash with
-- "Prelude" names, this module is usually imported @qualified@, e.g.
--
-- > import Data.Set (Set)
-- > import qualified Data.Set as Set
--
-- The implementation of 'Set' is based on /size balanced/ binary trees (or
-- trees of /bounded balance/) as described by:
--
-- * Stephen Adams, \"/Efficient sets: a balancing act/\",
-- Journal of Functional Programming 3(4):553-562, October 1993,
-- .
--
-- * J. Nievergelt and E.M. Reingold,
-- \"/Binary search trees of bounded balance/\",
-- SIAM journal of computing 2(1), March 1973.
--
-- Note that the implementation is /left-biased/ -- the elements of a
-- first argument are always preferred to the second, for example in
-- 'union' or 'insert'. Of course, left-biasing can only be observed
-- when equality is an equivalence relation instead of structural
-- equality.
-----------------------------------------------------------------------------
-- It is crucial to the performance that the functions specialize on the Ord
-- type when possible. GHC 7.0 and higher does this by itself when it sees th
-- unfolding of a function -- that is why all public functions are marked
-- INLINABLE (that exposes the unfolding).
--
-- For other compilers and GHC pre 7.0, we mark some of the functions INLINE.
-- We mark the functions that just navigate down the tree (lookup, insert,
-- delete and similar). That navigation code gets inlined and thus specialized
-- when possible. There is a price to pay -- code growth. The code INLINED is
-- therefore only the tree navigation, all the real work (rebalancing) is not
-- INLINED by using a NOINLINE.
--
-- All methods that can be INLINE are not recursive -- a 'go' function doing
-- the real work is provided.
module Data.Set (
-- * Set type
#if !defined(TESTING)
Set -- instance Eq,Ord,Show,Read,Data,Typeable
#else
Set(..)
#endif
-- * Operators
, (\\)
-- * Query
, null
, size
, member
, notMember
, isSubsetOf
, isProperSubsetOf
-- * Construction
, empty
, singleton
, insert
, delete
-- * Combine
, union
, unions
, difference
, intersection
-- * Filter
, filter
, partition
, split
, splitMember
-- * Map
, map
, mapMonotonic
-- * Folds
, foldr
, foldl
-- ** Strict folds
, foldr'
, foldl'
-- ** Legacy folds
, fold
-- * Min\/Max
, findMin
, findMax
, deleteMin
, deleteMax
, deleteFindMin
, deleteFindMax
, maxView
, minView
-- * Conversion
-- ** List
, elems
, toList
, fromList
-- ** Ordered list
, toAscList
, fromAscList
, fromDistinctAscList
-- * Debugging
, showTree
, showTreeWith
, valid
#if defined(TESTING)
-- Internals (for testing)
, bin
, balanced
, join
, merge
#endif
) where
import Prelude hiding (filter,foldl,foldr,null,map)
import qualified Data.List as List
import Data.Monoid (Monoid(..))
import qualified Data.Foldable as Foldable
import Data.Typeable
import Control.DeepSeq (NFData(rnf))
{-
-- just for testing
import QuickCheck
import List (nub,sort)
import qualified List
-}
#if __GLASGOW_HASKELL__
import Text.Read
import Data.Data
#endif
-- Use macros to define strictness of functions.
-- STRICT_x_OF_y denotes an y-ary function strict in the x-th parameter.
-- We do not use BangPatterns, because they are not in any standard and we
-- want the compilers to be compiled by as many compilers as possible.
#define STRICT_1_OF_2(fn) fn arg _ | arg `seq` False = undefined
{--------------------------------------------------------------------
Operators
--------------------------------------------------------------------}
infixl 9 \\ --
-- | /O(n+m)/. See 'difference'.
(\\) :: Ord a => Set a -> Set a -> Set a
m1 \\ m2 = difference m1 m2
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE (\\) #-}
#endif
{--------------------------------------------------------------------
Sets are size balanced trees
--------------------------------------------------------------------}
-- | A set of values @a@.
data Set a = Tip
| Bin {-# UNPACK #-} !Size !a !(Set a) !(Set a)
type Size = Int
instance Ord a => Monoid (Set a) where
mempty = empty
mappend = union
mconcat = unions
instance Foldable.Foldable Set where
fold Tip = mempty
fold (Bin _ k l r) = Foldable.fold l `mappend` k `mappend` Foldable.fold r
foldr = foldr
foldl = foldl
foldMap _ Tip = mempty
foldMap f (Bin _ k l r) = Foldable.foldMap f l `mappend` f k `mappend` Foldable.foldMap f r
#if __GLASGOW_HASKELL__
{--------------------------------------------------------------------
A Data instance
--------------------------------------------------------------------}
-- This instance preserves data abstraction at the cost of inefficiency.
-- We omit reflection services for the sake of data abstraction.
instance (Data a, Ord a) => Data (Set a) where
gfoldl f z set = z fromList `f` (toList set)
toConstr _ = error "toConstr"
gunfold _ _ = error "gunfold"
dataTypeOf _ = mkNoRepType "Data.Set.Set"
dataCast1 f = gcast1 f
#endif
{--------------------------------------------------------------------
Query
--------------------------------------------------------------------}
-- | /O(1)/. Is this the empty set?
null :: Set a -> Bool
null Tip = True
null (Bin {}) = False
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE null #-}
#endif
-- | /O(1)/. The number of elements in the set.
size :: Set a -> Int
size Tip = 0
size (Bin sz _ _ _) = sz
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE size #-}
#endif
-- | /O(log n)/. Is the element in the set?
member :: Ord a => a -> Set a -> Bool
member = go
where
STRICT_1_OF_2(go)
go _ Tip = False
go x (Bin _ y l r) = case compare x y of
LT -> go x l
GT -> go x r
EQ -> True
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE member #-}
#else
{-# INLINE member #-}
#endif
-- | /O(log n)/. Is the element not in the set?
notMember :: Ord a => a -> Set a -> Bool
notMember a t = not $ member a t
{-# INLINE notMember #-}
{--------------------------------------------------------------------
Construction
--------------------------------------------------------------------}
-- | /O(1)/. The empty set.
empty :: Set a
empty = Tip
-- | /O(1)/. Create a singleton set.
singleton :: a -> Set a
singleton x = Bin 1 x Tip Tip
{--------------------------------------------------------------------
Insertion, Deletion
--------------------------------------------------------------------}
-- | /O(log n)/. Insert an element in a set.
-- If the set already contains an element equal to the given value,
-- it is replaced with the new value.
insert :: Ord a => a -> Set a -> Set a
insert = go
where
STRICT_1_OF_2(go)
go x Tip = singleton x
go x (Bin sz y l r) = case compare x y of
LT -> balanceL y (go x l) r
GT -> balanceR y l (go x r)
EQ -> Bin sz x l r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINEABLE insert #-}
#else
{-# INLINE insert #-}
#endif
-- Insert an element to the set only if it is not in the set. Used by
-- `union`.
insertR :: Ord a => a -> Set a -> Set a
insertR = go
where
STRICT_1_OF_2(go)
go x Tip = singleton x
go x t@(Bin _ y l r) = case compare x y of
LT -> balanceL y (go x l) r
GT -> balanceR y l (go x r)
EQ -> t
#if __GLASGOW_HASKELL__ >= 700
{-# INLINEABLE insertR #-}
#else
{-# INLINE insertR #-}
#endif
-- | /O(log n)/. Delete an element from a set.
delete :: Ord a => a -> Set a -> Set a
delete = go
where
STRICT_1_OF_2(go)
go _ Tip = Tip
go x (Bin _ y l r) = case compare x y of
LT -> balanceR y (go x l) r
GT -> balanceL y l (go x r)
EQ -> glue l r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINEABLE delete #-}
#else
{-# INLINE delete #-}
#endif
{--------------------------------------------------------------------
Subset
--------------------------------------------------------------------}
-- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
isProperSubsetOf s1 s2
= (size s1 < size s2) && (isSubsetOf s1 s2)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isProperSubsetOf #-}
#endif
-- | /O(n+m)/. Is this a subset?
-- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2@.
isSubsetOf :: Ord a => Set a -> Set a -> Bool
isSubsetOf t1 t2
= (size t1 <= size t2) && (isSubsetOfX t1 t2)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isSubsetOf #-}
#endif
isSubsetOfX :: Ord a => Set a -> Set a -> Bool
isSubsetOfX Tip _ = True
isSubsetOfX _ Tip = False
isSubsetOfX (Bin _ x l r) t
= found && isSubsetOfX l lt && isSubsetOfX r gt
where
(lt,found,gt) = splitMember x t
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE isSubsetOfX #-}
#endif
{--------------------------------------------------------------------
Minimal, Maximal
--------------------------------------------------------------------}
-- | /O(log n)/. The minimal element of a set.
findMin :: Set a -> a
findMin (Bin _ x Tip _) = x
findMin (Bin _ _ l _) = findMin l
findMin Tip = error "Set.findMin: empty set has no minimal element"
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE findMin #-}
#endif
-- | /O(log n)/. The maximal element of a set.
findMax :: Set a -> a
findMax (Bin _ x _ Tip) = x
findMax (Bin _ _ _ r) = findMax r
findMax Tip = error "Set.findMax: empty set has no maximal element"
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE findMax #-}
#endif
-- | /O(log n)/. Delete the minimal element.
deleteMin :: Set a -> Set a
deleteMin (Bin _ _ Tip r) = r
deleteMin (Bin _ x l r) = balanceR x (deleteMin l) r
deleteMin Tip = Tip
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteMin #-}
#endif
-- | /O(log n)/. Delete the maximal element.
deleteMax :: Set a -> Set a
deleteMax (Bin _ _ l Tip) = l
deleteMax (Bin _ x l r) = balanceL x l (deleteMax r)
deleteMax Tip = Tip
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteMax #-}
#endif
{--------------------------------------------------------------------
Union.
--------------------------------------------------------------------}
-- | The union of a list of sets: (@'unions' == 'foldl' 'union' 'empty'@).
unions :: Ord a => [Set a] -> Set a
unions = foldlStrict union empty
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE unions #-}
#endif
-- | /O(n+m)/. The union of two sets, preferring the first set when
-- equal elements are encountered.
-- The implementation uses the efficient /hedge-union/ algorithm.
-- Hedge-union is more efficient on (bigset `union` smallset).
union :: Ord a => Set a -> Set a -> Set a
union Tip t2 = t2
union t1 Tip = t1
union (Bin _ x Tip Tip) t = insert x t
union t (Bin _ x Tip Tip) = insertR x t
union t1 t2 = hedgeUnion NothingS NothingS t1 t2
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE union #-}
#endif
hedgeUnion :: Ord a
=> MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
hedgeUnion _ _ t1 Tip
= t1
hedgeUnion blo bhi Tip (Bin _ x l r)
= join x (filterGt blo l) (filterLt bhi r)
hedgeUnion blo bhi (Bin _ x l r) t2
= join x (hedgeUnion blo bmi l (trim blo bmi t2))
(hedgeUnion bmi bhi r (trim bmi bhi t2))
where
bmi = JustS x
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE hedgeUnion #-}
#endif
{--------------------------------------------------------------------
Difference
--------------------------------------------------------------------}
-- | /O(n+m)/. Difference of two sets.
-- The implementation uses an efficient /hedge/ algorithm comparable with /hedge-union/.
difference :: Ord a => Set a -> Set a -> Set a
difference Tip _ = Tip
difference t1 Tip = t1
difference t1 t2 = hedgeDiff NothingS NothingS t1 t2
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE difference #-}
#endif
hedgeDiff :: Ord a
=> MaybeS a -> MaybeS a -> Set a -> Set a -> Set a
hedgeDiff _ _ Tip _
= Tip
hedgeDiff blo bhi (Bin _ x l r) Tip
= join x (filterGt blo l) (filterLt bhi r)
hedgeDiff blo bhi t (Bin _ x l r)
= merge (hedgeDiff blo bmi (trim blo bmi t) l)
(hedgeDiff bmi bhi (trim bmi bhi t) r)
where
bmi = JustS x
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE hedgeDiff #-}
#endif
{--------------------------------------------------------------------
Intersection
--------------------------------------------------------------------}
-- | /O(n+m)/. The intersection of two sets.
-- Elements of the result come from the first set, so for example
--
-- > import qualified Data.Set as S
-- > data AB = A | B deriving Show
-- > instance Ord AB where compare _ _ = EQ
-- > instance Eq AB where _ == _ = True
-- > main = print (S.singleton A `S.intersection` S.singleton B,
-- > S.singleton B `S.intersection` S.singleton A)
--
-- prints @(fromList [A],fromList [B])@.
intersection :: Ord a => Set a -> Set a -> Set a
intersection Tip _ = Tip
intersection _ Tip = Tip
intersection t1@(Bin s1 x1 l1 r1) t2@(Bin s2 x2 l2 r2) =
if s1 >= s2 then
let (lt,found,gt) = splitLookup x2 t1
tl = intersection lt l2
tr = intersection gt r2
in case found of
Just x -> join x tl tr
Nothing -> merge tl tr
else let (lt,found,gt) = splitMember x1 t2
tl = intersection l1 lt
tr = intersection r1 gt
in if found then join x1 tl tr
else merge tl tr
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE intersection #-}
#endif
{--------------------------------------------------------------------
Filter and partition
--------------------------------------------------------------------}
-- | /O(n)/. Filter all elements that satisfy the predicate.
filter :: Ord a => (a -> Bool) -> Set a -> Set a
filter _ Tip = Tip
filter p (Bin _ x l r)
| p x = join x (filter p l) (filter p r)
| otherwise = merge (filter p l) (filter p r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filter #-}
#endif
-- | /O(n)/. Partition the set into two sets, one with all elements that satisfy
-- the predicate and one with all elements that don't satisfy the predicate.
-- See also 'split'.
partition :: Ord a => (a -> Bool) -> Set a -> (Set a,Set a)
partition _ Tip = (Tip, Tip)
partition p (Bin _ x l r) = case (partition p l, partition p r) of
((l1, l2), (r1, r2))
| p x -> (join x l1 r1, merge l2 r2)
| otherwise -> (merge l1 r1, join x l2 r2)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE partition #-}
#endif
{----------------------------------------------------------------------
Map
----------------------------------------------------------------------}
-- | /O(n*log n)/.
-- @'map' f s@ is the set obtained by applying @f@ to each element of @s@.
--
-- It's worth noting that the size of the result may be smaller if,
-- for some @(x,y)@, @x \/= y && f x == f y@
map :: (Ord a, Ord b) => (a->b) -> Set a -> Set b
map f = fromList . List.map f . toList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE map #-}
#endif
-- | /O(n)/. The
--
-- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is monotonic.
-- /The precondition is not checked./
-- Semi-formally, we have:
--
-- > and [x < y ==> f x < f y | x <- ls, y <- ls]
-- > ==> mapMonotonic f s == map f s
-- > where ls = toList s
mapMonotonic :: (a->b) -> Set a -> Set b
mapMonotonic _ Tip = Tip
mapMonotonic f (Bin sz x l r) = Bin sz (f x) (mapMonotonic f l) (mapMonotonic f r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE mapMonotonic #-}
#endif
{--------------------------------------------------------------------
Fold
--------------------------------------------------------------------}
-- | /O(n)/. Fold the elements in the set using the given right-associative
-- binary operator. This function is an equivalent of 'foldr' and is present
-- for compatibility only.
--
-- /Please note that fold will be deprecated in the future and removed./
fold :: (a -> b -> b) -> b -> Set a -> b
fold = foldr
{-# INLINE fold #-}
-- | /O(n)/. Fold the elements in the set using the given right-associative
-- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@.
--
-- For example,
--
-- > toAscList set = foldr (:) [] set
foldr :: (a -> b -> b) -> b -> Set a -> b
foldr f = go
where
go z Tip = z
go z (Bin _ x l r) = go (f x (go z r)) l
{-# INLINE foldr #-}
-- | /O(n)/. A strict version of 'foldr'. Each application of the operator is
-- evaluated before using the result in the next application. This
-- function is strict in the starting value.
foldr' :: (a -> b -> b) -> b -> Set a -> b
foldr' f = go
where
STRICT_1_OF_2(go)
go z Tip = z
go z (Bin _ x l r) = go (f x (go z r)) l
{-# INLINE foldr' #-}
-- | /O(n)/. Fold the elements in the set using the given left-associative
-- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@.
--
-- For example,
--
-- > toDescList set = foldl (flip (:)) [] set
foldl :: (a -> b -> a) -> a -> Set b -> a
foldl f = go
where
go z Tip = z
go z (Bin _ x l r) = go (f (go z l) x) r
{-# INLINE foldl #-}
-- | /O(n)/. A strict version of 'foldl'. Each application of the operator is
-- evaluated before using the result in the next application. This
-- function is strict in the starting value.
foldl' :: (a -> b -> a) -> a -> Set b -> a
foldl' f = go
where
STRICT_1_OF_2(go)
go z Tip = z
go z (Bin _ x l r) = go (f (go z l) x) r
{-# INLINE foldl' #-}
{--------------------------------------------------------------------
List variations
--------------------------------------------------------------------}
-- | /O(n)/. The elements of a set.
elems :: Set a -> [a]
elems = toList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE elems #-}
#endif
{--------------------------------------------------------------------
Lists
--------------------------------------------------------------------}
-- | /O(n)/. Convert the set to a list of elements.
toList :: Set a -> [a]
toList = toAscList
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE toList #-}
#endif
-- | /O(n)/. Convert the set to an ascending list of elements.
toAscList :: Set a -> [a]
toAscList = foldr (:) []
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE toAscList #-}
#endif
-- | /O(n*log n)/. Create a set from a list of elements.
fromList :: Ord a => [a] -> Set a
fromList = foldlStrict ins empty
where
ins t x = insert x t
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromList #-}
#endif
{--------------------------------------------------------------------
Building trees from ascending/descending lists can be done in linear time.
Note that if [xs] is ascending that:
fromAscList xs == fromList xs
--------------------------------------------------------------------}
-- | /O(n)/. Build a set from an ascending list in linear time.
-- /The precondition (input list is ascending) is not checked./
fromAscList :: Eq a => [a] -> Set a
fromAscList xs
= fromDistinctAscList (combineEq xs)
where
-- [combineEq xs] combines equal elements with [const] in an ordered list [xs]
combineEq xs'
= case xs' of
[] -> []
[x] -> [x]
(x:xx) -> combineEq' x xx
combineEq' z [] = [z]
combineEq' z (x:xs')
| z==x = combineEq' z xs'
| otherwise = z:combineEq' x xs'
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromAscList #-}
#endif
-- | /O(n)/. Build a set from an ascending list of distinct elements in linear time.
-- /The precondition (input list is strictly ascending) is not checked./
fromDistinctAscList :: [a] -> Set a
fromDistinctAscList xs
= build const (length xs) xs
where
-- 1) use continutations so that we use heap space instead of stack space.
-- 2) special case for n==5 to build bushier trees.
build c 0 xs' = c Tip xs'
build c 5 xs' = case xs' of
(x1:x2:x3:x4:x5:xx)
-> c (bin x4 (bin x2 (singleton x1) (singleton x3)) (singleton x5)) xx
_ -> error "fromDistinctAscList build 5"
build c n xs' = seq nr $ build (buildR nr c) nl xs'
where
nl = n `div` 2
nr = n - nl - 1
buildR n c l (x:ys) = build (buildB l x c) n ys
buildR _ _ _ [] = error "fromDistinctAscList buildR []"
buildB l x c r zs = c (bin x l r) zs
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE fromDistinctAscList #-}
#endif
{--------------------------------------------------------------------
Eq converts the set to a list. In a lazy setting, this
actually seems one of the faster methods to compare two trees
and it is certainly the simplest :-)
--------------------------------------------------------------------}
instance Eq a => Eq (Set a) where
t1 == t2 = (size t1 == size t2) && (toAscList t1 == toAscList t2)
{--------------------------------------------------------------------
Ord
--------------------------------------------------------------------}
instance Ord a => Ord (Set a) where
compare s1 s2 = compare (toAscList s1) (toAscList s2)
{--------------------------------------------------------------------
Show
--------------------------------------------------------------------}
instance Show a => Show (Set a) where
showsPrec p xs = showParen (p > 10) $
showString "fromList " . shows (toList xs)
{--------------------------------------------------------------------
Read
--------------------------------------------------------------------}
instance (Read a, Ord a) => Read (Set a) where
#ifdef __GLASGOW_HASKELL__
readPrec = parens $ prec 10 $ do
Ident "fromList" <- lexP
xs <- readPrec
return (fromList xs)
readListPrec = readListPrecDefault
#else
readsPrec p = readParen (p > 10) $ \ r -> do
("fromList",s) <- lex r
(xs,t) <- reads s
return (fromList xs,t)
#endif
{--------------------------------------------------------------------
Typeable/Data
--------------------------------------------------------------------}
#include "Typeable.h"
INSTANCE_TYPEABLE1(Set,setTc,"Set")
{--------------------------------------------------------------------
NFData
--------------------------------------------------------------------}
instance NFData a => NFData (Set a) where
rnf Tip = ()
rnf (Bin _ y l r) = rnf y `seq` rnf l `seq` rnf r
{--------------------------------------------------------------------
Utility functions that return sub-ranges of the original
tree. Some functions take a `Maybe value` as an argument to
allow comparisons against infinite values. These are called `blow`
(Nothing is -\infty) and `bhigh` (here Nothing is +\infty).
We use MaybeS value, which is a Maybe strict in the Just case.
[trim blow bhigh t] A tree that is either empty or where [x > blow]
and [x < bhigh] for the value [x] of the root.
[filterGt blow t] A tree where for all values [k]. [k > blow]
[filterLt bhigh t] A tree where for all values [k]. [k < bhigh]
[split k t] Returns two trees [l] and [r] where all values
in [l] are <[k] and all keys in [r] are >[k].
[splitMember k t] Just like [split] but also returns whether [k]
was found in the tree.
--------------------------------------------------------------------}
data MaybeS a = NothingS | JustS !a
{--------------------------------------------------------------------
[trim blo bhi t] trims away all subtrees that surely contain no
values between the range [blo] to [bhi]. The returned tree is either
empty or the key of the root is between @blo@ and @bhi@.
--------------------------------------------------------------------}
trim :: Ord a => MaybeS a -> MaybeS a -> Set a -> Set a
trim NothingS NothingS t = t
trim (JustS lx) NothingS t = greater lx t where greater lo (Bin _ x _ r) | x <= lo = greater lo r
greater _ t' = t'
trim NothingS (JustS hx) t = lesser hx t where lesser hi (Bin _ x l _) | x >= hi = lesser hi l
lesser _ t' = t'
trim (JustS lx) (JustS hx) t = middle lx hx t where middle lo hi (Bin _ x _ r) | x <= lo = middle lo hi r
middle lo hi (Bin _ x l _) | x >= hi = middle lo hi l
middle _ _ t' = t'
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE trim #-}
#endif
{--------------------------------------------------------------------
[filterGt b t] filter all values >[b] from tree [t]
[filterLt b t] filter all values <[b] from tree [t]
--------------------------------------------------------------------}
filterGt :: Ord a => MaybeS a -> Set a -> Set a
filterGt NothingS t = t
filterGt (JustS b) t = filter' b t
where filter' _ Tip = Tip
filter' b' (Bin _ x l r) =
case compare b' x of LT -> join x (filter' b' l) r
EQ -> r
GT -> filter' b' r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filterGt #-}
#endif
filterLt :: Ord a => MaybeS a -> Set a -> Set a
filterLt NothingS t = t
filterLt (JustS b) t = filter' b t
where filter' _ Tip = Tip
filter' b' (Bin _ x l r) =
case compare x b' of LT -> join x l (filter' b' r)
EQ -> l
GT -> filter' b' l
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE filterLt #-}
#endif
{--------------------------------------------------------------------
Split
--------------------------------------------------------------------}
-- | /O(log n)/. The expression (@'split' x set@) is a pair @(set1,set2)@
-- where @set1@ comprises the elements of @set@ less than @x@ and @set2@
-- comprises the elements of @set@ greater than @x@.
split :: Ord a => a -> Set a -> (Set a,Set a)
split _ Tip = (Tip,Tip)
split x (Bin _ y l r)
= case compare x y of
LT -> let (lt,gt) = split x l in (lt,join y gt r)
GT -> let (lt,gt) = split x r in (join y l lt,gt)
EQ -> (l,r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE split #-}
#endif
-- | /O(log n)/. Performs a 'split' but also returns whether the pivot
-- element was found in the original set.
splitMember :: Ord a => a -> Set a -> (Set a,Bool,Set a)
splitMember x t = let (l,m,r) = splitLookup x t in
(l,maybe False (const True) m,r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE splitMember #-}
#endif
-- | /O(log n)/. Performs a 'split' but also returns the pivot
-- element that was found in the original set.
splitLookup :: Ord a => a -> Set a -> (Set a,Maybe a,Set a)
splitLookup _ Tip = (Tip,Nothing,Tip)
splitLookup x (Bin _ y l r)
= case compare x y of
LT -> let (lt,found,gt) = splitLookup x l in (lt,found,join y gt r)
GT -> let (lt,found,gt) = splitLookup x r in (join y l lt,found,gt)
EQ -> (l,Just y,r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE splitLookup #-}
#endif
{--------------------------------------------------------------------
Utility functions that maintain the balance properties of the tree.
All constructors assume that all values in [l] < [x] and all values
in [r] > [x], and that [l] and [r] are valid trees.
In order of sophistication:
[Bin sz x l r] The type constructor.
[bin x l r] Maintains the correct size, assumes that both [l]
and [r] are balanced with respect to each other.
[balance x l r] Restores the balance and size.
Assumes that the original tree was balanced and
that [l] or [r] has changed by at most one element.
[join x l r] Restores balance and size.
Furthermore, we can construct a new tree from two trees. Both operations
assume that all values in [l] < all values in [r] and that [l] and [r]
are valid:
[glue l r] Glues [l] and [r] together. Assumes that [l] and
[r] are already balanced with respect to each other.
[merge l r] Merges two trees and restores balance.
Note: in contrast to Adam's paper, we use (<=) comparisons instead
of (<) comparisons in [join], [merge] and [balance].
Quickcheck (on [difference]) showed that this was necessary in order
to maintain the invariants. It is quite unsatisfactory that I haven't
been able to find out why this is actually the case! Fortunately, it
doesn't hurt to be a bit more conservative.
--------------------------------------------------------------------}
{--------------------------------------------------------------------
Join
--------------------------------------------------------------------}
join :: a -> Set a -> Set a -> Set a
join x Tip r = insertMin x r
join x l Tip = insertMax x l
join x l@(Bin sizeL y ly ry) r@(Bin sizeR z lz rz)
| delta*sizeL < sizeR = balanceL z (join x l lz) rz
| delta*sizeR < sizeL = balanceR y ly (join x ry r)
| otherwise = bin x l r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE join #-}
#endif
-- insertMin and insertMax don't perform potentially expensive comparisons.
insertMax,insertMin :: a -> Set a -> Set a
insertMax x t
= case t of
Tip -> singleton x
Bin _ y l r
-> balanceR y l (insertMax x r)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insertMax #-}
#endif
insertMin x t
= case t of
Tip -> singleton x
Bin _ y l r
-> balanceL y (insertMin x l) r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE insertMin #-}
#endif
{--------------------------------------------------------------------
[merge l r]: merges two trees.
--------------------------------------------------------------------}
merge :: Set a -> Set a -> Set a
merge Tip r = r
merge l Tip = l
merge l@(Bin sizeL x lx rx) r@(Bin sizeR y ly ry)
| delta*sizeL < sizeR = balanceL y (merge l ly) ry
| delta*sizeR < sizeL = balanceR x lx (merge rx r)
| otherwise = glue l r
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE merge #-}
#endif
{--------------------------------------------------------------------
[glue l r]: glues two trees together.
Assumes that [l] and [r] are already balanced with respect to each other.
--------------------------------------------------------------------}
glue :: Set a -> Set a -> Set a
glue Tip r = r
glue l Tip = l
glue l r
| size l > size r = let (m,l') = deleteFindMax l in balanceR m l' r
| otherwise = let (m,r') = deleteFindMin r in balanceL m l r'
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE glue #-}
#endif
-- | /O(log n)/. Delete and find the minimal element.
--
-- > deleteFindMin set = (findMin set, deleteMin set)
deleteFindMin :: Set a -> (a,Set a)
deleteFindMin t
= case t of
Bin _ x Tip r -> (x,r)
Bin _ x l r -> let (xm,l') = deleteFindMin l in (xm,balanceR x l' r)
Tip -> (error "Set.deleteFindMin: can not return the minimal element of an empty set", Tip)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteFindMin #-}
#endif
-- | /O(log n)/. Delete and find the maximal element.
--
-- > deleteFindMax set = (findMax set, deleteMax set)
deleteFindMax :: Set a -> (a,Set a)
deleteFindMax t
= case t of
Bin _ x l Tip -> (x,l)
Bin _ x l r -> let (xm,r') = deleteFindMax r in (xm,balanceL x l r')
Tip -> (error "Set.deleteFindMax: can not return the maximal element of an empty set", Tip)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE deleteFindMax #-}
#endif
-- | /O(log n)/. Retrieves the minimal key of the set, and the set
-- stripped of that element, or 'Nothing' if passed an empty set.
minView :: Set a -> Maybe (a, Set a)
minView Tip = Nothing
minView x = Just (deleteFindMin x)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE minView #-}
#endif
-- | /O(log n)/. Retrieves the maximal key of the set, and the set
-- stripped of that element, or 'Nothing' if passed an empty set.
maxView :: Set a -> Maybe (a, Set a)
maxView Tip = Nothing
maxView x = Just (deleteFindMax x)
#if __GLASGOW_HASKELL__ >= 700
{-# INLINABLE maxView #-}
#endif
{--------------------------------------------------------------------
[balance x l r] balances two trees with value x.
The sizes of the trees should balance after decreasing the
size of one of them. (a rotation).
[delta] is the maximal relative difference between the sizes of
two trees, it corresponds with the [w] in Adams' paper.
[ratio] is the ratio between an outer and inner sibling of the
heavier subtree in an unbalanced setting. It determines
whether a double or single rotation should be performed
to restore balance. It is correspondes with the inverse
of $\alpha$ in Adam's article.
Note that according to the Adam's paper:
- [delta] should be larger than 4.646 with a [ratio] of 2.
- [delta] should be larger than 3.745 with a [ratio] of 1.534.
But the Adam's paper is errorneous:
- it can be proved that for delta=2 and delta>=5 there does
not exist any ratio that would work
- delta=4.5 and ratio=2 does not work
That leaves two reasonable variants, delta=3 and delta=4,
both with ratio=2.
- A lower [delta] leads to a more 'perfectly' balanced tree.
- A higher [delta] performs less rebalancing.
In the benchmarks, delta=3 is faster on insert operations,
and delta=4 has slightly better deletes. As the insert speedup
is larger, we currently use delta=3.
--------------------------------------------------------------------}
delta,ratio :: Int
delta = 3
ratio = 2
-- The balance function is equivalent to the following:
--
-- balance :: a -> Set a -> Set a -> Set a
-- balance x l r
-- | sizeL + sizeR <= 1 = Bin sizeX x l r
-- | sizeR > delta*sizeL = rotateL x l r
-- | sizeL > delta*sizeR = rotateR x l r
-- | otherwise = Bin sizeX x l r
-- where
-- sizeL = size l
-- sizeR = size r
-- sizeX = sizeL + sizeR + 1
--
-- rotateL :: a -> Set a -> Set a -> Set a
-- rotateL x l r@(Bin _ _ ly ry) | size ly < ratio*size ry = singleL x l r
-- | otherwise = doubleL x l r
-- rotateR :: a -> Set a -> Set a -> Set a
-- rotateR x l@(Bin _ _ ly ry) r | size ry < ratio*size ly = singleR x l r
-- | otherwise = doubleR x l r
--
-- singleL, singleR :: a -> Set a -> Set a -> Set a
-- singleL x1 t1 (Bin _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
-- singleR x1 (Bin _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
--
-- doubleL, doubleR :: a -> Set a -> Set a -> Set a
-- doubleL x1 t1 (Bin _ x2 (Bin _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
-- doubleR x1 (Bin _ x2 t1 (Bin _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
--
-- It is only written in such a way that every node is pattern-matched only once.
--
-- Only balanceL and balanceR are needed at the moment, so balance is not here anymore.
-- In case it is needed, it can be found in Data.Map.
-- Functions balanceL and balanceR are specialised versions of balance.
-- balanceL only checks whether the left subtree is too big,
-- balanceR only checks whether the right subtree is too big.
-- balanceL is called when left subtree might have been inserted to or when
-- right subtree might have been deleted from.
balanceL :: a -> Set a -> Set a -> Set a
balanceL x l r = case r of
Tip -> case l of
Tip -> Bin 1 x Tip Tip
(Bin _ _ Tip Tip) -> Bin 2 x l Tip
(Bin _ lx Tip (Bin _ lrx _ _)) -> Bin 3 lrx (Bin 1 lx Tip Tip) (Bin 1 x Tip Tip)
(Bin _ lx ll@(Bin _ _ _ _) Tip) -> Bin 3 lx ll (Bin 1 x Tip Tip)
(Bin ls lx ll@(Bin lls _ _ _) lr@(Bin lrs lrx lrl lrr))
| lrs < ratio*lls -> Bin (1+ls) lx ll (Bin (1+lrs) x lr Tip)
| otherwise -> Bin (1+ls) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+size lrr) x lrr Tip)
(Bin rs _ _ _) -> case l of
Tip -> Bin (1+rs) x Tip r
(Bin ls lx ll lr)
| ls > delta*rs -> case (ll, lr) of
(Bin lls _ _ _, Bin lrs lrx lrl lrr)
| lrs < ratio*lls -> Bin (1+ls+rs) lx ll (Bin (1+rs+lrs) x lr r)
| otherwise -> Bin (1+ls+rs) lrx (Bin (1+lls+size lrl) lx ll lrl) (Bin (1+rs+size lrr) x lrr r)
(_, _) -> error "Failure in Data.Map.balanceL"
| otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceL #-}
-- balanceR is called when right subtree might have been inserted to or when
-- left subtree might have been deleted from.
balanceR :: a -> Set a -> Set a -> Set a
balanceR x l r = case l of
Tip -> case r of
Tip -> Bin 1 x Tip Tip
(Bin _ _ Tip Tip) -> Bin 2 x Tip r
(Bin _ rx Tip rr@(Bin _ _ _ _)) -> Bin 3 rx (Bin 1 x Tip Tip) rr
(Bin _ rx (Bin _ rlx _ _) Tip) -> Bin 3 rlx (Bin 1 x Tip Tip) (Bin 1 rx Tip Tip)
(Bin rs rx rl@(Bin rls rlx rll rlr) rr@(Bin rrs _ _ _))
| rls < ratio*rrs -> Bin (1+rs) rx (Bin (1+rls) x Tip rl) rr
| otherwise -> Bin (1+rs) rlx (Bin (1+size rll) x Tip rll) (Bin (1+rrs+size rlr) rx rlr rr)
(Bin ls _ _ _) -> case r of
Tip -> Bin (1+ls) x l Tip
(Bin rs rx rl rr)
| rs > delta*ls -> case (rl, rr) of
(Bin rls rlx rll rlr, Bin rrs _ _ _)
| rls < ratio*rrs -> Bin (1+ls+rs) rx (Bin (1+ls+rls) x l rl) rr
| otherwise -> Bin (1+ls+rs) rlx (Bin (1+ls+size rll) x l rll) (Bin (1+rrs+size rlr) rx rlr rr)
(_, _) -> error "Failure in Data.Map.balanceR"
| otherwise -> Bin (1+ls+rs) x l r
{-# NOINLINE balanceR #-}
{--------------------------------------------------------------------
The bin constructor maintains the size of the tree
--------------------------------------------------------------------}
bin :: a -> Set a -> Set a -> Set a
bin x l r
= Bin (size l + size r + 1) x l r
{-# INLINE bin #-}
{--------------------------------------------------------------------
Utilities
--------------------------------------------------------------------}
foldlStrict :: (a -> b -> a) -> a -> [b] -> a
foldlStrict f = go
where
go z [] = z
go z (x:xs) = let z' = f z x in z' `seq` go z' xs
{-# INLINE foldlStrict #-}
{--------------------------------------------------------------------
Debugging
--------------------------------------------------------------------}
-- | /O(n)/. Show the tree that implements the set. The tree is shown
-- in a compressed, hanging format.
showTree :: Show a => Set a -> String
showTree s
= showTreeWith True False s
{- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
the tree that implements the set. If @hang@ is
@True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
@wide@ is 'True', an extra wide version is shown.
> Set> putStrLn $ showTreeWith True False $ fromDistinctAscList [1..5]
> 4
> +--2
> | +--1
> | +--3
> +--5
>
> Set> putStrLn $ showTreeWith True True $ fromDistinctAscList [1..5]
> 4
> |
> +--2
> | |
> | +--1
> | |
> | +--3
> |
> +--5
>
> Set> putStrLn $ showTreeWith False True $ fromDistinctAscList [1..5]
> +--5
> |
> 4
> |
> | +--3
> | |
> +--2
> |
> +--1
-}
showTreeWith :: Show a => Bool -> Bool -> Set a -> String
showTreeWith hang wide t
| hang = (showsTreeHang wide [] t) ""
| otherwise = (showsTree wide [] [] t) ""
showsTree :: Show a => Bool -> [String] -> [String] -> Set a -> ShowS
showsTree wide lbars rbars t
= case t of
Tip -> showsBars lbars . showString "|\n"
Bin _ x Tip Tip
-> showsBars lbars . shows x . showString "\n"
Bin _ x l r
-> showsTree wide (withBar rbars) (withEmpty rbars) r .
showWide wide rbars .
showsBars lbars . shows x . showString "\n" .
showWide wide lbars .
showsTree wide (withEmpty lbars) (withBar lbars) l
showsTreeHang :: Show a => Bool -> [String] -> Set a -> ShowS
showsTreeHang wide bars t
= case t of
Tip -> showsBars bars . showString "|\n"
Bin _ x Tip Tip
-> showsBars bars . shows x . showString "\n"
Bin _ x l r
-> showsBars bars . shows x . showString "\n" .
showWide wide bars .
showsTreeHang wide (withBar bars) l .
showWide wide bars .
showsTreeHang wide (withEmpty bars) r
showWide :: Bool -> [String] -> String -> String
showWide wide bars
| wide = showString (concat (reverse bars)) . showString "|\n"
| otherwise = id
showsBars :: [String] -> ShowS
showsBars bars
= case bars of
[] -> id
_ -> showString (concat (reverse (tail bars))) . showString node
node :: String
node = "+--"
withBar, withEmpty :: [String] -> [String]
withBar bars = "| ":bars
withEmpty bars = " ":bars
{--------------------------------------------------------------------
Assertions
--------------------------------------------------------------------}
-- | /O(n)/. Test if the internal set structure is valid.
valid :: Ord a => Set a -> Bool
valid t
= balanced t && ordered t && validsize t
ordered :: Ord a => Set a -> Bool
ordered t
= bounded (const True) (const True) t
where
bounded lo hi t'
= case t' of
Tip -> True
Bin _ x l r -> (lo x) && (hi x) && bounded lo (x) hi r
balanced :: Set a -> Bool
balanced t
= case t of
Tip -> True
Bin _ _ l r -> (size l + size r <= 1 || (size l <= delta*size r && size r <= delta*size l)) &&
balanced l && balanced r
validsize :: Set a -> Bool
validsize t
= (realsize t == Just (size t))
where
realsize t'
= case t' of
Tip -> Just 0
Bin sz _ l r -> case (realsize l,realsize r) of
(Just n,Just m) | n+m+1 == sz -> Just sz
_ -> Nothing