----------------------------------------------------------------------------- -- | -- Module : Data.List.Ordered -- Copyright : (c) 2009-2010 Leon P Smith -- License : BSD3 -- -- Maintainer : leon@melding-monads.com -- Stability : experimental -- Portability : portable -- -- This module implements bag and set operations on ordered lists. -- Except for variations of the 'sort' and 'isSorted' functions, -- every function assumes that any list arguments are sorted lists. -- Assuming this precondition is met, every resulting list is also -- sorted. -- -- Note that these functions handle multisets, and are left-biased. -- Thus, even assuming the arguments are sorted, 'isect' does not always -- return the same results as Data.List.intersection, due to multiplicity. -- ----------------------------------------------------------------------------- module Data.List.Ordered ( -- * Predicates member, memberBy, has, hasBy , subset, subsetBy , isSorted, isSortedBy -- * Insertion Functions , insertBag, insertBagBy , insertSet, insertSetBy -- * Set-like operations , isect, isectBy , union, unionBy , minus, minusBy , xunion, xunionBy , merge, mergeBy , mergeAll, mergeAllBy , unionAll, unionAllBy -- * Lists to Ordered Lists , nub, nubBy , sort, sortBy , sortOn, sortOn' , nubSort, nubSortBy , nubSortOn, nubSortOn' ) where import Data.List(sort,sortBy) -- | The 'isSorted' predicate returns 'True' if the elements of a list occur in non-descending order, equivalent to 'isSortedBy' ('<='). isSorted :: (Ord a) => [a] -> Bool isSorted = isSortedBy (<=) -- | The 'isSortedBy' function returns 'True' iff the predicate returns true -- for all adjacent pairs of elements in the list. isSortedBy :: (a -> a -> Bool) -> [a] -> Bool isSortedBy lte = loop where loop [] = True loop [_] = True loop (x:y:zs) = (x `lte` y) && loop (y:zs) -- | The 'member' function returns 'True' if the element appears in the -- ordered list. member :: (Ord a) => a -> [a] -> Bool member = memberBy compare -- | The 'memberBy' function is the non-overloaded version of 'member'. memberBy :: (a -> a -> Ordering) -> a -> [a] -> Bool memberBy cmp x = loop where loop [] = False loop (y:ys) = case cmp x y of LT -> False EQ -> True GT -> loop ys -- | The 'has' function returns 'True' if the element appears in the list; -- it is equivalent to 'member' except the order of the arguments is reversed, -- making it a function from an ordered list to its characteristic function. has :: (Ord a) => [a] -> a -> Bool has xs y = memberBy compare y xs -- | The 'hasBy' function is the non-overloaded version of 'has'. hasBy :: (a -> a -> Ordering) -> [a] -> a -> Bool hasBy cmp xs y = memberBy cmp y xs -- | The 'insertBag' function inserts an element into a list. If the element -- is already there, then another copy of the element is inserted. insertBag :: (Ord a) => a -> [a] -> [a] insertBag = insertBagBy compare -- | The 'insertBagBy' function is the non-overloaded version of 'insertBag'. insertBagBy :: (a -> a -> Ordering) -> a -> [a] -> [a] insertBagBy cmp = loop where loop x [] = [x] loop x (y:ys) = case cmp x y of GT -> y:loop x ys _ -> x:y:ys -- | The 'insertSet' function inserts an element into an ordered list. -- If the element is already there, then the element replaces the existing -- element. insertSet :: (Ord a) => a -> [a] -> [a] insertSet = insertSetBy compare -- | The 'insertSetBy' function is the non-overloaded version of 'insertSet'. insertSetBy :: (a -> a -> Ordering) -> a -> [a] -> [a] insertSetBy cmp = loop where loop x [] = [x] loop x (y:ys) = case cmp x y of LT -> x:y:ys EQ -> x:ys GT -> y:loop x ys {- -- This function is moderately interesting, as it encompasses all the -- "venn diagram" functions on two sets. (though not merge; which isn't -- a set function) -- However, it doesn't seem that useful, considering that of the 8 possible -- functions, there are only 4 interesting variations: isect, union, minus, -- and xunion. Due to interactions with GHC's optimizer, coded seperately, -- these have a smaller combined object code size than the object code size -- for genSectBy. (Or, turn off certain optimizations and lose speed.) -- Each individual object code can be recovered from genSectBy via GHC's -- inliner and constant propogation; but this doesn't save much in terms -- of source code size and reduces portability. -- Note that the Static Argument Transformation is necessary for this to work -- correctly; inlining genSectBy allows for cmp and p to be inlined as well, -- or at least eliminate some indirect jumps. All of the *By functions in -- this module follow this idiom for this reason. genSectBy :: (a -> a -> Ordering) -> (a -> a -> Bool) -> [a] -> [a] -> [a] genSectBy cmp p = loop where loop [] ys | p False True = ys | otherwise = [] loop xs [] | p True False = xs | otherwise = [] loop (x:xs) (y:ys) = case cmp x y of LT | p True False -> x : loop xs (y:ys) | otherwise -> loop xs (y:ys) EQ | p True True -> x : loop xs ys | otherwise -> loop xs ys GT | p False True -> y : loop (x:xs) ys | otherwise -> loop (x:xs) ys -} -- | The 'isect' function computes the intersection of two ordered lists. -- An element occurs in the output as many times as the minimum number of -- occurences in either input. If either input is a set, then the output -- is a set. -- -- > isect [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 3,4 ] -- > isect [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1, 2,2 ] isect :: (Ord a) => [a] -> [a] -> [a] isect = isectBy compare -- | The 'isectBy' function is the non-overloaded version of 'isect'. isectBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] isectBy cmp = loop where loop [] _ys = [] loop _xs [] = [] loop (x:xs) (y:ys) = case cmp x y of LT -> loop xs (y:ys) EQ -> x : loop xs ys GT -> loop (x:xs) ys -- | The 'union' function computes the union of two ordered lists. -- An element occurs in the output as many times as the maximum number -- of occurences in either input. If both inputs are sets, then the -- output is a set. -- -- > union [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 3,4, 5,6 ] -- > union [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1, 2,2,2 ] union :: (Ord a) => [a] -> [a] -> [a] union = unionBy compare -- | The 'unionBy' function is the non-overloaded version of 'union'. unionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] unionBy cmp = loop where loop [] ys = ys loop xs [] = xs loop (x:xs) (y:ys) = case cmp x y of LT -> x : loop xs (y:ys) EQ -> x : loop xs ys GT -> y : loop (x:xs) ys -- | The 'minus' function computes the difference of two ordered lists. -- An element occurs in the output as many times as it occurs in -- the first input, minus the number of occurrences in the second input. -- If the first input is a set, then the output is a set. -- -- > minus [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2 ] -- > minus [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 2 ] minus :: (Ord a) => [a] -> [a] -> [a] minus = minusBy compare -- | The 'minusBy' function is the non-overloaded version of 'minus'. minusBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] minusBy cmp = loop where loop [] _ys = [] loop xs [] = xs loop (x:xs) (y:ys) = case cmp x y of LT -> x : loop xs (y:ys) EQ -> loop xs ys GT -> loop (x:xs) ys -- | The 'xunion' function computes the exclusive union of two ordered lists. -- An element occurs in the output as many times as the absolute difference -- between the number of occurrences in the inputs. If both inputs -- are sets, then the output is a set. -- -- > xunion [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 5,6 ] -- > xunion [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1, 2 ] xunion :: (Ord a) => [a] -> [a] -> [a] xunion = xunionBy compare -- | The 'xunionBy' function is the non-overloaded version of 'xunion'. xunionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] xunionBy cmp = loop where loop [] ys = ys loop xs [] = xs loop (x:xs) (y:ys) = case cmp x y of LT -> x : loop xs (y:ys) EQ -> loop xs ys GT -> y : loop (x:xs) ys -- | The 'merge' function combines all elements of two ordered lists. -- An element occurs in the output as many times as the sum of the -- occurences in the lists. -- -- > merge [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 3,3,4,4, 5,6 ] -- > merge [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1,1, 2,2,2,2,2 ] merge :: (Ord a) => [a] -> [a] -> [a] merge = mergeBy compare -- | The 'mergeBy' function is the non-overloaded version of 'merge'. mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] mergeBy cmp = loop where loop [] ys = ys loop xs [] = xs loop (x:xs) (y:ys) = case cmp x y of GT -> y : loop (x:xs) ys _ -> x : loop xs (y:ys) -- | The 'subset' function returns true if the first list is a sub-list -- of the second. subset :: (Ord a) => [a] -> [a] -> Bool subset = subsetBy compare -- | The 'subsetBy' function is the non-overloaded version of 'subset'. subsetBy :: (a -> a -> Ordering) -> [a] -> [a] -> Bool subsetBy cmp = loop where loop [] _ys = True loop _xs [] = False loop (x:xs) (y:ys) = case cmp x y of LT -> False EQ -> loop xs ys GT -> loop (x:xs) ys {- -- This is Ian Lynagh's mergesort implementation, which appears as -- Data.List.sort, with the static argument transformation applied. -- It's not clear whether this modification is truly worthwhile or not. sort :: Ord a => [a] -> [a] sort = sortBy compare sortBy :: (a -> a -> Ordering) -> [a] -> [a] sortBy cmp = loop . map (\x -> [x]) where loop [] = [] loop [xs] = xs loop xss = loop (merge_pairs xss) merge_pairs [] = [] merge_pairs [xs] = [xs] merge_pairs (xs:ys:xss) = mergeBy cmp xs ys : merge_pairs xss -} -- | The 'sortOn' function provides the decorate-sort-undecorate idiom, -- also known as the \"Schwartzian transform\". sortOn :: Ord b => (a -> b) -> [a] -> [a] sortOn f = map snd . sortOn' fst . map (\x -> (f x, x)) -- | This variant of 'sortOn' recomputes the sorting key every comparison. -- This can be better for functions that are cheap to compute. -- This is definitely better for projections, as the decorate-sort-undecorate -- saves nothing and adds two traversals of the list and extra memory -- allocation. sortOn' :: Ord b => (a -> b) -> [a] -> [a] sortOn' f = sortBy (\x y -> compare (f x) (f y)) -- | The 'nubSort' function is equivalent to 'nub' '.' 'sort', except -- somewhat more efficient as duplicates are removed as it sorts. It is -- essentially Data.List.sort, a mergesort by Ian Lynagh, with 'merge' -- replaced by 'union'. nubSort :: Ord a => [a] -> [a] nubSort = nubSortBy compare -- | The 'nubSortBy' function is the non-overloaded version of 'nubSort'. nubSortBy :: (a -> a -> Ordering) -> [a] -> [a] nubSortBy cmp = loop . map (\x -> [x]) where loop [] = [] loop [xs] = xs loop xss = loop (union_pairs xss) union_pairs [] = [] union_pairs [xs] = [xs] union_pairs (xs:ys:xss) = unionBy cmp xs ys : union_pairs xss -- | The 'nubSortOn' function provides decorate-sort-undecorate for 'nubSort'. nubSortOn :: Ord b => (a -> b) -> [a] -> [a] nubSortOn f = map snd . nubSortOn' fst . map (\x -> (f x, x)) -- | This variant of 'nubSortOn' recomputes the for each comparison. nubSortOn' :: Ord b => (a -> b) -> [a] -> [a] nubSortOn' f = nubSortBy (\x y -> compare (f x) (f y)) -- | On ordered lists, 'nub' is equivalent to 'Data.List.nub', except that -- it runs in linear time instead of quadratic. On unordered lists it also -- removes elements that are smaller than any preceding element. -- -- > nub [1,1,1,2,2] == [1,2] -- > nub [2,0,1,3,3] == [2,3] -- > nub = nubBy (<) nub :: (Ord a) => [a] -> [a] nub = nubBy (<) -- | The 'nubBy' function is the greedy algorithm that returns a -- sublist of its input such that: -- -- > isSortedBy pred (nubBy pred xs) == True -- -- This is true for all lists, not just ordered lists, and all binary -- predicates, not just total orders. On infinite lists, this statement -- is true in a certain mathematical sense, but not a computational one. nubBy :: (a -> a -> Bool) -> [a] -> [a] nubBy p [] = [] nubBy p (x:xs) = x : loop x xs where loop _ [] = [] loop x (y:ys) | p x y = y : loop y ys | otherwise = loop x ys data People a = VIP a (People a) | Crowd [a] -- | The 'mergeAll' function generalizes \"'foldr' 'merge' []\" to a -- (possibly infinite) list of (possibly infinite) ordered lists. To make -- this possible, it adds the assumption that the heads of the non-empty -- lists themselves form a sorted list. -- -- The implementation is based on the article \"Implicit Heaps\" by -- Heinrich Apfelmus, which simplifies an algorithm by Dave Bayer. -- -- <http://apfelmus.nfshost.com/articles/implicit-heaps.html> -- -- The following definition is a simple and reasonably efficient implementation -- that is faster for inputs whose smallest elements are highly biased -- towards the first few lists: -- -- > mergeAll' = foldr merge' [] -- > where merge' [] ys = ys -- > merge' (x:xs) ys = x : merge xs ys -- -- This definition uses a linear chain of comparisons whereas the provided -- implementation uses a tree of comparisons, which is faster on a wide range -- of inputs. mergeAll :: (Ord a) => [[a]] -> [a] mergeAll = mergeAllBy compare -- | The 'mergeAllBy' function is the non-overloaded variant of the 'mergeAll' function. mergeAllBy :: (a -> a -> Ordering) -> [[a]] -> [a] mergeAllBy cmp xss = loop [ (VIP x (Crowd xs)) | (x:xs) <- xss ] where loop [] = [] loop ((VIP x xs):xss) = x : loop (xs:xss) loop [Crowd xs] = xs loop xss = loop (mergePairs xss) mergePairs [] = [] mergePairs [x] = [x] mergePairs (x:y:zs) = merge' x y : mergePairs zs merge' (VIP x xs) ys = VIP x (merge' xs ys) merge' (Crowd []) ys = ys merge' (Crowd xs) (Crowd ys) = Crowd (mergeBy cmp xs ys) merge' xs@(Crowd (x:xt)) ys@(VIP y yt) = case cmp x y of GT -> VIP y (merge' xs yt) _ -> VIP x (merge' (Crowd xt) ys) -- | The 'unionAll' function generalizes \"'foldr' 'union' []\" to a -- (possibly infinite) list of (possibly infinite) ordered lists. -- To make this possible, it adds the assumption that the heads of the -- non-empty lists themselves form a sorted list. -- -- The library implementation is based on some of the same techniques -- as used in 'mergeAll'. However, the analogous simple definition -- is not entirely satisfactory, because -- -- > unionAll' = foldr union' [] -- > where union' [] ys = ys -- > union' (x:xs) ys = x : union xs ys -- > -- > unionAll' [[1,2],[1,2]] == [1,1,2] -- -- whereas we really want the result -- -- > unionAll [[1,2],[1,2]] == foldr union [] [[1,2],[1,2]] == [1,2] -- -- The first equality is only true when both sets of assumptions are met: -- \"foldr union []\" assumes the outer list is finite, and 'unionAll' -- assumes that the heads of the inner lists are sorted. unionAll :: (Ord a) => [[a]] -> [a] unionAll = unionAllBy compare -- | The 'unionAllBy' function is the non-overloaded variant of the 'unionAll' function. unionAllBy :: (a -> a -> Ordering) -> [[a]] -> [a] unionAllBy cmp xss = loop [ (VIP x (Crowd xs)) | (x:xs) <- xss ] where loop [] = [] loop ( VIP x xs : VIP y ys : xss ) = case cmp x y of LT -> x : loop ( xs : VIP y ys : xss ) EQ -> loop ( VIP x (union' xs ys) : unionPairs xss ) GT -> error "Data.List.Ordered.unionAllBy: the heads of the lists are not sorted" loop ( VIP x xs : xss ) = x : loop (xs:xss) loop [Crowd xs] = xs loop (xs:xss) = loop (unionPairs (xs:xss)) unionPairs [] = [] unionPairs [x] = [x] unionPairs (x:y:zs) = union' x y : unionPairs zs union' (VIP x xs) (VIP y ys) = case cmp x y of LT -> VIP x (union' xs (VIP y ys)) EQ -> VIP x (union' xs ys) GT -> error "Data.List.Ordered.unionAllBy: the heads of the lists are not sorted" union' (VIP x xs) (Crowd ys) = VIP x (union' xs (Crowd ys)) union' (Crowd []) ys = ys union' (Crowd xs) (Crowd ys) = Crowd (unionBy cmp xs ys) union' xs@(Crowd (x:xt)) ys@(VIP y yt) = case cmp x y of LT -> VIP x (union' (Crowd xt) ys) EQ -> VIP x (union' (Crowd xt) yt) GT -> VIP y (union' xs yt)