Data/List/Ordered.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.List.Ordered
-- Copyright   :  (c) 2009-2010 Leon P Smith
-- License     :  BSD3
--
-- Maintainer  :  leon@melding-monads.com
-- Stability   :  experimental
-- Portability :  portable
--
-- This module implements bag and set operations on ordered lists.
-- Except for variations of the  'sort' and 'isSorted' functions,
-- every function assumes that any list arguments are sorted lists.
-- Assuming this precondition is met,  every resulting list is also
-- sorted.
--
-- Note that these functions handle multisets, and are left-biased.
-- Thus, even assuming the arguments are sorted,  'isect' does not always
-- return the same results as Data.List.intersection,  due to multiplicity.
--
-----------------------------------------------------------------------------

module  Data.List.Ordered
     (
        -- * Predicates
        member, memberBy, has, hasBy
     ,  subset, subsetBy
     ,  isSorted, isSortedBy

        -- * Insertion Functions
     ,  insertBag, insertBagBy
     ,  insertSet, insertSetBy

        -- * Set-like operations
     ,  isect, isectBy
     ,  union, unionBy
     ,  minus, minusBy
     ,  xunion, xunionBy
     ,  merge, mergeBy

        -- * Lists to Ordered Lists
     ,  nub, nubBy
     ,  sort, sortBy
     ,  sortOn, sortOn'
     ,  nubSort, nubSortBy
     ,  nubSortOn, nubSortOn'

     )  where

import Data.List(sort,sortBy)

-- |  'isSorted' returns 'True' if the elements of a list occur in non-descending order,  equivalent to 'isSortedBy' ('<=')
isSorted :: (Ord a) => [a] -> Bool
isSorted = isSortedBy (<=)

-- |  'isSortedBy' returns 'True' if the predicate returns true for all adjacent pairs of elements in the list
isSortedBy :: (a -> a -> Bool) -> [a] -> Bool
isSortedBy lte = loop
  where
    loop []       = True
    loop [_]      = True
    loop (x:y:zs) = (x `lte` y) && loop (y:zs)

-- |  'member' returns 'True' if the element appears in the ordered list
member :: (Ord a) => a -> [a] -> Bool
member = memberBy compare

-- |  'memberBy' is the non-overloaded version of 'member'
memberBy :: (a -> a -> Ordering) -> a -> [a] -> Bool
memberBy cmp x = loop
  where
    loop []     = False
    loop (y:ys) = case cmp x y of
                    LT -> False
                    EQ -> True
                    GT -> loop ys

-- |  'has' returns 'True' if the element appears in the list; it is a function from an ordered list to its characteristic function.
has :: (Ord a) => [a] -> a -> Bool
has xs y = memberBy compare y xs

-- |  'hasBy' is the non-overloaded version of 'has'
hasBy :: (a -> a -> Ordering) -> [a] -> a -> Bool
hasBy cmp xs y = memberBy cmp y xs

-- |  'insertBag' inserts an element into a list,  allowing for duplicate elements
insertBag :: (Ord a) => a -> [a] -> [a]
insertBag = insertBagBy compare

-- |  'insertBagBy' is the non-overloaded version of 'insertBag'
insertBagBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
insertBagBy cmp = loop
  where
    loop x [] = [x]
    loop x (y:ys)
      = case cmp x y of
         GT -> y:loop x ys
         _  -> x:y:ys

-- |  'insertSet' inserts an element into an ordered list, or replaces the first occurrence if it is already there.
insertSet :: (Ord a) => a -> [a] -> [a]
insertSet = insertSetBy compare

-- |  'insertSetBy' is the non-overloaded version of 'insertSet'
insertSetBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
insertSetBy cmp = loop
  where
    loop x [] = [x]
    loop x (y:ys) = case cmp x y of
            LT -> x:y:ys
            EQ -> x:ys
            GT -> y:loop x ys

-- |  'isect' computes the intersection of two ordered lists.
-- The result contains those elements contained in both arguments
--
-- > isect [1,3,5] [2,4,6] == []
-- > isect [2,4,6,8] [3,6,9] == [6]
-- > isect [1,2,2,2] [1,1,1,2,2] == [1,2,2]
isect :: (Ord a) => [a] -> [a] -> [a]
isect = isectBy compare

-- |  'isectBy' is the non-overloaded version of 'isect'
isectBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
isectBy cmp = loop
  where
     loop [] _ys  = []
     loop _xs []  = []
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT ->     loop xs (y:ys)
          EQ -> x : loop xs ys
          GT ->     loop (x:xs) ys

-- |  'union' computes the union of two ordered lists.
-- The result contains those elements contained in either argument;
-- elements that appear in both lists are appear in the result only once.
--
-- > union [1,3,5] [2,4,6] == [1..6]
-- > union [2,4,6,8] [3,6,9] == [2,3,4,6,8,9]
-- > union [1,2,2,2] [1,1,1,2,2] == [1,1,1,2,2,2]
union :: (Ord a) => [a] -> [a] -> [a]
union = unionBy compare

-- |  'unionBy' is the non-overloaded version of 'union'
unionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
unionBy cmp = loop
  where
     loop [] ys = ys
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ -> x : loop xs ys
          GT -> y : loop (x:xs) ys



-- |  'minus' computes the multiset difference of two ordered lists.
-- Each occurence of an element in the second argument is removed from the first list,  if it is there.
--
-- > minus [1,3,5] [2,4,6] == [1,3,5]
-- > minus [2,4,6,8] [3,6,9] == [2,4,8]
-- > minus [1,2,2,2] [1,1,1,2,2] == [2]
minus :: (Ord a) => [a] -> [a] -> [a]
minus = minusBy compare

-- |  'minusBy' is the non-overloaded version of 'minus'
minusBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
minusBy cmp = loop
  where
     loop [] _ys = []
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ ->     loop xs ys
          GT ->     loop (x:xs) ys

-- |  'xunion' computes the multiset exclusive union of two ordered lists.
-- The result contains those elements that appear in either list,  but not both.
--
-- > xunion [1,3,5] [2,4,6] == [1..6]
-- > xunion [2,4,6,8] [3,6,9] == [2,3,4,8]
-- > xunion [1,2,2,2] [1,1,1,2,2] == [1,1,2]
xunion :: (Ord a) => [a] -> [a] -> [a]
xunion = xunionBy compare

-- |  'xunionBy' is the non-overloaded version of 'xunion'
xunionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
xunionBy cmp = loop
  where
     loop [] ys = ys
     loop xs [] = xs
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT -> x : loop xs (y:ys)
          EQ ->     loop xs ys
          GT -> y : loop (x:xs) ys

{-
genSectBy cmp p = loop
  where
     loop [] ys | p False True = ys
                | otherwise    = []
     loop xs [] | p True False = xs
                | otherwise    = []
     loop (x:xs) (y:ys)
       = case cmp x y of
          LT | p True False -> x : loop xs (y:ys)
             | otherwise    ->     loop xs (y:ys)
          EQ | p True True  -> x : loop xs ys
             | otherwise    ->     loop xs ys
          GT | p False True -> y : loop (x:xs) ys
             | otherwise    ->     loop (x:xs) ys
-}

-- |  'merge' combines all elements of two ordered lists.   The result contains those elements that appear in either list;  elements that appear in both lists appear in the result multiple times.
--
-- > merge [1,3,5] [2,4,6] == [1,2,3,4,5,6]
-- > merge [2,4,6,8] [3,6,9] == [2,3,4,6,6,8,9]
-- > merge [1,2,2,2] [1,1,1,2,2] == [1,1,1,1,2,2,2,2,2]
merge :: (Ord a) => [a] -> [a] -> [a]
merge = mergeBy compare

-- |  'mergeBy' is the non-overloaded version of 'merge'
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy cmp = loop
  where
    loop [] ys  = ys
    loop xs []  = xs
    loop (x:xs) (y:ys)
      = case cmp x y of
         GT -> y : loop (x:xs) ys
         _  -> x : loop xs (y:ys)

-- |  'subset' returns true if the first list is a sub-list of the second.
subset :: (Ord a) => [a] -> [a] -> Bool
subset = subsetBy compare

-- |  'subsetBy' is the non-overloaded version of 'subset'
subsetBy :: (a -> a -> Ordering) -> [a] -> [a] -> Bool
subsetBy cmp = loop
  where
    loop [] _ys = True
    loop _xs [] = False
    loop (x:xs) (y:ys)
      = case cmp x y of
         LT -> False
         EQ -> loop xs ys
         GT -> loop (x:xs) ys

{-
sort :: Ord a => [a] -> [a]
sort = sortBy compare

-- This is Ian Lynaugh's mergesort implementation provided in Data.List.sort with the
-- static argument transformation applied.   It's not clear if this is really worthwhile or not.

sortBy :: (a -> a -> Ordering) -> [a] -> [a]
sortBy cmp = loop . map (\x -> [x])
  where
    loop []   = []
    loop [xs] = xs
    loop xss  = loop (merge_pairs xss)

    merge_pairs []          = []
    merge_pairs [xs]        = [xs]
    merge_pairs (xs:ys:xss) = mergeBy cmp xs ys : merge_pairs xss
-}

-- |  'sortOn' provides the decorate-sort-undecorate idiom, aka the \"Schwartzian transform\"
sortOn :: Ord b => (a -> b) -> [a] -> [a]
sortOn f  = map snd . sortOn' fst .  map (\x -> (f x, x))

-- |  This variant of 'sortOn' recomputes the function to sort on every comparison.  This can is better
-- for functions that are cheap to compute,  including projections.

sortOn' :: Ord b => (a -> b) -> [a] -> [a]
sortOn' f = sortBy (\x y -> compare (f x) (f y))

-- |  'nubSort' is equivalent to 'nub' '.' 'sort',  except somewhat more efficient as duplicates
-- are removed as it sorts.  It is essentially Data.List.sort, a mergesort by Ian Lynagh,  with
-- 'merge' replaced by 'union'.
nubSort :: Ord a => [a] -> [a]
nubSort = nubSortBy compare

-- |  'nubSortBy' is the non-overloaded version of 'nubSort'
nubSortBy :: (a -> a -> Ordering) -> [a] -> [a]
nubSortBy cmp = loop . map (\x -> [x])
  where
    loop []   = []
    loop [xs] = xs
    loop xss  = loop (union_pairs xss)

    union_pairs []          = []
    union_pairs [xs]        = [xs]
    union_pairs (xs:ys:xss) = unionBy cmp xs ys : union_pairs xss

-- |  'nubSortOn' provides decorate-sort-undecorate for 'nubSort'
nubSortOn :: Ord b => (a -> b) -> [a] -> [a]
nubSortOn f = map snd . nubSortOn' fst . map (\x -> (f x, x))

-- |  This variant of 'nubSortOn' recomputes the function for each comparison.
nubSortOn' :: Ord b => (a -> b) -> [a] -> [a]
nubSortOn' f = nubSortBy (\x y -> compare (f x) (f y))

-- | On ordered lists,  'nub' is equivalent to 'Data.List.nub', except that
-- it runs in linear time instead of quadratic.   On unordered lists it also
-- removes elements that are smaller than any preceding element.
--
-- > nub [1,1,1,2,2] == [1,2]
-- > nub [2,0,1,3,3] == [2,3]
-- > nub = nubBy (<)

nub :: (Ord a) => [a] -> [a]
nub = nubBy (<)

-- | 'nubBy' is the greedy algorithm that returns a sublist of its
-- input such that 'isSortedBy' is true.
--
-- > isSortedBy pred (nubBy pred xs) == True

nubBy :: (a -> a -> Bool) -> [a] -> [a]
nubBy p []     = []
nubBy p (x:xs) = x : loop x xs
  where
    loop _ [] = []
    loop x (y:ys)
       | p x y     = y : loop y ys
       | otherwise = loop x ys