{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} #ifdef __GLASGOW_HASKELL__ {-# LANGUAGE Trustworthy #-} #endif ----------------------------------------------------------------------------- -- | -- Module : Data.Containers.ListUtils -- Copyright : (c) Gershom Bazerman 2018 -- License : BSD-style -- Maintainer : libraries@haskell.org -- Portability : portable -- -- This module provides efficient containers-based functions on the list type. -- -- In the documentation, \(n\) is the number of elements in the list while -- \(d\) is the number of distinct elements in the list. \(W\) is the number -- of bits in an 'Int'. -- -- @since 0.6.0.1 ----------------------------------------------------------------------------- module Data.Containers.ListUtils ( nubOrd, nubOrdOn, nubInt, nubIntOn ) where import Data.Set (Set) import qualified Data.Set as Set import qualified Data.IntSet as IntSet import Data.IntSet (IntSet) #ifdef __GLASGOW_HASKELL__ import GHC.Exts ( build ) #endif -- *** Ord-based nubbing *** -- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a -- list. In particular, it keeps only the first occurrence of each element. By -- using a 'Set' internally it has better asymptotics than the standard -- 'Data.List.nub' function. -- -- ==== Strictness -- -- @nubOrd@ is strict in the elements of the list. -- -- ==== Efficiency note -- -- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn' -- instead of this function, although it can be a little worse in certain -- pathological cases. For example, to nub a list of characters, use -- -- @ nubIntOn fromEnum xs @ -- -- @since 0.6.0.1 nubOrd :: Ord a => [a] -> [a] nubOrd = nubOrdOn id {-# INLINE nubOrd #-} -- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs -- comparisons not on the original datatype, but a user-specified projection -- from that datatype. -- -- ==== Strictness -- -- @nubOrdOn@ is strict in the values of the function applied to the -- elements of the list. -- -- @since 0.6.0.1 nubOrdOn :: Ord b => (a -> b) -> [a] -> [a] -- For some reason we need to write an explicit lambda here to allow this -- to inline when only applied to a function. nubOrdOn f = \xs -> nubOrdOnExcluding f Set.empty xs {-# INLINE nubOrdOn #-} -- Splitting nubOrdOn like this means that we don't have to worry about -- matching specifically on Set.empty in the rewrite-back rule. nubOrdOnExcluding :: Ord b => (a -> b) -> Set b -> [a] -> [a] nubOrdOnExcluding f = go where go _ [] = [] go s (x:xs) | fx `Set.member` s = go s xs | otherwise = x : go (Set.insert fx s) xs where !fx = f x #ifdef __GLASGOW_HASKELL__ -- We want this inlinable to specialize to the necessary Ord instance. {-# INLINABLE [1] nubOrdOnExcluding #-} {-# RULES -- Rewrite to a fusible form. "nubOrdOn" [~1] forall f as s. nubOrdOnExcluding f s as = build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s) -- Rewrite back to a plain form "nubOrdOnList" [1] forall f as s. foldr (nubOrdOnFB f (:)) (constNubOn []) as s = nubOrdOnExcluding f s as #-} nubOrdOnFB :: Ord b => (a -> b) -> (a -> r -> r) -> a -> (Set b -> r) -> Set b -> r nubOrdOnFB f c x r s | fx `Set.member` s = r s | otherwise = x `c` r (Set.insert fx s) where !fx = f x {-# INLINABLE [0] nubOrdOnFB #-} constNubOn :: a -> b -> a constNubOn x _ = x {-# INLINE [0] constNubOn #-} #endif -- *** Int-based nubbing *** -- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int' -- values from a list. In particular, it keeps only the first occurrence -- of each element. By using an 'IntSet' internally, it attains better -- asymptotics than the standard 'Data.List.nub' function. -- -- See also 'nubIntOn', a more widely applicable generalization. -- -- ==== Strictness -- -- @nubInt@ is strict in the elements of the list. -- -- @since 0.6.0.1 nubInt :: [Int] -> [Int] nubInt = nubIntOn id {-# INLINE nubInt #-} -- | The @nubIntOn@ function behaves just like 'nubInt' except it performs -- comparisons not on the original datatype, but a user-specified projection -- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to -- nub characters and typical fixed-with numerical types efficiently. -- -- ==== Strictness -- -- @nubIntOn@ is strict in the values of the function applied to the -- elements of the list. -- -- @since 0.6.0.1 nubIntOn :: (a -> Int) -> [a] -> [a] -- For some reason we need to write an explicit lambda here to allow this -- to inline when only applied to a function. nubIntOn f = \xs -> nubIntOnExcluding f IntSet.empty xs {-# INLINE nubIntOn #-} -- Splitting nubIntOn like this means that we don't have to worry about -- matching specifically on IntSet.empty in the rewrite-back rule. nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a] nubIntOnExcluding f = go where go _ [] = [] go s (x:xs) | fx `IntSet.member` s = go s xs | otherwise = x : go (IntSet.insert fx s) xs where !fx = f x #ifdef __GLASGOW_HASKELL__ -- We don't mark this INLINABLE because it doesn't seem obviously useful -- to inline it anywhere; the elements the function operates on are actually -- pulled from a list and installed in a list; the situation is very different -- when fusion occurs. In this case, we let GHC make the call. {-# NOINLINE [1] nubIntOnExcluding #-} {-# RULES "nubIntOn" [~1] forall f as s. nubIntOnExcluding f s as = build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s) "nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s = nubIntOnExcluding f s as #-} nubIntOnFB :: (a -> Int) -> (a -> r -> r) -> a -> (IntSet -> r) -> IntSet -> r nubIntOnFB f c x r s | fx `IntSet.member` s = r s | otherwise = x `c` r (IntSet.insert fx s) where !fx = f x {-# INLINABLE [0] nubIntOnFB #-} #endif