{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} #ifdef __GLASGOW_HASKELL__ {-# LANGUAGE Trustworthy #-} #endif ----------------------------------------------------------------------------- -- | -- Module : Data.Containers.ListUtils -- Copyright : (c) Gershom Bazerman 2018 -- License : BSD-style -- Maintainer : libraries@haskell.org -- Portability : portable -- -- This module provides efficient containers-based functions on the list type. -- -- In the documentation, \(n\) is the number of elements in the list while -- \(d\) is the number of distinct elements in the list. \(W\) is the number -- of bits in an 'Int'. -- -- @since 0.6.0.1 ----------------------------------------------------------------------------- module Data.Containers.ListUtils ( nubOrd, nubOrdOn, nubInt, nubIntOn ) where import Data.Set (Set) import qualified Data.Set as Set import qualified Data.IntSet as IntSet import Data.IntSet (IntSet) #ifdef __GLASGOW_HASKELL__ import GHC.Exts ( build ) #endif -- *** Ord-based nubbing *** -- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a -- list. In particular, it keeps only the first occurrence of each element. By -- using a 'Set' internally it has better asymptotics than the standard -- 'Data.List.nub' function. -- -- ==== Strictness -- -- @nubOrd@ is strict in the elements of the list. -- -- ==== Efficiency note -- -- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn' -- instead of this function, although it can be a little worse in certain -- pathological cases. For example, to nub a list of characters, use -- -- @ nubIntOn fromEnum xs @ -- -- @since 0.6.0.1 nubOrd :: Ord a => [a] -> [a] nubOrd :: [a] -> [a] nubOrd = (a -> a) -> [a] -> [a] forall b a. Ord b => (a -> b) -> [a] -> [a] nubOrdOn a -> a forall a. a -> a id {-# INLINE nubOrd #-} -- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs -- comparisons not on the original datatype, but a user-specified projection -- from that datatype. -- -- ==== Strictness -- -- @nubOrdOn@ is strict in the values of the function applied to the -- elements of the list. -- -- @since 0.6.0.1 nubOrdOn :: Ord b => (a -> b) -> [a] -> [a] -- For some reason we need to write an explicit lambda here to allow this -- to inline when only applied to a function. nubOrdOn :: (a -> b) -> [a] -> [a] nubOrdOn a -> b f = \[a] xs -> (a -> b) -> Set b -> [a] -> [a] forall b a. Ord b => (a -> b) -> Set b -> [a] -> [a] nubOrdOnExcluding a -> b f Set b forall a. Set a Set.empty [a] xs {-# INLINE nubOrdOn #-} -- Splitting nubOrdOn like this means that we don't have to worry about -- matching specifically on Set.empty in the rewrite-back rule. nubOrdOnExcluding :: Ord b => (a -> b) -> Set b -> [a] -> [a] nubOrdOnExcluding :: (a -> b) -> Set b -> [a] -> [a] nubOrdOnExcluding a -> b f = Set b -> [a] -> [a] go where go :: Set b -> [a] -> [a] go Set b _ [] = [] go Set b s (a x:[a] xs) | b fx b -> Set b -> Bool forall a. Ord a => a -> Set a -> Bool `Set.member` Set b s = Set b -> [a] -> [a] go Set b s [a] xs | Bool otherwise = a x a -> [a] -> [a] forall a. a -> [a] -> [a] : Set b -> [a] -> [a] go (b -> Set b -> Set b forall a. Ord a => a -> Set a -> Set a Set.insert b fx Set b s) [a] xs where !fx :: b fx = a -> b f a x #ifdef __GLASGOW_HASKELL__ -- We want this inlinable to specialize to the necessary Ord instance. {-# INLINABLE [1] nubOrdOnExcluding #-} {-# RULES -- Rewrite to a fusible form. "nubOrdOn" [~1] forall f as s. nubOrdOnExcluding f s as = build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s) -- Rewrite back to a plain form "nubOrdOnList" [1] forall f as s. foldr (nubOrdOnFB f (:)) (constNubOn []) as s = nubOrdOnExcluding f s as #-} nubOrdOnFB :: Ord b => (a -> b) -> (a -> r -> r) -> a -> (Set b -> r) -> Set b -> r nubOrdOnFB :: (a -> b) -> (a -> r -> r) -> a -> (Set b -> r) -> Set b -> r nubOrdOnFB a -> b f a -> r -> r c a x Set b -> r r Set b s | b fx b -> Set b -> Bool forall a. Ord a => a -> Set a -> Bool `Set.member` Set b s = Set b -> r r Set b s | Bool otherwise = a x a -> r -> r `c` Set b -> r r (b -> Set b -> Set b forall a. Ord a => a -> Set a -> Set a Set.insert b fx Set b s) where !fx :: b fx = a -> b f a x {-# INLINABLE [0] nubOrdOnFB #-} constNubOn :: a -> b -> a constNubOn :: a -> b -> a constNubOn a x b _ = a x {-# INLINE [0] constNubOn #-} #endif -- *** Int-based nubbing *** -- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int' -- values from a list. In particular, it keeps only the first occurrence -- of each element. By using an 'IntSet' internally, it attains better -- asymptotics than the standard 'Data.List.nub' function. -- -- See also 'nubIntOn', a more widely applicable generalization. -- -- ==== Strictness -- -- @nubInt@ is strict in the elements of the list. -- -- @since 0.6.0.1 nubInt :: [Int] -> [Int] nubInt :: [Int] -> [Int] nubInt = (Int -> Int) -> [Int] -> [Int] forall a. (a -> Int) -> [a] -> [a] nubIntOn Int -> Int forall a. a -> a id {-# INLINE nubInt #-} -- | The @nubIntOn@ function behaves just like 'nubInt' except it performs -- comparisons not on the original datatype, but a user-specified projection -- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to -- nub characters and typical fixed-with numerical types efficiently. -- -- ==== Strictness -- -- @nubIntOn@ is strict in the values of the function applied to the -- elements of the list. -- -- @since 0.6.0.1 nubIntOn :: (a -> Int) -> [a] -> [a] -- For some reason we need to write an explicit lambda here to allow this -- to inline when only applied to a function. nubIntOn :: (a -> Int) -> [a] -> [a] nubIntOn a -> Int f = \[a] xs -> (a -> Int) -> IntSet -> [a] -> [a] forall a. (a -> Int) -> IntSet -> [a] -> [a] nubIntOnExcluding a -> Int f IntSet IntSet.empty [a] xs {-# INLINE nubIntOn #-} -- Splitting nubIntOn like this means that we don't have to worry about -- matching specifically on IntSet.empty in the rewrite-back rule. nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a] nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a] nubIntOnExcluding a -> Int f = IntSet -> [a] -> [a] go where go :: IntSet -> [a] -> [a] go IntSet _ [] = [] go IntSet s (a x:[a] xs) | Int fx Int -> IntSet -> Bool `IntSet.member` IntSet s = IntSet -> [a] -> [a] go IntSet s [a] xs | Bool otherwise = a x a -> [a] -> [a] forall a. a -> [a] -> [a] : IntSet -> [a] -> [a] go (Int -> IntSet -> IntSet IntSet.insert Int fx IntSet s) [a] xs where !fx :: Int fx = a -> Int f a x #ifdef __GLASGOW_HASKELL__ -- We don't mark this INLINABLE because it doesn't seem obviously useful -- to inline it anywhere; the elements the function operates on are actually -- pulled from a list and installed in a list; the situation is very different -- when fusion occurs. In this case, we let GHC make the call. {-# NOINLINE [1] nubIntOnExcluding #-} {-# RULES "nubIntOn" [~1] forall f as s. nubIntOnExcluding f s as = build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s) "nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s = nubIntOnExcluding f s as #-} nubIntOnFB :: (a -> Int) -> (a -> r -> r) -> a -> (IntSet -> r) -> IntSet -> r nubIntOnFB :: (a -> Int) -> (a -> r -> r) -> a -> (IntSet -> r) -> IntSet -> r nubIntOnFB a -> Int f a -> r -> r c a x IntSet -> r r IntSet s | Int fx Int -> IntSet -> Bool `IntSet.member` IntSet s = IntSet -> r r IntSet s | Bool otherwise = a x a -> r -> r `c` IntSet -> r r (Int -> IntSet -> IntSet IntSet.insert Int fx IntSet s) where !fx :: Int fx = a -> Int f a x {-# INLINABLE [0] nubIntOnFB #-} #endif