{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
#ifdef __GLASGOW_HASKELL__
{-# LANGUAGE Trustworthy #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module : Data.Containers.ListUtils
-- Copyright : (c) Gershom Bazerman 2018
-- License : BSD-style
-- Maintainer : libraries@haskell.org
-- Portability : portable
--
-- This module provides efficient containers-based functions on the list type.
--
-- In the documentation, \(n\) is the number of elements in the list while
-- \(d\) is the number of distinct elements in the list. \(W\) is the number
-- of bits in an 'Int'.
--
-- @since 0.6.0.1
-----------------------------------------------------------------------------
module Data.Containers.ListUtils (
nubOrd,
nubOrdOn,
nubInt,
nubIntOn
) where
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.IntSet as IntSet
import Data.IntSet (IntSet)
#ifdef __GLASGOW_HASKELL__
import GHC.Exts ( build )
#endif
-- *** Ord-based nubbing ***
-- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a
-- list. In particular, it keeps only the first occurrence of each element. By
-- using a 'Set' internally it has better asymptotics than the standard
-- 'Data.List.nub' function.
--
-- ==== Strictness
--
-- @nubOrd@ is strict in the elements of the list.
--
-- ==== Efficiency note
--
-- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn'
-- instead of this function, although it can be a little worse in certain
-- pathological cases. For example, to nub a list of characters, use
--
-- @ nubIntOn fromEnum xs @
--
-- @since 0.6.0.1
nubOrd :: Ord a => [a] -> [a]
nubOrd = nubOrdOn id
{-# INLINE nubOrd #-}
-- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs
-- comparisons not on the original datatype, but a user-specified projection
-- from that datatype.
--
-- ==== Strictness
--
-- @nubOrdOn@ is strict in the values of the function applied to the
-- elements of the list.
--
-- @since 0.6.0.1
nubOrdOn :: Ord b => (a -> b) -> [a] -> [a]
-- For some reason we need to write an explicit lambda here to allow this
-- to inline when only applied to a function.
nubOrdOn f = \xs -> nubOrdOnExcluding f Set.empty xs
{-# INLINE nubOrdOn #-}
-- Splitting nubOrdOn like this means that we don't have to worry about
-- matching specifically on Set.empty in the rewrite-back rule.
nubOrdOnExcluding :: Ord b => (a -> b) -> Set b -> [a] -> [a]
nubOrdOnExcluding f = go
where
go _ [] = []
go s (x:xs)
| fx `Set.member` s = go s xs
| otherwise = x : go (Set.insert fx s) xs
where !fx = f x
#ifdef __GLASGOW_HASKELL__
-- We want this inlinable to specialize to the necessary Ord instance.
{-# INLINABLE [1] nubOrdOnExcluding #-}
{-# RULES
-- Rewrite to a fusible form.
"nubOrdOn" [~1] forall f as s. nubOrdOnExcluding f s as =
build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s)
-- Rewrite back to a plain form
"nubOrdOnList" [1] forall f as s.
foldr (nubOrdOnFB f (:)) (constNubOn []) as s =
nubOrdOnExcluding f s as
#-}
nubOrdOnFB :: Ord b
=> (a -> b)
-> (a -> r -> r)
-> a
-> (Set b -> r)
-> Set b
-> r
nubOrdOnFB f c x r s
| fx `Set.member` s = r s
| otherwise = x `c` r (Set.insert fx s)
where !fx = f x
{-# INLINABLE [0] nubOrdOnFB #-}
constNubOn :: a -> b -> a
constNubOn x _ = x
{-# INLINE [0] constNubOn #-}
#endif
-- *** Int-based nubbing ***
-- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int'
-- values from a list. In particular, it keeps only the first occurrence
-- of each element. By using an 'IntSet' internally, it attains better
-- asymptotics than the standard 'Data.List.nub' function.
--
-- See also 'nubIntOn', a more widely applicable generalization.
--
-- ==== Strictness
--
-- @nubInt@ is strict in the elements of the list.
--
-- @since 0.6.0.1
nubInt :: [Int] -> [Int]
nubInt = nubIntOn id
{-# INLINE nubInt #-}
-- | The @nubIntOn@ function behaves just like 'nubInt' except it performs
-- comparisons not on the original datatype, but a user-specified projection
-- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to
-- nub characters and typical fixed-with numerical types efficiently.
--
-- ==== Strictness
--
-- @nubIntOn@ is strict in the values of the function applied to the
-- elements of the list.
--
-- @since 0.6.0.1
nubIntOn :: (a -> Int) -> [a] -> [a]
-- For some reason we need to write an explicit lambda here to allow this
-- to inline when only applied to a function.
nubIntOn f = \xs -> nubIntOnExcluding f IntSet.empty xs
{-# INLINE nubIntOn #-}
-- Splitting nubIntOn like this means that we don't have to worry about
-- matching specifically on IntSet.empty in the rewrite-back rule.
nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a]
nubIntOnExcluding f = go
where
go _ [] = []
go s (x:xs)
| fx `IntSet.member` s = go s xs
| otherwise = x : go (IntSet.insert fx s) xs
where !fx = f x
#ifdef __GLASGOW_HASKELL__
-- We don't mark this INLINABLE because it doesn't seem obviously useful
-- to inline it anywhere; the elements the function operates on are actually
-- pulled from a list and installed in a list; the situation is very different
-- when fusion occurs. In this case, we let GHC make the call.
{-# NOINLINE [1] nubIntOnExcluding #-}
{-# RULES
"nubIntOn" [~1] forall f as s. nubIntOnExcluding f s as =
build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s)
"nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s =
nubIntOnExcluding f s as
#-}
nubIntOnFB :: (a -> Int)
-> (a -> r -> r)
-> a
-> (IntSet -> r)
-> IntSet
-> r
nubIntOnFB f c x r s
| fx `IntSet.member` s = r s
| otherwise = x `c` r (IntSet.insert fx s)
where !fx = f x
{-# INLINABLE [0] nubIntOnFB #-}
#endif