-- File created: 2008-11-08 19:22:07
{-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances
, FlexibleContexts, UndecidableInstances #-}
#include "exports.h"
-- | The base implementation of a Patricia trie representing a set of lists,
-- generalized over any type of map from element values to tries.
--
-- Worst-case complexities are given in terms of @n@, @m@, and @k@. @n@ refers
-- to the number of keys in the set and @m@ to their maximum length. @k@ refers
-- to the length of a key given to the function, not any property of the set.
--
-- In addition, the trie's branching factor plays a part in almost every
-- operation, but the complexity depends on the underlying 'Map'. Thus, for
-- instance, 'member' is actually @O(m f(b))@ where @f(b)@ is the complexity of
-- a lookup operation on the 'Map' used. This complexity depends on the
-- underlying operation, which is not part of the specification of the visible
-- function. Thus it could change whilst affecting the complexity only for
-- certain Map types: hence this \"b factor\" is not shown explicitly.
--
-- Disclaimer: the complexities have not been proven.
module Data.ListTrie.Patricia.Set (SET_EXPORTS) where
import Control.Arrow ((***))
import Control.Monad (liftM3)
import Data.Binary (Binary,get,put)
import Data.Function (on)
import Data.Monoid (Monoid(..))
import Prelude hiding (filter, foldl, foldr, map, null)
import qualified Prelude
#if __GLASGOW_HASKELL__
import Text.Read (readPrec, lexP, parens, prec, Lexeme(Ident))
#endif
import qualified Data.ListTrie.Base.Map as Map
import qualified Data.ListTrie.Patricia.Base as Base
import Data.ListTrie.Base.Classes (Identity(..), Unwrappable(..))
import Data.ListTrie.Base.Map (Map, OrdMap)
import Data.ListTrie.Util ((.:), (.:.), both)
#include "docs.h"
-- Invariant: any (Tr False _ _) has at least two children, all of which are
-- True or have a True descendant.
--
-- In order to avoid a lot of special casing it has to be the case that there's
-- only one way to represent a given trie. The above property makes sure of
-- that, so that, for instance, 'fromList ["foo"]' can only be 'Tr True "foo"
-- Map.empty', and not 'Tr False "fo" (Map.fromList [('o',Tr True ""
-- Map.empty)])'. Base.tryCompress is a function which takes care of this.
--
-- This Base stuff is needed just as in the non-Patricia version.
data TrieSetBase map a bool = Tr !bool ![a] !(CMap map a bool)
type CMap map a bool = map a (TrieSetBase map a bool)
-- | The data structure itself: a set of keys of type @[a]@ implemented as a
-- trie, using @map@ to map keys of type @a@ to sub-tries.
--
-- Regarding the instances:
--
-- - The @CMap@ type is internal, ignore it. For 'Eq' and 'Ord' an 'Eq'
-- instance is required: what this means is that @map a v@ is expected to be
-- an instance of 'Eq', given 'Eq'@ v@.
--
-- - The 'Eq' constraint for the 'Ord' instance is misleading: it is needed
-- only because 'Eq' is a superclass of 'Ord'.
--
-- - The 'Monoid' instance defines 'mappend' as 'union' and 'mempty' as
-- 'empty'.
newtype TrieSet map a = TS { unTS :: TrieSetBase map a Bool }
inTS :: (TrieSetBase map a Bool -> TrieSetBase nap b Bool)
-> (TrieSet map a -> TrieSet nap b)
inTS f = TS . f . unTS
instance Map map k => Base.Trie TrieSetBase Identity map k where
mkTrie = Tr . unwrap
tParts (Tr b p m) = (Id b,p,m)
-- CMap contains TrieSetBase, not TrieSet, hence we must supply these instances
-- for TrieSetBase first
instance (Map map a, Eq (CMap map a Bool)) => Eq (TrieSetBase map a Bool) where
Tr b1 p1 m1 == Tr b2 p2 m2 =
b1 == b2 && Base.eqComparePrefixes (Map.eqCmp m1) p1 p2
&& m1 == m2
instance (Eq (CMap map a Bool), OrdMap map a, Ord a)
=> Ord (TrieSetBase map a Bool)
where
compare = compare `on` Base.toAscList
instance (Eq (CMap map a Bool), Map map a) => Eq (TrieSet map a) where
(==) = (==) `on` unTS
-- The CMap constraint is needed only because Eq is a superclass of Ord....
-- sigh
instance (Eq (CMap map a Bool), OrdMap map a, Ord a) => Ord (TrieSet map a)
where
compare = compare `on` unTS
instance Map map a => Monoid (TrieSet map a) where
mempty = empty
mappend = union
mconcat = unions
instance (Map map a, Show a) => Show (TrieSet map a) where
showsPrec p s = showParen (p > 10) $
showString "fromList " . shows (toList s)
instance (Map map a, Read a) => Read (TrieSet map a) where
#if __GLASGOW_HASKELL__
readPrec = parens $ prec 10 $ do
Ident "fromList" <- lexP
fmap fromList readPrec
#else
readsPrec p = readParen (p > 10) $ \r -> do
("fromList", list) <- lex r
(xs, rest) <- readsPrec (p+1) list
[(fromList xs, rest)]
#endif
instance (Map map k, Binary k, Binary a) => Binary (TrieSetBase map k a) where
put (Tr v k m) = put v >> put k >> (put . Map.serializeToList $ m)
get = liftM3 Tr get get (get >>= return . Map.deserializeFromList)
instance (Map map a, Binary a) => Binary (TrieSet map a) where
put = put . unTS
get = get >>= return . TS
-- * Construction
-- | @O(1)@. The empty set.
empty :: Map map a => TrieSet map a
empty = TS Base.empty
-- | @O(1)@. The singleton set containing only the given key.
singleton :: Map map a => [a] -> TrieSet map a
singleton k = TS$ Base.singleton k True
-- * Modification
-- | @O(min(m,s))@. Inserts the key into the set. If the key is already a
-- member of the set, the set is unchanged.
insert :: Map map a => [a] -> TrieSet map a -> TrieSet map a
insert k = inTS$ Base.insert k True
-- | @O(min(m,s))@. Removes the key from the set. If the key is not a member of
-- the set, the set is unchanged.
delete :: Map map a => [a] -> TrieSet map a -> TrieSet map a
delete = inTS . Base.delete
-- * Querying
-- | @O(1)@. 'True' iff the set is empty.
null :: Map map a => TrieSet map a -> Bool
null = Base.null . unTS
-- | @O(n m)@. The number of keys in the set. The value is built up lazily,
-- allowing for delivery of partial results without traversing the whole set.
size :: (Map map a, Num n) => TrieSet map a -> n
size = Base.size . unTS
-- | @O(n m)@. The number of keys in the set. The value is built strictly: no
-- value is returned until the set has been fully traversed.
size' :: (Map map a, Num n) => TrieSet map a -> n
size' = Base.size' . unTS
-- | @O(min(m,s))@. 'True' iff the given key is contained within the set.
member :: Map map a => [a] -> TrieSet map a -> Bool
member = Base.member .:. unTS
-- | @O(min(m,s))@. 'False' iff the given key is contained within the set.
notMember :: Map map a => [a] -> TrieSet map a -> Bool
notMember = Base.notMember .:. unTS
-- | @O(min(n1 m1,n2 m2))@. 'True' iff the first set is a subset of the second,
-- i.e. all keys that are members of the first set are also members of the
-- second set.
isSubsetOf :: Map map a => TrieSet map a -> TrieSet map a -> Bool
isSubsetOf = Base.isSubmapOfBy (&&) `on` unTS
-- | @O(min(n1 m1,n2 m2))@. 'True' iff the first set is a proper subset of the
-- second, i.e. the first is a subset of the second, but the sets are not
-- equal.
isProperSubsetOf :: Map map a => TrieSet map a -> TrieSet map a -> Bool
isProperSubsetOf = Base.isProperSubmapOfBy (&&) `on` unTS
-- * Combination
defaultUnion :: Bool -> Bool -> Bool
defaultUnion = error "TrieSet.union :: internal error"
-- | @O(min(n1 m1,n2 m2))@. The union of the two sets: the set which contains
-- all keys that are members of either set.
--
-- The worst-case performance occurs when the two sets are identical.
union :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a
union = TS .: Base.unionWith defaultUnion `on` unTS
-- | @O(sum(n))@. The union of all the sets: the set which contains all keys
-- that are members of any of the sets.
--
-- The worst-case performance occurs when all the sets are identical.
unions :: Map map a => [TrieSet map a] -> TrieSet map a
unions = TS . Base.unionsWith defaultUnion . Prelude.map unTS
-- | @O(min(n1 m1,n2 m2))@. The difference of the two sets: the set which
-- contains all keys that are members of the first set and not members of the
-- second set.
--
-- The worst-case performance occurs when the two sets are identical.
difference :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a
difference = TS .: Base.differenceWith
(error "TrieSet.difference :: internal error")
`on` unTS
-- | @O(min(n1 m1,n2 m2))@. The intersection of the two sets: the set which
-- contains all keys that are members of both sets.
--
-- The worst-case performance occurs when the two sets are identical.
intersection :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a
intersection = TS .: Base.intersectionWith
(error "TrieSet.intersection :: internal error")
`on` unTS
-- * Filtering
-- | @O(n m)@. The set of those keys in the set for which the given predicate
-- returns 'True'.
filter :: Map map a => ([a] -> Bool) -> TrieSet map a -> TrieSet map a
filter p = inTS $ Base.filterWithKey (\k _ -> p k)
-- | @O(n m)@. A pair of sets: the first element contains those keys for which
-- the given predicate returns 'True', and the second element contains those
-- for which it was 'False'.
partition :: Map map a
=> ([a] -> Bool) -> TrieSet map a -> (TrieSet map a, TrieSet map a)
partition p = both TS . Base.partitionWithKey (\k _ -> p k) . unTS
-- * Mapping
-- | @O(n m)@. Apply the given function to all the keys in the set.
map :: (Map map a, Map map b) => ([a] -> [b]) -> TrieSet map a -> TrieSet map b
map = inTS . Base.mapKeysWith Base.fromList
-- | @O(n m)@. Apply the given function to the contents of all the keys in the
-- set.
mapIn :: (Map map a, Map map b) => (a -> b) -> TrieSet map a -> TrieSet map b
mapIn = inTS . Base.mapInKeysWith defaultUnion
-- * Folding
-- | @O(n m)@. Equivalent to a list @foldr@ on the 'toList' representation.
foldr :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldr f = Base.foldrWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldr@ on the 'toAscList' representation.
foldrAsc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldrAsc f = Base.foldrAscWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldr@ on the 'toDescList' representation.
foldrDesc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldrDesc f = Base.foldrDescWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl@ on the 'toList' representation.
foldl :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldl f = Base.foldlWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl@ on the 'toAscList' representation.
foldlAsc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldlAsc f = Base.foldlAscWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl@ on the 'toDescList' representation.
foldlDesc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldlDesc f = Base.foldlDescWithKey (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toList' representation.
foldl' :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldl' f = Base.foldlWithKey' (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toAscList' representation.
foldlAsc' :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldlAsc' f = Base.foldlAscWithKey' (\k _ -> f k) .:. unTS
-- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toDescList'
-- representation.
foldlDesc' :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b
foldlDesc' f = Base.foldlDescWithKey' (\k _ -> f k) .:. unTS
-- * Conversion between lists
-- | @O(n m)@. Converts the set to a list of the keys contained within, in
-- undefined order.
toList :: Map map a => TrieSet map a -> [[a]]
toList = Prelude.map fst . Base.toList . unTS
-- | @O(n m)@. Converts the set to a list of the keys contained within, in
-- ascending order.
toAscList :: OrdMap map a => TrieSet map a -> [[a]]
toAscList = Prelude.map fst . Base.toAscList . unTS
-- | @O(n m)@. Converts the set to a list of the keys contained within, in
-- descending order.
toDescList :: OrdMap map a => TrieSet map a -> [[a]]
toDescList = Prelude.map fst . Base.toDescList . unTS
-- | @O(n m)@. Creates a set from a list of keys.
fromList :: Map map a => [[a]] -> TrieSet map a
fromList = TS . Base.fromList . Prelude.map (flip (,) True)
-- * Ordering ops
-- | @O(m)@. Removes and returns the minimal key in the set. If the set is
-- empty, 'Nothing' and the original set are returned.
minView :: OrdMap map a => TrieSet map a -> (Maybe [a], TrieSet map a)
minView = (fmap fst *** TS) . Base.minView . unTS
-- | @O(m)@. Removes and returns the maximal key in the set. If the set is
-- empty, 'Nothing' and the original set are returned.
maxView :: OrdMap map a => TrieSet map a -> (Maybe [a], TrieSet map a)
maxView = (fmap fst *** TS) . Base.maxView . unTS
-- | @O(m)@. Like 'fst' composed with 'minView'. 'Just' the minimal key in the
-- set, or 'Nothing' if the set is empty.
findMin :: OrdMap map a => TrieSet map a -> Maybe [a]
findMin = fmap fst . Base.findMin . unTS
-- | @O(m)@. Like 'fst' composed with 'maxView'. 'Just' the maximal key in the
-- set, or 'Nothing' if the set is empty.
findMax :: OrdMap map a => TrieSet map a -> Maybe [a]
findMax = fmap fst . Base.findMax . unTS
-- | @O(m)@. Like 'snd' composed with 'minView'. The set without its minimal
-- key, or the unchanged original set if it was empty.
deleteMin :: OrdMap map a => TrieSet map a -> TrieSet map a
deleteMin = inTS Base.deleteMin
-- | @O(m)@. Like 'snd' composed with 'maxView'. The set without its maximal
-- key, or the unchanged original set if it was empty.
deleteMax :: OrdMap map a => TrieSet map a -> TrieSet map a
deleteMax = inTS Base.deleteMax
-- | @O(min(m,s))@. Splits the set in two about the given key. The first
-- element of the resulting pair is a set containing the keys lesser than the
-- given key; the second contains those keys that are greater.
split :: OrdMap map a => [a] -> TrieSet map a -> (TrieSet map a, TrieSet map a)
split = both TS .: Base.split .:. unTS
-- | @O(min(m,s))@. Like 'split', but also returns whether the given key was a
-- member of the set or not.
splitMember :: OrdMap map a
=> [a] -> TrieSet map a -> (TrieSet map a, Bool, TrieSet map a)
splitMember = (\(l,b,g) -> (TS l,unwrap b,TS g)) .: Base.splitLookup .:. unTS
-- | @O(m)@. 'Just' the key of the set which precedes the given key in order,
-- or 'Nothing' if the set is empty.
findPredecessor :: OrdMap map a => [a] -> TrieSet map a -> Maybe [a]
findPredecessor = fmap fst .: Base.findPredecessor .:. unTS
-- | @O(m)@. 'Just' the key of the set which succeeds the given key in order,
-- or 'Nothing' if the set is empty.
findSuccessor :: OrdMap map a => [a] -> TrieSet map a -> Maybe [a]
findSuccessor = fmap fst .: Base.findSuccessor .:. unTS
-- * Trie-only operations
-- | @O(s)@. Prepends the given key to all the keys of the set. For example:
--
-- > addPrefix "pre" (fromList ["a","b"]) == fromList ["prea","preb"]
addPrefix :: Map map a => [a] -> TrieSet map a -> TrieSet map a
addPrefix = TS .: Base.addPrefix .:. unTS
-- | @O(m)@. The set which contains all keys of which the given key is a
-- prefix, with the prefix removed from each key. If the given key is not a
-- prefix of any key in the set, an empty set is returned. For example:
--
-- > deletePrefix "a" (fromList ["a","ab","ac"]) == fromList ["","b","c"]
--
-- This function can be used, for instance, to reduce potentially expensive I/O
-- operations: if you need to check whether a string is a member of a set, but
-- you only have a prefix of it and retrieving the rest is an expensive
-- operation, calling 'deletePrefix' with what you have might allow you to
-- avoid the operation: if the resulting set is empty, the entire string cannot
-- be a member of the set.
deletePrefix :: Map map a => [a] -> TrieSet map a -> TrieSet map a
deletePrefix = TS .: Base.deletePrefix .:. unTS
-- | @O(1)@. A triple containing the longest common prefix of all keys in the
-- set, whether that prefix was a member of the set, and the set with that
-- prefix removed from all the keys as well as the set itself. Examples:
--
-- > splitPrefix (fromList ["a","b"]) == ("", False, fromList ["a","b"])
-- > splitPrefix (fromList ["a","ab","ac"]) == ("a", True, fromList ["b","c"])
splitPrefix :: Map map a => TrieSet map a -> ([a], Bool, TrieSet map a)
splitPrefix = (\(k,b,t) -> (k,unwrap b,TS t)) . Base.splitPrefix . unTS
-- | @O(1)@. The children of the longest common prefix in the trie as sets,
-- associated with their distinguishing key value. If the set contains less
-- than two keys, this function will return an empty map. Examples;
--
-- > children (fromList ["a","abc","abcd"])
-- > == Map.fromList [('b',fromList ["c","cd"])]
-- > children (fromList ["b","c"])
-- > == Map.fromList [('b',fromList [""]),('c',fromList [""])]
children :: Map map a => TrieSet map a -> map a (TrieSet map a)
children = Map.map TS . Base.children . unTS
-- | @O(1)@. The children of the first element of the longest common prefix in
-- the trie as sets, associated with their distinguishing key value. If the set
-- contains less than two keys, this function will return an empty map.
--
-- If the longest common prefix of all keys in the trie is the empty list, this
-- function is equivalent to 'children'. Otherwise, the result will always be a
-- single-element map.
--
-- Examples:
--
-- > children1 (fromList ["abc","abcd"])
-- > == Map.fromList [('a',fromList ["bc","bcd"])]
-- > children1 (fromList ["b","c"])
-- > == Map.fromList [('b',fromList [""]),('c',fromList [""])]
children1 :: Map map a => TrieSet map a -> map a (TrieSet map a)
children1 = Map.map TS . Base.children1 . unTS
-- * Visualization
-- | @O(n m)@. Displays the set's internal structure in an undefined way. That
-- is to say, no program should depend on the function's results.
showTrie :: (Show a, Map map a) => TrieSet map a -> ShowS
showTrie = Base.showTrieWith (\(Id b) -> showChar $ if b then 'X' else ' ')
. unTS