-- File created: 2008-11-08 19:22:07 {-# LANGUAGE CPP, MultiParamTypeClasses, FlexibleInstances , FlexibleContexts, UndecidableInstances #-} #include "exports.h" -- | The base implementation of a Patricia trie representing a set of lists, -- generalized over any type of map from element values to tries. -- -- Worst-case complexities are given in terms of @n@, @m@, and @s@. @n@ refers -- to the number of keys in the set and @m@ to their maximum length. @s@ refers -- to the length of a key given to the function, not any property of the set. -- -- In addition, the trie's branching factor plays a part in almost every -- operation, but the complexity depends on the underlying 'Map'. Thus, for -- instance, 'member' is actually @O(m f(b))@ where @f(b)@ is the complexity of -- a lookup operation on the 'Map' used. This complexity depends on the -- underlying operation, which is not part of the specification of the visible -- function. Thus it could change whilst affecting the complexity only for -- certain Map types: hence this \"b factor\" is not shown explicitly. -- -- Disclaimer: the complexities have not been proven. module Data.ListTrie.Patricia.Set (SET_EXPORTS) where import Control.Arrow ((***)) import Control.Monad (liftM3) import Data.Binary (Binary,get,put) import Data.Function (on) import qualified Data.List.NonEmpty as NE import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..), stimesIdempotent) import Prelude hiding (filter, foldl, foldr, map, null) import qualified Prelude #if __GLASGOW_HASKELL__ import Text.Read (readPrec, lexP, parens, prec, Lexeme(Ident)) #endif import qualified Data.ListTrie.Base.Map as Map import qualified Data.ListTrie.Patricia.Base as Base import Data.ListTrie.Base.Classes (Identity(..), Unwrappable(..)) import Data.ListTrie.Base.Map (Map, OrdMap) import Data.ListTrie.Util ((.:), (.:.), both) #include "docs.h" -- Invariant: any (Tr False _ _) has at least two children, all of which are -- True or have a True descendant. -- -- In order to avoid a lot of special casing it has to be the case that there's -- only one way to represent a given trie. The above property makes sure of -- that, so that, for instance, 'fromList ["foo"]' can only be 'Tr True "foo" -- Map.empty', and not 'Tr False "fo" (Map.fromList [('o',Tr True "" -- Map.empty)])'. Base.tryCompress is a function which takes care of this. -- -- This Base stuff is needed just as in the non-Patricia version. data TrieSetBase map a bool = Tr !bool ![a] !(CMap map a bool) type CMap map a bool = map a (TrieSetBase map a bool) -- | The data structure itself: a set of keys of type @[a]@ implemented as a -- trie, using @map@ to map keys of type @a@ to sub-tries. -- -- Regarding the instances: -- -- - The @CMap@ type is internal, ignore it. For 'Eq' and 'Ord' an 'Eq' -- instance is required: what this means is that @map a v@ is expected to be -- an instance of 'Eq', given 'Eq'@ v@. -- -- - The 'Eq' constraint for the 'Ord' instance is misleading: it is needed -- only because 'Eq' is a superclass of 'Ord'. -- -- - The 'Monoid' instance defines 'mappend' as 'union' and 'mempty' as -- 'empty'. newtype TrieSet map a = TS { unTS :: TrieSetBase map a Bool } inTS :: (TrieSetBase map a Bool -> TrieSetBase nap b Bool) -> (TrieSet map a -> TrieSet nap b) inTS f = TS . f . unTS instance Map map k => Base.Trie TrieSetBase Identity map k where mkTrie = Tr . unwrap tParts (Tr b p m) = (Id b,p,m) -- CMap contains TrieSetBase, not TrieSet, hence we must supply these instances -- for TrieSetBase first instance (Map map a, Eq (CMap map a Bool)) => Eq (TrieSetBase map a Bool) where Tr b1 p1 m1 == Tr b2 p2 m2 = b1 == b2 && Base.eqComparePrefixes (Map.eqCmp m1) p1 p2 && m1 == m2 instance (Eq (CMap map a Bool), OrdMap map a, Ord a) => Ord (TrieSetBase map a Bool) where compare = compare `on` Base.toAscList instance (Eq (CMap map a Bool), Map map a) => Eq (TrieSet map a) where (==) = (==) `on` unTS -- The CMap constraint is needed only because Eq is a superclass of Ord.... -- sigh instance (Eq (CMap map a Bool), OrdMap map a, Ord a) => Ord (TrieSet map a) where compare = compare `on` unTS instance Map map a => Semigroup (TrieSet map a) where (<>) = union sconcat = unions . NE.toList stimes = stimesIdempotent instance Map map a => Monoid (TrieSet map a) where mempty = empty mappend = (<>) mconcat = unions instance (Map map a, Show a) => Show (TrieSet map a) where showsPrec p s = showParen (p > 10) $ showString "fromList " . shows (toList s) instance (Map map a, Read a) => Read (TrieSet map a) where #if __GLASGOW_HASKELL__ readPrec = parens $ prec 10 $ do Ident "fromList" <- lexP fmap fromList readPrec #else readsPrec p = readParen (p > 10) $ \r -> do ("fromList", list) <- lex r (xs, rest) <- readsPrec (p+1) list [(fromList xs, rest)] #endif instance (Map map k, Binary k, Binary a) => Binary (TrieSetBase map k a) where put (Tr v k m) = put v >> put k >> (put . Map.serializeToList $ m) get = liftM3 Tr get get (get >>= return . Map.deserializeFromList) instance (Map map a, Binary a) => Binary (TrieSet map a) where put = put . unTS get = get >>= return . TS -- * Construction -- | @O(1)@. The empty set. empty :: Map map a => TrieSet map a empty = TS Base.empty -- | @O(1)@. The singleton set containing only the given key. singleton :: Map map a => [a] -> TrieSet map a singleton k = TS$ Base.singleton k True -- * Modification -- | @O(min(m,s))@. Inserts the key into the set. If the key is already a -- member of the set, the set is unchanged. insert :: Map map a => [a] -> TrieSet map a -> TrieSet map a insert k = inTS$ Base.insert k True -- | @O(min(m,s))@. Removes the key from the set. If the key is not a member of -- the set, the set is unchanged. delete :: Map map a => [a] -> TrieSet map a -> TrieSet map a delete = inTS . Base.delete -- * Querying -- | @O(1)@. 'True' iff the set is empty. null :: Map map a => TrieSet map a -> Bool null = Base.null . unTS -- | @O(n m)@. The number of keys in the set. The value is built up lazily, -- allowing for delivery of partial results without traversing the whole set. size :: (Map map a, Num n) => TrieSet map a -> n size = Base.size . unTS -- | @O(n m)@. The number of keys in the set. The value is built strictly: no -- value is returned until the set has been fully traversed. size' :: (Map map a, Num n) => TrieSet map a -> n size' = Base.size' . unTS -- | @O(min(m,s))@. 'True' iff the given key is contained within the set. member :: Map map a => [a] -> TrieSet map a -> Bool member = Base.member .:. unTS -- | @O(min(m,s))@. 'False' iff the given key is contained within the set. notMember :: Map map a => [a] -> TrieSet map a -> Bool notMember = Base.notMember .:. unTS -- | @O(min(n1 m1,n2 m2))@. 'True' iff the first set is a subset of the second, -- i.e. all keys that are members of the first set are also members of the -- second set. isSubsetOf :: Map map a => TrieSet map a -> TrieSet map a -> Bool isSubsetOf = Base.isSubmapOfBy (&&) `on` unTS -- | @O(min(n1 m1,n2 m2))@. 'True' iff the first set is a proper subset of the -- second, i.e. the first is a subset of the second, but the sets are not -- equal. isProperSubsetOf :: Map map a => TrieSet map a -> TrieSet map a -> Bool isProperSubsetOf = Base.isProperSubmapOfBy (&&) `on` unTS -- * Combination defaultUnion :: Bool -> Bool -> Bool defaultUnion = error "TrieSet.union :: internal error" -- | @O(min(n1 m1,n2 m2))@. The union of the two sets: the set which contains -- all keys that are members of either set. -- -- The worst-case performance occurs when the two sets are identical. union :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a union = TS .: Base.unionWith defaultUnion `on` unTS -- | @O(sum(n))@. The union of all the sets: the set which contains all keys -- that are members of any of the sets. -- -- The worst-case performance occurs when all the sets are identical. unions :: Map map a => [TrieSet map a] -> TrieSet map a unions = TS . Base.unionsWith defaultUnion . Prelude.map unTS -- | @O(min(n1 m1,n2 m2))@. The difference of the two sets: the set which -- contains all keys that are members of the first set and not members of the -- second set. -- -- The worst-case performance occurs when the two sets are identical. difference :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a difference = TS .: Base.differenceWith (error "TrieSet.difference :: internal error") `on` unTS -- | @O(min(n1 m1,n2 m2))@. The intersection of the two sets: the set which -- contains all keys that are members of both sets. -- -- The worst-case performance occurs when the two sets are identical. intersection :: Map map a => TrieSet map a -> TrieSet map a -> TrieSet map a intersection = TS .: Base.intersectionWith (error "TrieSet.intersection :: internal error") `on` unTS -- * Filtering -- | @O(n m)@. The set of those keys in the set for which the given predicate -- returns 'True'. filter :: Map map a => ([a] -> Bool) -> TrieSet map a -> TrieSet map a filter p = inTS $ Base.filterWithKey (\k _ -> p k) -- | @O(n m)@. A pair of sets: the first element contains those keys for which -- the given predicate returns 'True', and the second element contains those -- for which it was 'False'. partition :: Map map a => ([a] -> Bool) -> TrieSet map a -> (TrieSet map a, TrieSet map a) partition p = both TS . Base.partitionWithKey (\k _ -> p k) . unTS -- * Mapping -- | @O(n m)@. Apply the given function to all the keys in the set. map :: (Map map a, Map map b) => ([a] -> [b]) -> TrieSet map a -> TrieSet map b map = inTS . Base.mapKeysWith Base.fromList -- | @O(n m)@. Apply the given function to the contents of all the keys in the -- set. mapIn :: (Map map a, Map map b) => (a -> b) -> TrieSet map a -> TrieSet map b mapIn = inTS . Base.mapInKeysWith defaultUnion -- * Folding -- | @O(n m)@. Equivalent to a list @foldr@ on the 'toList' representation. foldr :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldr f = Base.foldrWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldr@ on the 'toAscList' representation. foldrAsc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldrAsc f = Base.foldrAscWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldr@ on the 'toDescList' representation. foldrDesc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldrDesc f = Base.foldrDescWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl@ on the 'toList' representation. foldl :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldl f = Base.foldlWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl@ on the 'toAscList' representation. foldlAsc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldlAsc f = Base.foldlAscWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl@ on the 'toDescList' representation. foldlDesc :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldlDesc f = Base.foldlDescWithKey (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toList' representation. foldl' :: Map map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldl' f = Base.foldlWithKey' (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toAscList' representation. foldlAsc' :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldlAsc' f = Base.foldlAscWithKey' (\k _ -> f k) .:. unTS -- | @O(n m)@. Equivalent to a list @foldl'@ on the 'toDescList' -- representation. foldlDesc' :: OrdMap map a => ([a] -> b -> b) -> b -> TrieSet map a -> b foldlDesc' f = Base.foldlDescWithKey' (\k _ -> f k) .:. unTS -- * Conversion between lists -- | @O(n m)@. Converts the set to a list of the keys contained within, in -- undefined order. toList :: Map map a => TrieSet map a -> [[a]] toList = Prelude.map fst . Base.toList . unTS -- | @O(n m)@. Converts the set to a list of the keys contained within, in -- ascending order. toAscList :: OrdMap map a => TrieSet map a -> [[a]] toAscList = Prelude.map fst . Base.toAscList . unTS -- | @O(n m)@. Converts the set to a list of the keys contained within, in -- descending order. toDescList :: OrdMap map a => TrieSet map a -> [[a]] toDescList = Prelude.map fst . Base.toDescList . unTS -- | @O(n m)@. Creates a set from a list of keys. fromList :: Map map a => [[a]] -> TrieSet map a fromList = TS . Base.fromList . Prelude.map (flip (,) True) -- * Ordering ops -- | @O(m)@. Removes and returns the minimal key in the set. If the set is -- empty, 'Nothing' and the original set are returned. minView :: OrdMap map a => TrieSet map a -> (Maybe [a], TrieSet map a) minView = (fmap fst *** TS) . Base.minView . unTS -- | @O(m)@. Removes and returns the maximal key in the set. If the set is -- empty, 'Nothing' and the original set are returned. maxView :: OrdMap map a => TrieSet map a -> (Maybe [a], TrieSet map a) maxView = (fmap fst *** TS) . Base.maxView . unTS -- | @O(m)@. Like 'fst' composed with 'minView'. 'Just' the minimal key in the -- set, or 'Nothing' if the set is empty. findMin :: OrdMap map a => TrieSet map a -> Maybe [a] findMin = fmap fst . Base.findMin . unTS -- | @O(m)@. Like 'fst' composed with 'maxView'. 'Just' the maximal key in the -- set, or 'Nothing' if the set is empty. findMax :: OrdMap map a => TrieSet map a -> Maybe [a] findMax = fmap fst . Base.findMax . unTS -- | @O(m)@. Like 'snd' composed with 'minView'. The set without its minimal -- key, or the unchanged original set if it was empty. deleteMin :: OrdMap map a => TrieSet map a -> TrieSet map a deleteMin = inTS Base.deleteMin -- | @O(m)@. Like 'snd' composed with 'maxView'. The set without its maximal -- key, or the unchanged original set if it was empty. deleteMax :: OrdMap map a => TrieSet map a -> TrieSet map a deleteMax = inTS Base.deleteMax -- | @O(min(m,s))@. Splits the set in two about the given key. The first -- element of the resulting pair is a set containing the keys lesser than the -- given key; the second contains those keys that are greater. split :: OrdMap map a => [a] -> TrieSet map a -> (TrieSet map a, TrieSet map a) split = both TS .: Base.split .:. unTS -- | @O(min(m,s))@. Like 'split', but also returns whether the given key was a -- member of the set or not. splitMember :: OrdMap map a => [a] -> TrieSet map a -> (TrieSet map a, Bool, TrieSet map a) splitMember = (\(l,b,g) -> (TS l,unwrap b,TS g)) .: Base.splitLookup .:. unTS -- | @O(m)@. 'Just' the key of the set which precedes the given key in order, -- or 'Nothing' if the set is empty. findPredecessor :: OrdMap map a => [a] -> TrieSet map a -> Maybe [a] findPredecessor = fmap fst .: Base.findPredecessor .:. unTS -- | @O(m)@. 'Just' the key of the set which succeeds the given key in order, -- or 'Nothing' if the set is empty. findSuccessor :: OrdMap map a => [a] -> TrieSet map a -> Maybe [a] findSuccessor = fmap fst .: Base.findSuccessor .:. unTS -- * Trie-only operations -- | @O(s)@. The set which contains all keys of which the given key is a -- prefix. For example: -- -- > lookupPrefix "ab" (fromList ["a","ab","ac","abc"]) -- > == fromList ["ab","abc"] lookupPrefix :: Map map a => [a] -> TrieSet map a -> TrieSet map a lookupPrefix = TS .: Base.lookupPrefix .:. unTS -- | @O(s)@. Prepends the given key to all the keys of the set. For example: -- -- > addPrefix "pre" (fromList ["a","b"]) == fromList ["prea","preb"] addPrefix :: Map map a => [a] -> TrieSet map a -> TrieSet map a addPrefix = TS .: Base.addPrefix .:. unTS -- | @O(s)@. The set which contains all keys of which the given key is a -- prefix, with the prefix removed from each key. If the given key is not a -- prefix of any key in the set, an empty set is returned. For example: -- -- > deletePrefix "a" (fromList ["a","ab","ac"]) == fromList ["","b","c"] -- -- This function can be used, for instance, to reduce potentially expensive I/O -- operations: if you need to check whether a string is a member of a set, but -- you only have a prefix of it and retrieving the rest is an expensive -- operation, calling 'deletePrefix' with what you have might allow you to -- avoid the operation: if the resulting set is empty, the entire string cannot -- be a member of the set. deletePrefix :: Map map a => [a] -> TrieSet map a -> TrieSet map a deletePrefix = TS .: Base.deletePrefix .:. unTS -- | @O(s)@. Deletes all keys which are suffixes of the given key. For example: -- -- > deleteSuffixes "ab" (fromList $ zip ["a","ab","ac","b","abc"] [1..]) -- > == fromList [("a",1),("ac",3),("b",4)] deleteSuffixes :: Map map a => [a] -> TrieSet map a -> TrieSet map a deleteSuffixes = TS .: Base.deleteSuffixes .:. unTS -- | @O(1)@. A triple containing the longest common prefix of all keys in the -- set, whether that prefix was a member of the set, and the set with that -- prefix removed from all the keys as well as the set itself. Examples: -- -- > splitPrefix (fromList ["a","b"]) == ("", False, fromList ["a","b"]) -- > splitPrefix (fromList ["a","ab","ac"]) == ("a", True, fromList ["b","c"]) splitPrefix :: Map map a => TrieSet map a -> ([a], Bool, TrieSet map a) splitPrefix = (\(k,b,t) -> (k,unwrap b,TS t)) . Base.splitPrefix . unTS -- | @O(1)@. The children of the longest common prefix in the trie as sets, -- associated with their distinguishing key value. If the set contains less -- than two keys, this function will return an empty map. Examples; -- -- > children (fromList ["a","abc","abcd"]) -- > == Map.fromList [('b',fromList ["c","cd"])] -- > children (fromList ["b","c"]) -- > == Map.fromList [('b',fromList [""]),('c',fromList [""])] children :: Map map a => TrieSet map a -> map a (TrieSet map a) children = Map.map TS . Base.children . unTS -- | @O(1)@. The children of the first element of the longest common prefix in -- the trie as sets, associated with their distinguishing key value. If the set -- contains less than two keys, this function will return an empty map. -- -- If the longest common prefix of all keys in the trie is the empty list, this -- function is equivalent to 'children'. -- -- Examples: -- -- > children1 (fromList ["abc","abcd"]) -- > == Map.fromList [('a',fromList ["bc","bcd"])] -- > children1 (fromList ["b","c"]) -- > == Map.fromList [('b',fromList [""]),('c',fromList [""])] children1 :: Map map a => TrieSet map a -> map a (TrieSet map a) children1 = Map.map TS . Base.children1 . unTS -- * Visualization -- | @O(n m)@. Displays the set's internal structure in an undefined way. That -- is to say, no program should depend on the function's results. showTrie :: (Show a, Map map a) => TrieSet map a -> ShowS showTrie = Base.showTrieWith (\(Id b) -> showChar $ if b then 'X' else ' ') . unTS