| 1 | -- ---------------------------------------------------------------------------- |
|---|
| 2 | |
|---|
| 3 | {- | |
|---|
| 4 | Module : Holumbus.Data.Trie |
|---|
| 5 | Copyright : Copyright (C) 2007 Timo B. Huebel |
|---|
| 6 | License : MIT |
|---|
| 7 | |
|---|
| 8 | Maintainer : Timo B. Huebel (t.h@gmx.info) |
|---|
| 9 | Stability : experimental |
|---|
| 10 | Portability: portable |
|---|
| 11 | Version : 0.5 |
|---|
| 12 | |
|---|
| 13 | An efficient implementation of maps from arbitrary byte key to arbitrary values. |
|---|
| 14 | |
|---|
| 15 | Values can associated with an arbitrary byte key. Searching for keys is very fast, but |
|---|
| 16 | the trie probably consumes more memory than "Data.Map". The main differences are the special |
|---|
| 17 | 'prefixFind' functions, which can be used to perform prefix queries. |
|---|
| 18 | |
|---|
| 19 | Most other function names clash with "Prelude" names, therefore this module is usually |
|---|
| 20 | imported @qualified@, e.g. |
|---|
| 21 | |
|---|
| 22 | > import Holumbus.Data.Trie (Trie) |
|---|
| 23 | > import qualified Holumbus.Data.Trie as T |
|---|
| 24 | |
|---|
| 25 | See also |
|---|
| 26 | |
|---|
| 27 | * Donald R. Morrison, |
|---|
| 28 | \"/PATRICIA - Practical Algorithm To Retrieve Information Coded In Alphanumeric/\", |
|---|
| 29 | Journal of the ACM, 15 (4), 1968, pages 514-534. |
|---|
| 30 | |
|---|
| 31 | Many functions have a worst-case complexity of /O(min(n,L))/. This means that the operation |
|---|
| 32 | can become linear with the number of elements with a maximum of /L/, the length of the |
|---|
| 33 | key (the number of bytes in the list). The functions for searching a prefix have a worst-case |
|---|
| 34 | complexity of /O(max(L,R))/. This means that the operation can become linear with |
|---|
| 35 | /R/, the number of elements found for the prefix, with a minimum of /L/. |
|---|
| 36 | |
|---|
| 37 | The module is completely exported for ultimate flexibility. Derived modules should only |
|---|
| 38 | export a restricted interface (as shown in "Holumbus.Data.StrMap"). |
|---|
| 39 | |
|---|
| 40 | -} |
|---|
| 41 | |
|---|
| 42 | -- ---------------------------------------------------------------------------- |
|---|
| 43 | |
|---|
| 44 | {-# OPTIONS -fglasgow-exts #-} |
|---|
| 45 | |
|---|
| 46 | module Holumbus.Data.Trie where |
|---|
| 47 | |
|---|
| 48 | import Prelude hiding (succ, lookup, map, null) |
|---|
| 49 | |
|---|
| 50 | import Data.Maybe |
|---|
| 51 | import Data.Char |
|---|
| 52 | import Data.Binary |
|---|
| 53 | import Data.Word |
|---|
| 54 | |
|---|
| 55 | import Control.Monad |
|---|
| 56 | |
|---|
| 57 | import Data.Foldable (Foldable) |
|---|
| 58 | import qualified Data.Foldable as F |
|---|
| 59 | |
|---|
| 60 | import qualified Data.List as L |
|---|
| 61 | import qualified Data.Map as M |
|---|
| 62 | |
|---|
| 63 | import Control.Parallel.Strategies |
|---|
| 64 | |
|---|
| 65 | -- | A map from arbitrary byte keys to values a. |
|---|
| 66 | data Trie a |
|---|
| 67 | = End !Key !a ![Trie a] |
|---|
| 68 | | Seq !Key ![Trie a] |
|---|
| 69 | |
|---|
| 70 | -- | The key type. |
|---|
| 71 | type Key = [Word8] |
|---|
| 72 | |
|---|
| 73 | -- Just deriving Eq will not work, because equality on the lists of successors takes the order |
|---|
| 74 | -- into account, whereas the order does not matter here. |
|---|
| 75 | instance Eq a => Eq (Trie a) where |
|---|
| 76 | (==) (End k1 v1 s1) (End k2 v2 s2) = k1 == k2 && v1 == v2 && s1 L.\\ s2 == [] |
|---|
| 77 | (==) (Seq k1 s1) (Seq k2 s2) = k1 == k2 && s1 L.\\ s2 == [] |
|---|
| 78 | (==) (Seq _ _) (End _ _ _) = False |
|---|
| 79 | (==) (End _ _ _) (Seq _ _) = False |
|---|
| 80 | (/=) m1 m2 = not (m1 == m2) |
|---|
| 81 | |
|---|
| 82 | -- Compare based on to-/fromList |
|---|
| 83 | instance Ord a => Ord (Trie a) where |
|---|
| 84 | compare m1 m2 = compare (toList m1) (toList m2) |
|---|
| 85 | |
|---|
| 86 | -- Simple instance of Functor. |
|---|
| 87 | instance Functor Trie where |
|---|
| 88 | fmap = map |
|---|
| 89 | |
|---|
| 90 | -- Simple instance of Data.Foldable |
|---|
| 91 | instance Foldable Trie where |
|---|
| 92 | foldr = fold |
|---|
| 93 | |
|---|
| 94 | -- Stolen from Data.IntMap |
|---|
| 95 | instance Show a => Show (Trie a) where |
|---|
| 96 | showsPrec d m = showParen (d > 10) $ |
|---|
| 97 | showString "fromList " . shows (toList m) |
|---|
| 98 | |
|---|
| 99 | -- Stolen from Data.IntMap |
|---|
| 100 | instance Read a => Read (Trie a) where |
|---|
| 101 | readsPrec p = readParen (p > 10) $ \ r -> do |
|---|
| 102 | ("fromList",s) <- lex r |
|---|
| 103 | (xs,t) <- reads s |
|---|
| 104 | return (fromList xs,t) |
|---|
| 105 | |
|---|
| 106 | -- Providing strict evaluation for 'StrMap'. |
|---|
| 107 | instance NFData a => NFData (Trie a) where |
|---|
| 108 | rnf (End k v t) = rnf k `seq` rnf v `seq` rnf t |
|---|
| 109 | rnf (Seq k t) = rnf k `seq` rnf t |
|---|
| 110 | |
|---|
| 111 | -- Provide native binary serialization (not via to-/fromList). |
|---|
| 112 | instance (Binary a) => Binary (Trie a) where |
|---|
| 113 | put (End k v t) = put (0 :: Word8) >> put k >> put v >> put t |
|---|
| 114 | put (Seq k t) = put (1 :: Word8) >> put k >> put t |
|---|
| 115 | |
|---|
| 116 | get = do tag <- getWord8 |
|---|
| 117 | case tag of |
|---|
| 118 | 0 -> liftM3 End get get get |
|---|
| 119 | 1 -> liftM2 Seq get get |
|---|
| 120 | _ -> fail "Trie.get: error while decoding StrMap" |
|---|
| 121 | |
|---|
| 122 | -- | /O(1)/ Create an empty trie. |
|---|
| 123 | empty :: Trie a |
|---|
| 124 | empty = Seq [] [] |
|---|
| 125 | |
|---|
| 126 | -- | /O(1)/ Is the map empty? |
|---|
| 127 | null :: Trie a -> Bool |
|---|
| 128 | null (Seq _ []) = True |
|---|
| 129 | null (Seq _ (_:_)) = False |
|---|
| 130 | null (End _ _ _) = error "Trie.null: root node should be Seq" |
|---|
| 131 | |
|---|
| 132 | -- | /O(1)/ Create a map with a single element. |
|---|
| 133 | singleton :: Key -> a -> Trie a |
|---|
| 134 | singleton k v = Seq [] [End k v []] |
|---|
| 135 | |
|---|
| 136 | -- | /O(1)/ Extract the key of a node |
|---|
| 137 | key :: Trie a -> Key |
|---|
| 138 | key (End k _ _) = k |
|---|
| 139 | key (Seq k _) = k |
|---|
| 140 | |
|---|
| 141 | -- | /O(1)/ Extract the value of a node (if there is one) |
|---|
| 142 | value :: Monad m => Trie a -> m a |
|---|
| 143 | value (End _ v _) = return v |
|---|
| 144 | value (Seq _ _) = fail "Trie.value: no value at this node" |
|---|
| 145 | |
|---|
| 146 | -- | /O(1)/ Extract the value of a node or return a default value if no value exists. |
|---|
| 147 | valueWithDefault :: a -> Trie a -> a |
|---|
| 148 | valueWithDefault _ (End _ v _) = v |
|---|
| 149 | valueWithDefault d (Seq _ _) = d |
|---|
| 150 | |
|---|
| 151 | -- | /O(1)/ Extract the successors of a node |
|---|
| 152 | succ :: Trie a -> [Trie a] |
|---|
| 153 | succ (End _ _ t) = t |
|---|
| 154 | succ (Seq _ t) = t |
|---|
| 155 | |
|---|
| 156 | -- | /O(1)/ Sets the key of a node. |
|---|
| 157 | setKey :: Key -> Trie a -> Trie a |
|---|
| 158 | setKey k (End _ v t) = End k v t |
|---|
| 159 | setKey k (Seq _ t) = Seq k t |
|---|
| 160 | |
|---|
| 161 | -- | /O(1)/ Sets the successors of a node. |
|---|
| 162 | setSucc :: [Trie a] -> Trie a -> Trie a |
|---|
| 163 | setSucc t (End k v _) = End k v t |
|---|
| 164 | setSucc t (Seq k _) = Seq k t |
|---|
| 165 | |
|---|
| 166 | -- | /O(min(n,L))/ Find the value at a key. Calls error when the element can not be found. |
|---|
| 167 | (!) :: Trie a -> Key -> a |
|---|
| 168 | (!) m k = if isNothing r then error ("Trie.!: element not in the map") |
|---|
| 169 | else fromJust r |
|---|
| 170 | where r = lookup k m |
|---|
| 171 | |
|---|
| 172 | -- | /O(min(n,L))/ Is the key a member of the map? |
|---|
| 173 | member :: Key -> Trie a -> Bool |
|---|
| 174 | member k m = maybe False (\_ -> True) (lookup k m) |
|---|
| 175 | |
|---|
| 176 | -- | /O(min(n,L))/ Delete an element from the map. If no element exists for the key, the map |
|---|
| 177 | -- remains unchanged. |
|---|
| 178 | delete :: Key -> Trie a -> Trie a |
|---|
| 179 | delete = (fromMaybe empty .) . delete' |
|---|
| 180 | |
|---|
| 181 | -- | The internal delete function. |
|---|
| 182 | delete' :: Key -> Trie a -> Maybe (Trie a) |
|---|
| 183 | delete' d n | L.null dr && L.null kr = deleteNode n |
|---|
| 184 | | not (L.null dr) && L.null kr = Just (mergeNode n (mapMaybe (delete' dr) (succ n))) |
|---|
| 185 | | otherwise = Just n |
|---|
| 186 | where (_, dr, kr) = split d (key n) |
|---|
| 187 | |
|---|
| 188 | -- | Merge a node with its successor if only one successor is left. |
|---|
| 189 | mergeNode :: Trie a -> [Trie a] -> Trie a |
|---|
| 190 | mergeNode (End k v _) t = End k v t |
|---|
| 191 | mergeNode (Seq k _) [t] = if not (L.null k) then setKey (k ++ (key t)) t else Seq k [t] |
|---|
| 192 | mergeNode (Seq k _) t = Seq k t |
|---|
| 193 | |
|---|
| 194 | -- | Delete a node by either merging it with its successors or removing it completely. |
|---|
| 195 | deleteNode :: Trie a -> Maybe (Trie a) |
|---|
| 196 | deleteNode (End _ _ []) = Nothing |
|---|
| 197 | deleteNode (End k _ [t]) = Just (setKey (k ++ key t) t) |
|---|
| 198 | deleteNode (End k _ t) = Just (Seq k t) |
|---|
| 199 | deleteNode n = Just n |
|---|
| 200 | |
|---|
| 201 | -- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map, |
|---|
| 202 | -- the value of @f key new_value old_value@ will be inserted. |
|---|
| 203 | insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> Trie a -> Trie a |
|---|
| 204 | insertWithKey f nk nv n = insert' f nk nv nk n |
|---|
| 205 | |
|---|
| 206 | -- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map, |
|---|
| 207 | -- the value of @f new_value old_value@ will be inserted. |
|---|
| 208 | insertWith :: (a -> a -> a) -> Key -> a -> Trie a -> Trie a |
|---|
| 209 | insertWith f nk nv n = insert' (\_ new old -> f new old) nk nv nk n |
|---|
| 210 | |
|---|
| 211 | -- | /O(min(n,L))/ Insert a new key and value into the map. If the key is already present in |
|---|
| 212 | -- the map, the associated value will be replaced with the new value. |
|---|
| 213 | insert :: Key -> a -> Trie a -> Trie a |
|---|
| 214 | insert nk nv n = insertWith const nk nv n |
|---|
| 215 | |
|---|
| 216 | -- | The internal insert function which does the real work. The original new key has to |
|---|
| 217 | -- be put through because otherwise it will be shortened on every recursive call. |
|---|
| 218 | insert' :: (Key -> a -> a -> a) -> Key -> a -> Key -> Trie a -> Trie a |
|---|
| 219 | insert' f nk nv ok n | L.null nk = error "Empty key!" |
|---|
| 220 | -- Key already exists, the current value will be replaced with the new value. |
|---|
| 221 | | L.null cr && L.null nr = End s (maybe nv (f ok nv) (value n)) (succ n) |
|---|
| 222 | -- Insert into list of successors. |
|---|
| 223 | | L.null cr && not (L.null nr) = setSucc (insertSub f nr nv ok (succ n)) n |
|---|
| 224 | -- New intermediate End node with the new value and the current node with the |
|---|
| 225 | -- remainder of the key as successor. |
|---|
| 226 | | L.null nr && not (L.null cr) = End s nv [setKey cr n] |
|---|
| 227 | -- New intermediate Seq node which shares the prefix of the new key and the |
|---|
| 228 | -- key of the current node. |
|---|
| 229 | | otherwise = Seq s [setKey cr n, (End nr nv [])] |
|---|
| 230 | where (s, nr, cr) = split nk (key n) |
|---|
| 231 | |
|---|
| 232 | -- | Internal support function for insert which searches the correct successor to insert into |
|---|
| 233 | -- within a list of nodes (the successors of the current node, see call in insert' above). |
|---|
| 234 | insertSub :: (Key -> a -> a -> a) -> Key -> a -> Key -> [Trie a] -> [Trie a] |
|---|
| 235 | insertSub f k v o t = insertSub' f k v t [] |
|---|
| 236 | where |
|---|
| 237 | insertSub' :: (Key -> a -> a -> a) -> Key -> a -> [Trie a] -> [Trie a] -> [Trie a] |
|---|
| 238 | insertSub' _ nk nv [] r = (End nk nv []):r |
|---|
| 239 | insertSub' cf nk nv (x:xs) r = if head (key x) == head nk then (insert' cf nk nv o x):r ++ xs else |
|---|
| 240 | insertSub' cf nk nv xs (x:r) |
|---|
| 241 | |
|---|
| 242 | -- | Analyses two strings and splits them into three parts: A common prefix and both reminders |
|---|
| 243 | splitBy :: (Key -> Key) -> Key -> Key -> (Key, Key, Key) |
|---|
| 244 | splitBy f a b = splitBy' (f a) (f b) ([], [], []) |
|---|
| 245 | where |
|---|
| 246 | splitBy' :: Key -> Key -> (Key, Key, Key) -> (Key, Key, Key) |
|---|
| 247 | splitBy' n [] (p, nr, hr) = (p, nr ++ n, hr) |
|---|
| 248 | splitBy' [] h (p, nr, hr) = (p, nr, hr ++ h) |
|---|
| 249 | splitBy' (n:ns) (h:hs) (p, nr, hr) = if n == h then splitBy' ns hs (p ++ [n], nr, hr) |
|---|
| 250 | else (p, n:ns, h:hs) |
|---|
| 251 | |
|---|
| 252 | -- | Simple split without any preprocessing. |
|---|
| 253 | split :: Key -> Key -> (Key, Key, Key) |
|---|
| 254 | split = splitBy id |
|---|
| 255 | |
|---|
| 256 | -- | /O(n)/ Returns all values. |
|---|
| 257 | elems :: Trie a -> [a] |
|---|
| 258 | elems t = L.map snd (toList t) |
|---|
| 259 | |
|---|
| 260 | -- | /O(n)/ Creates a trie from a list of key\/value pairs. |
|---|
| 261 | fromList :: [(Key, a)] -> Trie a |
|---|
| 262 | fromList xs = L.foldl' (\p (k, v) -> insert k v p) empty xs |
|---|
| 263 | |
|---|
| 264 | -- | /O(n)/ Returns all elements as list of key value pairs, |
|---|
| 265 | toList :: Trie a -> [(Key, a)] |
|---|
| 266 | toList = foldWithKey (\k v r -> (k, v):r) [] |
|---|
| 267 | |
|---|
| 268 | -- | /O(n)/ The number of elements. |
|---|
| 269 | size :: Trie a -> Int |
|---|
| 270 | size = fold (\_ r -> r + 1) 0 |
|---|
| 271 | |
|---|
| 272 | -- | /O(max(L,R))/ Find all values where the string is a prefix of the key. |
|---|
| 273 | prefixFind :: Key -> Trie a -> [a] |
|---|
| 274 | prefixFind q n = L.map snd (prefixFindInternal split q n) |
|---|
| 275 | |
|---|
| 276 | -- | /O(max(L,R))/ Find all values where the string is a prefix of the key and include the keys |
|---|
| 277 | -- in the result. |
|---|
| 278 | prefixFindWithKey :: Key -> Trie a -> [(Key, a)] |
|---|
| 279 | prefixFindWithKey = prefixFindInternal split |
|---|
| 280 | |
|---|
| 281 | -- | /O(max(L,R))/ Same as 'prefixFind', but preprocesses the search key and every |
|---|
| 282 | -- key in the map with @f@ before comparison. |
|---|
| 283 | prefixFindBy :: (Key -> Key) -> Key -> Trie a -> [a] |
|---|
| 284 | prefixFindBy f q n = L.map snd (prefixFindInternal (splitBy f) q n) |
|---|
| 285 | |
|---|
| 286 | -- | /O(max(L,R))/ Same as 'prefixFindWithKey', but preprocesses the search key and every |
|---|
| 287 | -- key in the map with @f@ before comparison. |
|---|
| 288 | prefixFindWithKeyBy :: (Key -> Key) -> Key -> Trie a -> [(Key, a)] |
|---|
| 289 | prefixFindWithKeyBy f = prefixFindInternal (splitBy f) |
|---|
| 290 | |
|---|
| 291 | -- | Internal prefix find function which is used to implement every other prefix find function. |
|---|
| 292 | prefixFindInternal :: (Key -> Key -> (Key, Key, Key)) -> Key -> Trie a -> [(Key, a)] |
|---|
| 293 | prefixFindInternal f = prefixFindInternal' f [] |
|---|
| 294 | where |
|---|
| 295 | prefixFindInternal' sf a p n | L.null pr = L.map (\(k, v) -> (a ++ k, v)) (toList n) |
|---|
| 296 | | L.null kr = concat (L.map (prefixFindInternal' sf (a ++ (key n)) pr) (succ n)) |
|---|
| 297 | | otherwise = [] |
|---|
| 298 | where (_, pr, kr) = sf p (key n) |
|---|
| 299 | |
|---|
| 300 | -- | /O(min(n,L))/ Find the value associated with a key. The function will @return@ the result in |
|---|
| 301 | -- the monad or @fail@ in it if the key isn't in the map. |
|---|
| 302 | lookup :: Monad m => Key -> Trie a -> m a |
|---|
| 303 | lookup q n = case lookup' q n of |
|---|
| 304 | Just v -> return v |
|---|
| 305 | Nothing -> fail "Trie.lookup: Key not found" |
|---|
| 306 | |
|---|
| 307 | -- | Internal lookup function which is generalised for arbitrary monads above. |
|---|
| 308 | lookup' :: Key -> Trie a -> Maybe a |
|---|
| 309 | lookup' q n | L.null pr = if L.null kr then value n else Nothing |
|---|
| 310 | | L.null kr = let xs = (filter isJust (L.map (lookup pr) (succ n))) in |
|---|
| 311 | if L.null xs then Nothing else head xs |
|---|
| 312 | | otherwise = Nothing |
|---|
| 313 | where (_, pr, kr) = split q (key n) |
|---|
| 314 | |
|---|
| 315 | -- | /O(max(L,R))/ Same as 'lookup', but preprocesses the search key and every |
|---|
| 316 | -- key in the map with @f@ before comparison. |
|---|
| 317 | lookupBy :: (Key -> Key) -> Key -> Trie a -> [a] |
|---|
| 318 | lookupBy f q n | L.null pr = if L.null kr then maybeToList (value n) else [] |
|---|
| 319 | | L.null kr = concat (L.map (lookupBy f pr) (succ n)) |
|---|
| 320 | | otherwise = [] |
|---|
| 321 | where (_, pr, kr) = splitBy f q (key n) |
|---|
| 322 | |
|---|
| 323 | -- | /O(min(n,L))/ Find the value associated with a key or return a default value if nothing |
|---|
| 324 | -- was found. |
|---|
| 325 | findWithDefault :: a -> Key -> Trie a -> a |
|---|
| 326 | findWithDefault d q n = maybe d id (lookup q n) |
|---|
| 327 | |
|---|
| 328 | -- | /O(n)/ Fold over all key\/value pairs in the map. |
|---|
| 329 | foldWithKey :: (Key -> a -> b -> b) -> b -> Trie a -> b |
|---|
| 330 | foldWithKey f n m = fold' [] m n |
|---|
| 331 | where |
|---|
| 332 | fold' ck (End k v t) r = let nk = ck ++ k in foldr (fold' nk) (f nk v r) t |
|---|
| 333 | fold' ck (Seq k t) r = let nk = ck ++ k in foldr (fold' nk) r t |
|---|
| 334 | |
|---|
| 335 | -- | /O(n)/ Fold over all values in the map. |
|---|
| 336 | fold :: (a -> b -> b) -> b -> Trie a -> b |
|---|
| 337 | fold f = foldWithKey (\_ v r -> f v r) |
|---|
| 338 | |
|---|
| 339 | -- | /O(n)/ Map over all key\/value pairs in the map. |
|---|
| 340 | mapWithKey :: (Key -> a -> b) -> Trie a -> Trie b |
|---|
| 341 | mapWithKey f m = map' [] m |
|---|
| 342 | where |
|---|
| 343 | map' ck (End k v t) = let nk = ck ++ k in End k (f nk v) (L.map (map' nk) t) |
|---|
| 344 | map' ck (Seq k t) = let nk = ck ++ k in Seq k (L.map (map' nk) t) |
|---|
| 345 | |
|---|
| 346 | -- | /O(n)/ Map over all values in the map. |
|---|
| 347 | map :: (a -> b) -> Trie a -> Trie b |
|---|
| 348 | map f = mapWithKey (\_ v -> f v) |
|---|
| 349 | |
|---|
| 350 | -- | /O(n)/ Convert into an ordinary map. |
|---|
| 351 | toMap :: Trie a -> M.Map Key a |
|---|
| 352 | toMap = foldWithKey M.insert M.empty |
|---|
| 353 | |
|---|
| 354 | -- | /O(n)/ Convert an ordinary map into a StrMap. |
|---|
| 355 | fromMap :: M.Map Key a -> Trie a |
|---|
| 356 | fromMap = M.foldWithKey insert empty |
|---|
| 357 | |
|---|
| 358 | -- | /O(n)/ Calculate some statistics about the Trie for debugging purposes. |
|---|