Holumbus-Searchengine-1.2.3: A search and indexing engine.

Portabilitynot portable
Stabilityexperimental
MaintainerUwe Schmidt (uwe@fh-wedel.de)
Safe HaskellNone

Holumbus.Data.PrefixTree.Core

Description

An efficient implementation of maps from strings to arbitrary values.

Values can associated with an arbitrary byte key. Searching for keys is very fast, but the prefix tree probably consumes more memory than Data.Map. The main differences are the special prefixFind functions, which can be used to perform prefix queries. The interface is heavily borrowed from Data.Map and Data.IntMap.

Most other function names clash with Prelude names, therefore this module is usually imported qualified, e.g.

 import Holumbus.Data.PrefixTree (PrefixTree)
 import qualified Holumbus.Data.PrefixTree as T

Many functions have a worst-case complexity of O(min(n,L)). This means that the operation can become linear with the number of elements with a maximum of L, the length of the key (the number of bytes in the list). The functions for searching a prefix have a worst-case complexity of O(max(L,R)). This means that the operation can become linear with R, the number of elements found for the prefix, with a minimum of L.

The module exports include the internal data types, their constructors and access functions for ultimate flexibility. Derived modules should not export these (as shown in Holumbus.Data.StrMap) to provide only a restricted interface.

Synopsis

Documentation

data PrefixTree v Source

Constructors

Empty 
Val 

Fields

value' :: v
 
tree :: !(PrefixTree v)
 
Branch 

Fields

sym :: !Sym
 
child :: !(PrefixTree v)
 
next :: !(PrefixTree v)
 
Leaf 

Fields

value' :: v
 
Last 

Fields

sym :: !Sym
 
child :: !(PrefixTree v)
 
LsSeq 

Fields

syms :: !Key1
 
child :: !(PrefixTree v)
 
BrSeq 

Fields

syms :: !Key1
 
child :: !(PrefixTree v)
 
next :: !(PrefixTree v)
 
LsSeL 

Fields

syms :: !Key1
 
value' :: v
 
BrSeL 

Fields

syms :: !Key1
 
value' :: v
 
next :: !(PrefixTree v)
 
BrVal 

Fields

sym :: !Sym
 
value' :: v
 
next :: !(PrefixTree v)
 
LsVal 

Fields

sym :: !Sym
 
value' :: v
 

data Key1 Source

strict list of chars with unpacked fields

for internal use in prefix tree to optimize space efficiency

Constructors

Nil 
Cons !Sym !Key1 

Instances

null :: PrefixTree a -> BoolSource

O(1) Is the map empty?

singleton :: Key -> a -> PrefixTree aSource

O(1) Create a map with a single element.

value :: Monad m => PrefixTree a -> m aSource

O(1) Extract the value of a node (if there is one)

valueWithDefault :: a -> PrefixTree a -> aSource

O(1) Extract the value of a node or return a default value if no value exists.

succ :: PrefixTree a -> PrefixTree aSource

O(1) Extract the successors of a node

lookup :: Monad m => Key -> PrefixTree a -> m aSource

O(min(n,L)) Find the value associated with a key. The function will return the result in the monad or fail in it if the key isn't in the map.

findWithDefault :: a -> Key -> PrefixTree a -> aSource

O(min(n,L)) Find the value associated with a key. The function will return the result in the monad or fail in it if the key isn't in the map.

member :: Key -> PrefixTree a -> BoolSource

O(min(n,L)) Is the key a member of the map?

(!) :: PrefixTree a -> Key -> aSource

O(min(n,L)) Find the value at a key. Calls error when the element can not be found.

insert :: Key -> a -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Insert a new key and value into the map. If the key is already present in the map, the associated value will be replaced with the new value.

insertWith :: (a -> a -> a) -> Key -> a -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Insert with a combining function. If the key is already present in the map, the value of f new_value old_value will be inserted.

insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Insert with a combining function. If the key is already present in the map, the value of f key new_value old_value will be inserted.

update :: (a -> Maybe a) -> Key -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Updates a value at a given key (if that key is in the trie) or deletes the element if the result of the updating function is Nothing. If the key is not found, the trie is returned unchanged.

updateWithKey :: (Key -> a -> Maybe a) -> Key -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Updates a value at a given key (if that key is in the trie) or deletes the element if the result of the updating function is Nothing. If the key is not found, the trie is returned unchanged.

delete :: Key -> PrefixTree a -> PrefixTree aSource

O(min(n,L)) Delete an element from the map. If no element exists for the key, the map remains unchanged.

prefixFind :: Key -> PrefixTree a -> [a]Source

O(max(L,R)) Find all values where the string is a prefix of the key.

prefixFindWithKey :: Key -> PrefixTree a -> [(Key, a)]Source

O(max(L,R)) Find all values where the string is a prefix of the key and include the keys in the result.

insert' :: (a -> a -> a) -> a -> Key -> PrefixTree a -> PrefixTree aSource

update' :: (a -> Maybe a) -> Key -> PrefixTree a -> PrefixTree aSource

union :: PrefixTree a -> PrefixTree a -> PrefixTree aSource

O(n+m) Left-biased union of two maps. It prefers the first map when duplicate keys are encountered, i.e. (union == unionWith const).

unionWith :: (a -> a -> a) -> PrefixTree a -> PrefixTree a -> PrefixTree aSource

O(n+m) Union with a combining function.

union' :: (a -> a -> a) -> PrefixTree a -> PrefixTree a -> PrefixTree aSource

unionWithKey :: (Key -> a -> a -> a) -> PrefixTree a -> PrefixTree a -> PrefixTree aSource

O(n+m) Union with a combining function, including the key.

union'' :: (Key -> a -> a -> a) -> (Key -> Key) -> PrefixTree a -> PrefixTree a -> PrefixTree aSource

difference :: PrefixTree a -> PrefixTree b -> PrefixTree aSource

(O(min(n,m)) Difference between two tries (based on keys).

differenceWith :: (a -> b -> Maybe a) -> PrefixTree a -> PrefixTree b -> PrefixTree aSource

(O(min(n,m)) Difference with a combining function. If the combining function always returns Nothing, this is equal to proper set difference.

differenceWithKey :: (Key -> a -> b -> Maybe a) -> PrefixTree a -> PrefixTree b -> PrefixTree aSource

O(min(n,m)) Difference with a combining function, including the key. If two equal keys are encountered, the combining function is applied to the key and both values. If it returns Nothing, the element is discarded, if it returns Just a value, the element is updated with the new value.

diff'' :: (Key -> a -> b -> Maybe a) -> (Key -> Key) -> PrefixTree a -> PrefixTree b -> PrefixTree aSource

cutPx'' :: (PrefixTree a -> PrefixTree a) -> PrefixSet -> PrefixTree a -> PrefixTree aSource

cut off all branches from a tree t2 that are not part of set t1

the following laws must holds

lookup' k' . cutPx' (singlePS k) $ t == lookup' k t for every k' with k prefix of k'

lookup' k' . cutPx' (singlePS k) $ t == Nothing for every k' with k not being a prefix of k'

map :: (a -> b) -> PrefixTree a -> PrefixTree bSource

O(n) Map a function over all values in the prefix tree.

mapWithKey :: (Key -> a -> b) -> PrefixTree a -> PrefixTree bSource

map' :: (Key -> a -> b) -> (Key -> Key) -> PrefixTree a -> PrefixTree bSource

mapN :: (a -> b) -> PrefixTree a -> PrefixTree bSource

Variant of map that works on normalized trees

mapWithKeyN :: (Key -> a -> b) -> PrefixTree a -> PrefixTree bSource

map'' :: (Key -> a -> b) -> (Key -> Key) -> PrefixTree a -> PrefixTree bSource

mapM :: Monad m => (a -> m b) -> PrefixTree a -> m (PrefixTree b)Source

Monadic map

mapWithKeyM :: Monad m => (Key -> a -> m b) -> PrefixTree a -> m (PrefixTree b)Source

Monadic mapWithKey

mapM'' :: Monad m => (Key -> a -> m b) -> (Key -> Key) -> PrefixTree a -> m (PrefixTree b)Source

data PrefixTreeVisitor a b Source

Constructors

PTV 

Fields

v_empty :: b
 
v_val :: a -> b -> b
 
v_branch :: Sym -> b -> b -> b
 
v_leaf :: a -> b
 
v_last :: Sym -> b -> b
 
v_lsseq :: Key1 -> b -> b
 
v_brseq :: Key1 -> b -> b -> b
 
v_lssel :: Key1 -> a -> b
 
v_brsel :: Key1 -> a -> b -> b
 
v_lsval :: Sym -> a -> b
 
v_brval :: Sym -> a -> b -> b
 

space :: PrefixTree a -> IntSource

space required by a prefix tree (logically)

Singletons are counted as 0, all other n-ary constructors are counted as n+1 (1 for the constructor and 1 for every field) cons nodes of char lists are counted 2, 1 for the cons, 1 for the char for values only the ref to the value is counted, not the space for the value itself key chars are assumed to be unboxed

stat :: PrefixTree a -> PrefixTree IntSource

statistics about the # of different nodes in an optimized prefix tree

foldWithKey :: (Key -> a -> b -> b) -> b -> PrefixTree a -> bSource

O(n) Fold over all key/value pairs in the map.

fold :: (a -> b -> b) -> b -> PrefixTree a -> bSource

O(n) Fold over all values in the map.

fold' :: (Key -> a -> b -> b) -> b -> (Key -> Key) -> PrefixTree a -> bSource

toMap :: PrefixTree a -> Map Key aSource

O(n) Convert into an ordinary map.

fromMap :: Map Key a -> PrefixTree aSource

O(n) Convert an ordinary map into a Prefix tree

toList :: PrefixTree a -> [(Key, a)]Source

O(n) Returns all elements as list of key value pairs,

fromList :: [(Key, a)] -> PrefixTree aSource

O(n) Creates a trie from a list of key/value pairs.

size :: PrefixTree a -> IntSource

O(n) The number of elements.

elems :: PrefixTree a -> [a]Source

O(n) Returns all values.

keys :: PrefixTree a -> [Key]Source

O(n) Returns all values.

toListBF :: PrefixTree v -> [(Key, v)]Source

returns all key-value pairs in breadth first order (short words first) this enables prefix search with upper bounds on the size of the result set e.g. search ... >>> toListBF >>> take 1000 will give the 1000 shortest words found in the result set and will ignore all long words

toList is derived from the following code found in the net when searching haskell breadth first search

Haskell Standard Libraray Implementation

 br :: Tree a -> [a]
 br t = map rootLabel $
        concat $
        takeWhile (not . null) $                
        iterate (concatMap subForest) [t]

rootLabel :: (Key -> Key) -> PrefixTree v -> [(Key, v)]Source

subForest :: (Key -> Key) -> PrefixTree v -> [(Key -> Key, PrefixTree v)]Source

prefixFindWithKeyBF :: Key -> PrefixTree a -> [(Key, a)]Source

O(max(L,R)) Find all values where the string is a prefix of the key and include the keys in the result. The result list contains short words first