-- Compiler Toolkit: finite maps -- -- Author : Manuel M. T. Chakravarty -- Created: 23 March 95 -- -- Version $Revision: 1.12 $ from $Date: 2003/04/16 11:11:46 $ -- -- Copyright (c) [1995..2000] Manuel M. T. Chakravarty -- -- This file is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- This file is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- --- DESCRIPTION --------------------------------------------------------------- -- -- This module provides finite maps as an abstract data type. The idea is -- taken from the GHC module `FiniteMap' and the implementation follows -- closely the ideas found in ``Efficient sets---a balancing act'' from -- Stephan Adams in ``Journal of Functional Programming'', 3(4), 1993, -- drawing also from the longer exposition in ``Implementing Sets Efficiently -- in a Functional Language'' also from Stephan Adams, CSTR 92-10 in Technical -- Report Series, Unversity of Southampton, Department of Electronics and -- Computer Science, U.K. -- --- DOCU ---------------------------------------------------------------------- -- -- language: Haskell 98 -- -- * This implementation is based in bounded balance binary trees. They -- achieve good balancing while being simpler to maintain than AVL trees. -- -- * The implementation design is based on the idea of smart constructors, -- i.e., constructors that guarantee the compliance of the result with some -- constraints applied to the construction of the data type. -- --- TODO ---------------------------------------------------------------------- -- -- * `joinFM' would be a bit more efficient if the ``hedge union'' algorithm -- of the above mentioned technical report would be implemented. -- module Text.CTK.FiniteMaps (FiniteMap, zeroFM, unitFM, listToFM, listToCombFM, joinFM, joinCombFM, sizeFM, addToFM, addToCombFM, delFromFM, diffFM, intersectFM, intersectCombFM, mapFM, foldFM, filterFM, lookupFM, lookupDftFM, toListFM, domFM, imageFM) where -- finite maps are represented as ordered binary trees; each node represents -- a key-element pair in the map, its children contain pair with smaller and -- greater keys respectively (this requires an ordering relation on the keys); -- all keys in a tree are distinct -- data (Ord key) => FiniteMap key elem = Leaf | Node key -- this key elem -- assoc with key Int -- size >= 1 (FiniteMap key elem) -- smaller keys (FiniteMap key elem) -- greater keys -- we define two finite maps to be equal if they range over the same domain -- --instance Ord k => Eq (FiniteMap k e) where -- fm1 == fm2 = ((map fst . toListFM) $ fm1) == ((map fst . toListFM) $ fm2) instance (Ord k, Eq e) => Eq (FiniteMap k e) where fm1 == fm2 = (toListFM fm1) == (toListFM fm2) -- we define a total ordering on finite maps by lifting the lexicographical -- ordering over their domains (which we assume to be sorted) -- --instance Ord k => Ord (FiniteMap k e) where -- fm1 <= fm2 = ((map fst . toListFM) $ fm1) <= ((map fst . toListFM) $ fm2) instance (Ord k, Ord e) => Ord (FiniteMap k e) where fm1 <= fm2 = (toListFM fm1) <= (toListFM fm2) instance (Show k, Show e, Ord k) => Show (FiniteMap k e) where showsPrec = toShowS -- defined below -- weight ratio is respected by the balanced tree, i.e., no subtree will ever -- contain `ratio' times more elements than its sister -- ratio :: Int ratio = 5 -- this gives us an empty map -- zeroFM :: Ord k => FiniteMap k e zeroFM = Leaf -- a map with a single element -- unitFM :: Ord k => k -> e -> FiniteMap k e unitFM k e = Node k e 1 Leaf Leaf -- makes a list of key-element pairs into a finite map -- -- in case of duplicates, the last is taken -- listToFM :: Ord k => [(k, e)] -> FiniteMap k e listToFM = listToCombFM const -- makes a list of key-element pairs into a finite map where collisions are -- resolved by an explicit combiner fun -- -- the combiner expects the new element as its first argument -- listToCombFM :: Ord k => (e -> e -> e) -> [(k, e)] -> FiniteMap k e listToCombFM c = foldl addOnePair zeroFM where addOnePair m (k, e) = addToCombFM c k e m -- the number of elements in the map -- sizeFM :: Ord k => FiniteMap k e -> Int sizeFM Leaf = 0 sizeFM (Node _ _ s _ _) = s -- builds a node that automagically contains the right size -- smartNode :: Ord k => k -> e -> (FiniteMap k e) -> (FiniteMap k e) -> (FiniteMap k e) smartNode k e sm gr = Node k e (1 + sizeFM sm + sizeFM gr) sm gr -- builds a node that automagically balances the tree if necessary and inserts -- the right size; ONLY ONE of the subtrees is allowed to be off balance and -- only by ONE element -- smarterNode :: Ord k => k -> e -> (FiniteMap k e) -> (FiniteMap k e) -> (FiniteMap k e) smarterNode k e sm gr = let sm_n = sizeFM sm gr_n = sizeFM gr in if (sm_n + gr_n) < 2 -- very small tree (one part is a leaf) then smartNode k e sm gr -- => construct directly else if gr_n > (ratio * sm_n) -- child with greater keys is too big then -- => rotate left let Node _ _ _ gr_sm gr_gr = gr gr_sm_n = sizeFM gr_sm gr_gr_n = sizeFM gr_gr in if gr_sm_n < gr_gr_n then single_L k e sm gr else double_L k e sm gr else if sm_n > (ratio * gr_n) -- child with smaller keys is too big then -- => rotate right let Node _ _ _ sm_sm sm_gr = sm sm_sm_n = sizeFM sm_sm sm_gr_n = sizeFM sm_gr in if sm_gr_n < sm_sm_n then single_R k e sm gr else double_R k e sm gr else smartNode k e sm gr -- else nearly balanced => construct directly where single_L ka ea x (Node kb eb _ y z) = smartNode kb eb (smartNode ka ea x y) z double_L ka ea x (Node kc ec _ (Node kb eb _ y1 y2) z) = smartNode kb eb (smartNode ka ea x y1) (smartNode kc ec y2 z) single_R kb eb (Node ka ea _ x y) z = smartNode ka ea x (smartNode kb eb y z) double_R kc ec (Node ka ea _ x (Node kb eb _ y1 y2)) z = smartNode kb eb (smartNode ka ea x y1) (smartNode kc ec y2 z) -- add the given key-element pair to the map -- -- overrides previous entries -- addToFM :: Ord k => k -> e -> FiniteMap k e -> FiniteMap k e addToFM = addToCombFM const -- add the given key-element pair to the map where collisions are resolved by -- an explicit combiner fun -- -- the combiner expects the new element as its first argument -- addToCombFM :: Ord k => (e -> e -> e) -> k -> e -> FiniteMap k e -> FiniteMap k e addToCombFM c k e Leaf = unitFM k e addToCombFM c k e (Node k' e' n sm gr) | k < k' = smarterNode k' e' (addToCombFM c k e sm) gr | k > k' = smarterNode k' e' sm (addToCombFM c k e gr) | otherwise = Node k (c e e') n sm gr -- removes the key-element pair specified by the given key from a map -- -- does not complain if the key is not in the map -- delFromFM :: Ord k => k -> FiniteMap k e -> FiniteMap k e delFromFM k Leaf = Leaf delFromFM k (Node k' e' n sm gr) | k < k' = smarterNode k' e' (delFromFM k sm) gr | k > k' = smarterNode k' e' sm (delFromFM k gr) | otherwise = smartGlue sm gr -- given two maps where all keys in the left are smaller than those in the -- right and they are not too far out of balance (within ratio), glue them -- into one map -- smartGlue :: Ord k => FiniteMap k e -> FiniteMap k e -> FiniteMap k e smartGlue Leaf gr = gr smartGlue sm Leaf = sm smartGlue sm gr = let (k, e, gr') = extractMin gr in smarterNode k e sm gr' -- extract the association with the minimal key (i.e., leftmost in the tree) -- and simultaneously return the map without this association -- extractMin :: Ord k => FiniteMap k e -> (k, e, FiniteMap k e) extractMin (Node k e _ Leaf gr) = (k, e, gr) extractMin (Node k e _ sm gr) = let (minK, minE, sm') = extractMin sm in (minK, minE, smarterNode k e sm' gr) -- given two maps where all keys in the left are smaller than those in the -- right, glue them into one map -- glue :: Ord k => FiniteMap k e -> FiniteMap k e -> FiniteMap k e glue Leaf gr = gr glue sm Leaf = sm glue sm@(Node k_sm e_sm n_sm sm_sm gr_sm) gr@(Node k_gr e_gr n_gr sm_gr gr_gr) | (ratio * n_sm) < n_gr = smarterNode k_gr e_gr (glue sm sm_gr) gr_gr | (ratio * n_gr) < n_sm = smarterNode k_sm e_sm sm_sm (glue gr_sm gr) | otherwise = let (k, e, gr') = extractMin gr in smarterNode k e sm gr' -- builds a node that automagically balances the tree if necessary and inserts -- the right size (just as `smarterNode'), BUT which is only applicable if the -- two given maps do not overlap (in their key values) and the new, given key -- lies between the keys in the first and the second map -- -- its time complexity is proportional to the _difference_ in the height of -- the two trees representing the given maps -- smartestNode :: Ord k => k -> e -> (FiniteMap k e) -> (FiniteMap k e) -> (FiniteMap k e) -- -- if any of both trees is too big (with respect to the ratio), we insert -- into the other; otherwise, a simple creation of a new node is sufficient -- smartestNode k e Leaf gr = addToFM k e gr smartestNode k e sm Leaf = addToFM k e sm smartestNode k e sm@(Node k_sm e_sm n_sm sm_sm gr_sm) gr@(Node k_gr e_gr n_gr sm_gr gr_gr) | (ratio * n_sm) < n_gr = smarterNode k_gr e_gr (smartestNode k e sm sm_gr) gr_gr | (ratio * n_gr) < n_sm = smarterNode k_sm e_sm sm_sm (smartestNode k e gr_sm gr) | otherwise = smartNode k e sm gr -- joins two maps -- -- entries in the left map shadow those in the right -- joinFM :: Ord k => FiniteMap k e -> FiniteMap k e -> FiniteMap k e -- -- explicitly coded, instead of using `joinCombFM', to avoid the `lookupFM' -- for each element in the left map, which is unnecessary in this case -- joinFM m Leaf = m joinFM Leaf m = m joinFM (Node k e _ sm gr) m = smartestNode k e sm' gr' where sm' = joinFM sm (smaller k m) gr' = joinFM gr (greater k m) -- joins two maps where collisions are resolved by an explicit combiner fun -- joinCombFM :: Ord k => (e -> e -> e) -> FiniteMap k e -> FiniteMap k e -> FiniteMap k e joinCombFM c m Leaf = m joinCombFM c Leaf m = m joinCombFM c (Node k e _ sm gr) m = smartestNode k e' sm' gr' where sm' = joinCombFM c sm (smaller k m) gr' = joinCombFM c gr (greater k m) e' = case lookupFM m k of Just f -> c e f Nothing -> e -- cut the part of the tree that is smaller than the given key out of the -- map -- smaller :: Ord k => k -> FiniteMap k e -> FiniteMap k e smaller _ Leaf = Leaf smaller k (Node k' e _ sm gr) | k < k' = smaller k sm | k > k' = smartestNode k' e sm (smaller k gr) | otherwise = sm -- cut the part of the tree that is greater than the given key out of the -- map -- greater :: Ord k => k -> FiniteMap k e -> FiniteMap k e greater _ Leaf = Leaf greater k (Node k' e _ sm gr) | k > k' = greater k gr | k < k' = smartestNode k' e (greater k sm) gr | otherwise = gr -- given two finite maps, yields a finite map containg all elements of the -- first argument except those having a key that is contained in the second -- map -- diffFM :: Ord k => FiniteMap k e -> FiniteMap k e' -> FiniteMap k e diffFM Leaf _ = Leaf diffFM m Leaf = m diffFM m (Node k _ _ sm gr) = glue (diffFM sm' sm) (diffFM gr' gr) where sm' = smaller k m gr' = greater k m -- given two finite maps, yield the map containing only entries of which the -- keys are in both maps -- -- the elements are taken from the left map -- intersectFM :: Ord k => FiniteMap k e -> FiniteMap k e -> FiniteMap k e intersectFM = intersectCombFM const -- given two finite maps, yield the map containing only entries of which the -- keys are in both maps -- -- the corresponding elements of the two maps are combined using the given, -- function -- intersectCombFM :: Ord k => (e -> e -> e) -> FiniteMap k e -> FiniteMap k e -> FiniteMap k e intersectCombFM c _ Leaf = Leaf intersectCombFM c Leaf _ = Leaf intersectCombFM c (Node k e _ sm gr) m | contained = smartestNode k (c e e') sm' gr' | otherwise = glue sm' gr' where sm' = intersectCombFM c sm (smaller k m) gr' = intersectCombFM c gr (greater k m) (contained, e') = case lookupFM m k of Just f -> (True, f) Nothing -> (False, undefined) undefined = error "FiniteMaps: intersectCombFM: Undefined" -- given a function on a finite maps elements and a finite map, yield the -- finite map where every element is replaced as specified by the function -- mapFM :: Ord k => (k -> e -> e') -> FiniteMap k e -> FiniteMap k e' mapFM f Leaf = Leaf mapFM f (Node k e n sm gr) = Node k (f k e) n (mapFM f sm) (mapFM f gr) -- folds a finite map according to a given function and _neutral_ value (with -- respect to the function) that is used for an empty map -- foldFM :: Ord k => (k -> e -> a -> a) -> a -> FiniteMap k e -> a foldFM f z Leaf = z foldFM f z (Node k e _ sm gr) = foldFM f (f k e (foldFM f z gr)) sm -- given a predicate and a finite map, yields the finite map containing all -- key-element pairs satisfying the predicate -- filterFM :: Ord k => (k -> e -> Bool) -> FiniteMap k e -> FiniteMap k e filterFM p Leaf = Leaf filterFM p (Node k e _ sm gr) | p k e = smartestNode k e sm' gr' | otherwise = glue sm' gr' where sm' = filterFM p sm gr' = filterFM p gr -- given a map and a key, returns `Just e' iff the key associates to `e'; -- if the key is not in the map, `Nothing' is returned -- lookupFM :: Ord k => FiniteMap k e -> k -> Maybe e lookupFM Leaf _ = Nothing lookupFM (Node k e _ sm gr) k' | k' == k = Just e | k' < k = lookupFM sm k' | k' > k = lookupFM gr k' -- just as `lookupFM', but instead of returning a `Maybe' type, a default -- value to be returned in case that the key is not in the map has to be -- specified -- lookupDftFM :: Ord k => FiniteMap k e -> e -> k -> e lookupDftFM map e k = case lookupFM map k of Just e' -> e' Nothing -> e -- given a finite map, yields a list of the key-element pairs -- toListFM :: Ord k => FiniteMap k e -> [(k, e)] toListFM = foldFM (\k e kes -> (k, e):kes) [] -- |Yield the domain of a finite map as a list -- domFM :: Ord k => FiniteMap k e -> [k] domFM = map fst . toListFM -- |Yield the image of a finite map as a list -- imageFM :: Ord k => FiniteMap k e -> [e] imageFM = map snd . toListFM -- pretty print routine (used as a method in FiniteMap's instance of `Show') -- toShowS :: (Show a, Show b, Ord a) => Int -> FiniteMap a b -> ShowS toShowS _ fm = format fm 0 where format Leaf _ = id format (Node k e n sm gr) indent = let this = showString (take indent (repeat ' ')) . shows k . showString " --> " . shows e . showString " (size: " . shows n . showString ")\n" in this . format sm (indent + 2) . format gr (indent + 2)