%
% (c) The AQUA Project, Glasgow University, 1994-1996
%
\section[FiniteMap]{An implementation of finite maps}
``Finite maps'' are the heart of the compiler's
lookup-tables/environments and its implementation of sets. Important
stuff!
This code is derived from that in the paper:
\begin{display}
S Adams
"Efficient sets: a balancing act"
Journal of functional programming 3(4) Oct 1993, pp553-562
\end{display}
The code is SPECIALIZEd to various highly-desirable types (e.g., Id)
near the end (only \tr{#ifdef COMPILING_GHC}).
\begin{code}
module Data.FiniteMap (
FiniteMap,
emptyFM, unitFM, listToFM,
addToFM,
addToFM_C,
addListToFM,
addListToFM_C,
delFromFM ,
delListFromFM,
plusFM,
plusFM_C,
minusFM,
foldFM,
intersectFM ,
intersectFM_C ,
mapFM , mapMaybeFM , filterFM ,
sizeFM, isEmptyFM, elemFM, lookupFM, lookupWithDefaultFM,
fmToList, keysFM, eltsFM
) where
import Maybe ( isJust )
\end{code}
%************************************************************************
%* *
\subsection{The signature of the module}
%* *
%************************************************************************
\begin{code}
emptyFM :: FiniteMap key elt
unitFM :: key -> elt -> FiniteMap key elt
listToFM :: (Ord key ) => [(key,elt)] -> FiniteMap key elt
addToFM :: (Ord key ) => FiniteMap key elt -> key -> elt -> FiniteMap key elt
addListToFM :: (Ord key ) => FiniteMap key elt -> [(key,elt)] -> FiniteMap key elt
addToFM_C :: (Ord key ) => (elt -> elt -> elt)
-> FiniteMap key elt -> key -> elt
-> FiniteMap key elt
addListToFM_C :: (Ord key ) => (elt -> elt -> elt)
-> FiniteMap key elt -> [(key,elt)]
-> FiniteMap key elt
delFromFM :: (Ord key ) => FiniteMap key elt -> key -> FiniteMap key elt
delListFromFM :: (Ord key ) => FiniteMap key elt -> [key] -> FiniteMap key elt
plusFM :: (Ord key ) => FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
plusFM_C :: (Ord key ) => (elt -> elt -> elt)
-> FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt
minusFM :: (Ord key ) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt
intersectFM :: (Ord key ) => FiniteMap key elt -> FiniteMap key elt -> FiniteMap key elt
intersectFM_C :: (Ord key ) => (elt1 -> elt2 -> elt3)
-> FiniteMap key elt1 -> FiniteMap key elt2 -> FiniteMap key elt3
foldFM :: (key -> elt -> a -> a) -> a -> FiniteMap key elt -> a
mapFM :: (key -> elt1 -> elt2) -> FiniteMap key elt1 -> FiniteMap key elt2
filterFM :: (Ord key ) => (key -> elt -> Bool)
-> FiniteMap key elt -> FiniteMap key elt
mapMaybeFM :: (Ord key )
=> (key -> elt1 -> Maybe elt2)
-> FiniteMap key elt1
-> FiniteMap key elt2
sizeFM :: FiniteMap key elt -> Int
isEmptyFM :: FiniteMap key elt -> Bool
elemFM :: (Ord key ) => key -> FiniteMap key elt -> Bool
lookupFM :: (Ord key ) => FiniteMap key elt -> key -> Maybe elt
lookupWithDefaultFM
:: (Ord key ) => FiniteMap key elt -> elt -> key -> elt
fmToList :: FiniteMap key elt -> [(key,elt)]
keysFM :: FiniteMap key elt -> [key]
eltsFM :: FiniteMap key elt -> [elt]
\end{code}
%************************************************************************
%* *
\subsection{The @FiniteMap@ data type, and building of same}
%* *
%************************************************************************
Invariants about @FiniteMap@:
\begin{enumerate}
\item
all keys in a FiniteMap are distinct
\item
all keys in left subtree are $<$ key in Branch and
all keys in right subtree are $>$ key in Branch
\item
size field of a Branch gives number of Branch nodes in the tree
\item
size of left subtree is differs from size of right subtree by a
factor of at most \tr{sIZE_RATIO}
\end{enumerate}
\begin{code}
data FiniteMap key elt
= EmptyFM
| Branch key elt
Int
(FiniteMap key elt)
(FiniteMap key elt)
\end{code}
\begin{code}
emptyFM = EmptyFM
unitFM key elt = Branch key elt 1 emptyFM emptyFM
listToFM = addListToFM emptyFM
\end{code}
%************************************************************************
%* *
\subsection{Adding to and deleting from @FiniteMaps@}
%* *
%************************************************************************
\begin{code}
addToFM fm key elt = addToFM_C (\ old new -> new) fm key elt
addToFM_C combiner EmptyFM key elt = unitFM key elt
addToFM_C combiner (Branch key elt size fm_l fm_r) new_key new_elt
| new_key < key = mkBalBranch key elt (addToFM_C combiner fm_l new_key new_elt) fm_r
| new_key > key = mkBalBranch key elt fm_l (addToFM_C combiner fm_r new_key new_elt)
| otherwise = Branch new_key (combiner elt new_elt) size fm_l fm_r
addListToFM fm key_elt_pairs = addListToFM_C (\ old new -> new) fm key_elt_pairs
addListToFM_C combiner fm key_elt_pairs
= foldl add fm key_elt_pairs
where
add fmap (key,elt) = addToFM_C combiner fmap key elt
\end{code}
\begin{code}
delFromFM EmptyFM del_key = emptyFM
delFromFM (Branch key elt size fm_l fm_r) del_key
| del_key > key
= mkBalBranch key elt fm_l (delFromFM fm_r del_key)
| del_key < key
= mkBalBranch key elt (delFromFM fm_l del_key) fm_r
| key == del_key
= glueBal fm_l fm_r
delListFromFM fm keys = foldl delFromFM fm keys
\end{code}
%************************************************************************
%* *
\subsection{Combining @FiniteMaps@}
%* *
%************************************************************************
\begin{code}
plusFM_C combiner EmptyFM fm2 = fm2
plusFM_C combiner fm1 EmptyFM = fm1
plusFM_C combiner fm1 (Branch split_key elt2 _ left right)
= mkVBalBranch split_key new_elt
(plusFM_C combiner lts left)
(plusFM_C combiner gts right)
where
lts = splitLT fm1 split_key
gts = splitGT fm1 split_key
new_elt = case lookupFM fm1 split_key of
Nothing -> elt2
Just elt1 -> combiner elt1 elt2
plusFM EmptyFM fm2 = fm2
plusFM fm1 EmptyFM = fm1
plusFM fm1 (Branch split_key elt1 _ left right)
= mkVBalBranch split_key elt1 (plusFM lts left) (plusFM gts right)
where
lts = splitLT fm1 split_key
gts = splitGT fm1 split_key
minusFM EmptyFM fm2 = emptyFM
minusFM fm1 EmptyFM = fm1
minusFM fm1 (Branch split_key elt _ left right)
= glueVBal (minusFM lts left) (minusFM gts right)
where
lts = splitLT fm1 split_key
gts = splitGT fm1 split_key
intersectFM fm1 fm2 = intersectFM_C (\ left right -> right) fm1 fm2
intersectFM_C combiner fm1 EmptyFM = emptyFM
intersectFM_C combiner EmptyFM fm2 = emptyFM
intersectFM_C combiner fm1 (Branch split_key elt2 _ left right)
| isJust maybe_elt1
= mkVBalBranch split_key (combiner elt1 elt2) (intersectFM_C combiner lts left)
(intersectFM_C combiner gts right)
| otherwise
= glueVBal (intersectFM_C combiner lts left) (intersectFM_C combiner gts right)
where
lts = splitLT fm1 split_key
gts = splitGT fm1 split_key
maybe_elt1 = lookupFM fm1 split_key
Just elt1 = maybe_elt1
\end{code}
%************************************************************************
%* *
\subsection{Mapping, folding, and filtering with @FiniteMaps@}
%* *
%************************************************************************
\begin{code}
foldFM k z EmptyFM = z
foldFM k z (Branch key elt _ fm_l fm_r)
= foldFM k (k key elt (foldFM k z fm_r)) fm_l
mapFM f EmptyFM = emptyFM
mapFM f (Branch key elt size fm_l fm_r)
= Branch key (f key elt) size (mapFM f fm_l) (mapFM f fm_r)
mapMaybeFM f EmptyFM = emptyFM
mapMaybeFM f (Branch key elt _ fm_l fm_r) =
case f key elt of
Nothing -> glueVBal (mapMaybeFM f fm_l) (mapMaybeFM f fm_r)
Just elt' -> mkVBalBranch key elt' (mapMaybeFM f fm_l) (mapMaybeFM f fm_r)
filterFM p EmptyFM = emptyFM
filterFM p (Branch key elt _ fm_l fm_r)
| p key elt
= mkVBalBranch key elt (filterFM p fm_l) (filterFM p fm_r)
| otherwise
= glueVBal (filterFM p fm_l) (filterFM p fm_r)
\end{code}
%************************************************************************
%* *
\subsection{Interrogating @FiniteMaps@}
%* *
%************************************************************************
\begin{code}
sizeFM EmptyFM = 0
sizeFM (Branch _ _ size _ _) = size
isEmptyFM fm = sizeFM fm == 0
lookupFM EmptyFM key = Nothing
lookupFM (Branch key elt _ fm_l fm_r) key_to_find
| key_to_find < key = lookupFM fm_l key_to_find
| key_to_find > key = lookupFM fm_r key_to_find
| otherwise = Just elt
key `elemFM` fm
= case (lookupFM fm key) of { Nothing -> False; Just elt -> True }
lookupWithDefaultFM fm deflt key
= case (lookupFM fm key) of { Nothing -> deflt; Just elt -> elt }
\end{code}
%************************************************************************
%* *
\subsection{Listifying @FiniteMaps@}
%* *
%************************************************************************
\begin{code}
fmToList fm = foldFM (\ key elt rest -> (key,elt) : rest) [] fm
keysFM fm = foldFM (\ key elt rest -> key : rest) [] fm
eltsFM fm = foldFM (\ key elt rest -> elt : rest) [] fm
\end{code}
%************************************************************************
%* *
\subsection{The implementation of balancing}
%* *
%************************************************************************
%************************************************************************
%* *
\subsubsection{Basic construction of a @FiniteMap@}
%* *
%************************************************************************
@mkBranch@ simply gets the size component right. This is the ONLY
(non-trivial) place the Branch object is built, so the ASSERTion
recursively checks consistency. (The trivial use of Branch is in
@unitFM@.)
\begin{code}
sIZE_RATIO :: Int
sIZE_RATIO = 5
mkBranch :: (Ord key )
=> Int
-> key -> elt
-> FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
mkBranch which key elt fm_l fm_r
= --{--}
let
result = Branch key elt (unbox (1 + left_size + right_size)) fm_l fm_r
in
result
where
left_ok = case fm_l of
EmptyFM -> True
Branch left_key _ _ _ _ -> let
biggest_left_key = fst (findMax fm_l)
in
biggest_left_key < key
right_ok = case fm_r of
EmptyFM -> True
Branch right_key _ _ _ _ -> let
smallest_right_key = fst (findMin fm_r)
in
key < smallest_right_key
balance_ok = True
left_size = sizeFM fm_l
right_size = sizeFM fm_r
unbox :: Int -> Int
unbox x = x
\end{code}
%************************************************************************
%* *
\subsubsection{{\em Balanced} construction of a @FiniteMap@}
%* *
%************************************************************************
@mkBalBranch@ rebalances, assuming that the subtrees aren't too far
out of whack.
\begin{code}
mkBalBranch :: (Ord key )
=> key -> elt
-> FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
mkBalBranch key elt fm_L fm_R
| size_l + size_r < 2
= mkBranch 1 key elt fm_L fm_R
| size_r > sIZE_RATIO * size_l
= case fm_R of
Branch _ _ _ fm_rl fm_rr
| sizeFM fm_rl < 2 * sizeFM fm_rr -> single_L fm_L fm_R
| otherwise -> double_L fm_L fm_R
| size_l > sIZE_RATIO * size_r
= case fm_L of
Branch _ _ _ fm_ll fm_lr
| sizeFM fm_lr < 2 * sizeFM fm_ll -> single_R fm_L fm_R
| otherwise -> double_R fm_L fm_R
| otherwise
= mkBranch 2 key elt fm_L fm_R
where
size_l = sizeFM fm_L
size_r = sizeFM fm_R
single_L fm_l (Branch key_r elt_r _ fm_rl fm_rr)
= mkBranch 3 key_r elt_r (mkBranch 4 key elt fm_l fm_rl) fm_rr
double_L fm_l (Branch key_r elt_r _ (Branch key_rl elt_rl _ fm_rll fm_rlr) fm_rr)
= mkBranch 5 key_rl elt_rl (mkBranch 6 key elt fm_l fm_rll)
(mkBranch 7 key_r elt_r fm_rlr fm_rr)
single_R (Branch key_l elt_l _ fm_ll fm_lr) fm_r
= mkBranch 8 key_l elt_l fm_ll (mkBranch 9 key elt fm_lr fm_r)
double_R (Branch key_l elt_l _ fm_ll (Branch key_lr elt_lr _ fm_lrl fm_lrr)) fm_r
= mkBranch 10 key_lr elt_lr (mkBranch 11 key_l elt_l fm_ll fm_lrl)
(mkBranch 12 key elt fm_lrr fm_r)
\end{code}
\begin{code}
mkVBalBranch :: (Ord key )
=> key -> elt
-> FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
mkVBalBranch key elt EmptyFM fm_r = addToFM fm_r key elt
mkVBalBranch key elt fm_l EmptyFM = addToFM fm_l key elt
mkVBalBranch key elt fm_l@(Branch key_l elt_l _ fm_ll fm_lr)
fm_r@(Branch key_r elt_r _ fm_rl fm_rr)
| sIZE_RATIO * size_l < size_r
= mkBalBranch key_r elt_r (mkVBalBranch key elt fm_l fm_rl) fm_rr
| sIZE_RATIO * size_r < size_l
= mkBalBranch key_l elt_l fm_ll (mkVBalBranch key elt fm_lr fm_r)
| otherwise
= mkBranch 13 key elt fm_l fm_r
where
size_l = sizeFM fm_l
size_r = sizeFM fm_r
\end{code}
%************************************************************************
%* *
\subsubsection{Gluing two trees together}
%* *
%************************************************************************
@glueBal@ assumes its two arguments aren't too far out of whack, just
like @mkBalBranch@. But: all keys in first arg are $<$ all keys in
second.
\begin{code}
glueBal :: (Ord key )
=> FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
glueBal EmptyFM fm2 = fm2
glueBal fm1 EmptyFM = fm1
glueBal fm1 fm2
| sizeFM fm2 > sizeFM fm1
= mkBalBranch mid_key2 mid_elt2 fm1 (deleteMin fm2)
| otherwise
= mkBalBranch mid_key1 mid_elt1 (deleteMax fm1) fm2
where
(mid_key1, mid_elt1) = findMax fm1
(mid_key2, mid_elt2) = findMin fm2
\end{code}
@glueVBal@ copes with arguments which can be of any size.
But: all keys in first arg are $<$ all keys in second.
\begin{code}
glueVBal :: (Ord key )
=> FiniteMap key elt -> FiniteMap key elt
-> FiniteMap key elt
glueVBal EmptyFM fm2 = fm2
glueVBal fm1 EmptyFM = fm1
glueVBal fm_l@(Branch key_l elt_l _ fm_ll fm_lr)
fm_r@(Branch key_r elt_r _ fm_rl fm_rr)
| sIZE_RATIO * size_l < size_r
= mkBalBranch key_r elt_r (glueVBal fm_l fm_rl) fm_rr
| sIZE_RATIO * size_r < size_l
= mkBalBranch key_l elt_l fm_ll (glueVBal fm_lr fm_r)
| otherwise
= glueBal fm_l fm_r
where
size_l = sizeFM fm_l
size_r = sizeFM fm_r
\end{code}
%************************************************************************
%* *
\subsection{Local utilities}
%* *
%************************************************************************
\begin{code}
splitLT, splitGT :: (Ord key ) => FiniteMap key elt -> key -> FiniteMap key elt
splitLT EmptyFM split_key = emptyFM
splitLT (Branch key elt _ fm_l fm_r) split_key
| split_key < key = splitLT fm_l split_key
| split_key > key = mkVBalBranch key elt fm_l (splitLT fm_r split_key)
| otherwise = fm_l
splitGT EmptyFM split_key = emptyFM
splitGT (Branch key elt _ fm_l fm_r) split_key
| split_key > key = splitGT fm_r split_key
| split_key < key = mkVBalBranch key elt (splitGT fm_l split_key) fm_r
| otherwise = fm_r
findMin :: FiniteMap key elt -> (key,elt)
findMin (Branch key elt _ EmptyFM _) = (key,elt)
findMin (Branch key elt _ fm_l _) = findMin fm_l
deleteMin :: (Ord key ) => FiniteMap key elt -> FiniteMap key elt
deleteMin (Branch key elt _ EmptyFM fm_r) = fm_r
deleteMin (Branch key elt _ fm_l fm_r) = mkBalBranch key elt (deleteMin fm_l) fm_r
findMax :: FiniteMap key elt -> (key,elt)
findMax (Branch key elt _ _ EmptyFM) = (key,elt)
findMax (Branch key elt _ _ fm_r) = findMax fm_r
deleteMax :: (Ord key ) => FiniteMap key elt -> FiniteMap key elt
deleteMax (Branch key elt _ fm_l EmptyFM) = fm_l
deleteMax (Branch key elt _ fm_l fm_r) = mkBalBranch key elt fm_l (deleteMax fm_r)
\end{code}
%************************************************************************
%* *
\subsection{Output-ery}
%* *
%************************************************************************
\begin{code}
instance (Eq key, Eq elt) => Eq (FiniteMap key elt) where
fm_1 == fm_2 = (sizeFM fm_1 == sizeFM fm_2) &&
(fmToList fm_1 == fmToList fm_2)
\end{code}
%************************************************************************
%* *
\subsection{Efficiency pragmas for GHC}
%* *
%************************************************************************
When the FiniteMap module is used in GHC, we specialise it for
\tr{Uniques}, for dastardly efficiency reasons.
\begin{code}
\end{code}