% % Halipeto -- Haskell static web page generator % Copyright 2004 Andrew Cooke (andrew@acooke.org) % Copyright 2007-2010 Peter Simons (simons@cryp.to) % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % \section{Dictionary} This section provides a data structure to associate strings with values (much like a hash table). Key value pairs are added by generating a new instance, reusing the old structure where appropriate. Since Dictionaries are purely functional data structures they provide stack--like semantics on return (ie. if modified dictionaries are only passed downwards then returning from a function ``pops'' the data that was added within that function's scope). The dictionary may be sensitive to the case of the keys or not (two different implementations of a single class that share much underlying code). Since the class interfaces is general the same functions manipulate either type of dictionary. Functions oustide the class interface are identified by appending ``NC'' to the case insensitive version. \begin{code} module Halipeto.Dictionary ( Dictionary, null, empty, emptyNC, toDot, fromDot, SubDictionary, OrdDictionary, add, add', addAll, addAll', search, search', keys, keys', contents, contents', values, children, children', adopt, adopt', merge, substitute, subAll, search'', diff, diff' ) where import Prelude hiding (null) import Data.Char import Data.List hiding (find, null, partition, insert) import Halipeto.Utilities \end{code} \subsection{Namespaces and Subsets} Keys in the dictionary can be used to construct a hierarchical namespace by following the convention that the null character separates ``words''. %%Haddock: The namespace separator \begin{code} null :: Char null = chr 0 \end{code} There's a slight ugliness in the code here, because the field separators are stored in the same way as the fields themselves (as text). This could lead to confusing results if searches are made that include null. Since null is messy for the end user to manipulate (which is why the problem above is not so serious in practice), two alternative interfaces are provided to this hierarchical namespace. One uses ``.'' to represent the separator in keys, the other represents keys as a list of words. %%Haddock: Convert a list of strings to a ``dot'' separated string \begin{code} toDot :: [String] -> String toDot = toSep '.' \end{code} %%Haddock: Convert a ``dot'' separated string to a list of strings \begin{code} fromDot :: String -> [String] fromDot = fromSep '.' \end{code} %%Haddock: Convert an array of strings to a null-separated string \begin{code} toNull :: [String] -> String toNull = toSep null \end{code} %%Haddock: Convert a null separated string to a list of strings \begin{code} fromNull :: String -> [String] fromNull = fromSep null \end{code} Functions in the class interface that use simple strings have a tick (single quote) suffix (in general, the list based interfaces should be used in Haskell; the string based interfaces give a simpler interface to paths embedded in HTML templates). \subsection{Class} The general dictionary class. This requires -fglasgow-exts for compilation with ghc because more than one type parameter is present (a is needed to make the SubDictionary class, as far as I can see). %%Haddock: Associate string keys with values \begin{code} class Dictionary d a where add :: d a -> ([String], a) -> d a -- ^ Add a key-value pair add' :: d a -> (String, a) -> d a -- ^ Add .-format addAll :: d a -> [([String], a)] -> d a -- ^ Add a key-value list addAll' :: d a -> [(String, a)] -> d a -- ^ Add list .-format search :: d a -> [String] -> Maybe a -- ^ Lookup a key search' :: d a -> String -> Maybe a -- ^ Lookup .-format keys :: d a -> [[String]] -- ^ All keys keys' :: d a -> [String] -- ^ All keys .-format contents :: d a -> [([String], a)] -- ^ All key-value pairs contents' :: d a -> [(String, a)] -- ^ All pairs .-format values :: d a -> [a] -- ^ All values children :: d a -> [String] -> [d a] -- ^ Sub-dictionaries children' :: d a -> String -> [d a] -- ^ Children .-format adopt :: d a -> ([String], d a) -> d a -- ^ Append sub-dictionary adopt' :: d a -> (String, d a) -> d a -- ^ Adopt .-format merge :: d a -> d a -> d a -- ^ Combine two dictionaries \end{code} \subsection{Case Sensitive} This builds directly on the tree--based implementation described below. \begin{code} data DictCase a = DictCase (Dict a) (Maybe a) instance (Show a) => Show (DictCase a) where show (DictCase d v) = "{" ++ show d ++ "," ++ show v ++ "}" \end{code} %%Haddock: An empty dictionary \begin{code} empty :: Dictionary DictCase a => DictCase a empty = DictCase Empty Nothing pack :: Unpacked a -> DictCase a pack (d, v) = DictCase d v unpack :: DictCase a -> Unpacked a unpack (DictCase d v) = (d, v) instance Dictionary DictCase a where add d (k, v) = pack $ insert (unpack d) (toNull k, v) add' d (k, v) = add d (fromDot k, v) addAll = foldl add addAll' = foldl add' search d k = find (unpack d) (toNull k) search' d k = search d (fromDot k) keys = map fst . contents keys' = map toDot . keys contents = map (\(k, v) -> (fromNull k, v)) . contents'' . unpack contents' = map (\(k, v) -> (toDot k, v)) . contents values = map snd . contents children d k = map pack $ children'' (unpack d) (toNull k) children' d k = children d (fromDot k) adopt d1 (k, d2) = pack $ adopt'' (unpack d1) (toNull k) (unpack d2) adopt' d1 (k, d2) = adopt d1 (fromDot k, d2) merge d1 d2 = pack $ merge'' (unpack d1) (unpack d2) \end{code} \subsection{Case Insensitive} Again, this builds on the tree--based implementation described below. Is there a better way to generalise this packing/unpacking to a common form that occurs in the code above and below? \begin{code} data DictNoCase a = DictNoCase (Dict a) (Maybe a) instance (Show a) => Show (DictNoCase a) where show (DictNoCase d v) = "{" ++ show d ++ "," ++ show v ++ "}" \end{code} %%Haddock: An empty case-insensitive dictionary \begin{code} emptyNC :: Dictionary DictNoCase a => DictNoCase a emptyNC = DictNoCase Empty Nothing packNC :: Unpacked a -> DictNoCase a packNC (d, v) = DictNoCase d v unpackNC :: DictNoCase a -> Unpacked a unpackNC (DictNoCase d v) = (d, v) uncase :: String -> String uncase = map toLower instance Dictionary DictNoCase a where add d (k, v) = packNC $ insert (unpackNC d) (uncase $ toNull k, v) add' d (k, v) = add d (fromDot k, v) addAll = foldl add addAll' = foldl add' search d k = find (unpackNC d) (uncase $ toNull k) search' d k = search d (fromDot k) keys = map fst . contents keys' = map toDot . keys contents = map (\(k, v) -> (fromNull k, v)) . contents'' . unpackNC contents' = map (\(k, v) -> (toDot k, v)) . contents values = map snd . contents children d k = map packNC $ children'' (unpackNC d) (uncase $ toNull k) children' d k = children d (fromDot k) adopt d1 (k, d2) = packNC $ adopt'' (unpackNC d1) (uncase $ toNull k) (unpackNC d2) adopt' d1 (k, d2) = adopt d1 (fromDot k, d2) merge d1 d2 = packNC $ merge'' (unpackNC d1) (unpackNC d2) \end{code} \subsection{Partition} Diff generates the differences between two dictionaries. The first list in the result contains entries in d1 that are not present in d2; the second contains entries in d2 that are not present in d1; the third entries comomn to both. %%Haddock: A dictionary whose values can be ordered (and so sorted) \begin{code} class (Ord a, Dictionary d a) => OrdDictionary d a where diff :: d a -> d a -> ([([String], a)], [([String], a)], [([String], a)]) -- ^ Partition into common and distinct values diff' :: d a -> d a -> ([(String, a)], [(String, a)], [(String, a)]) -- ^ Partition into common and distinct values .-format instance Ord a => OrdDictionary DictCase a where diff d1 d2 = mapT3 (map (\(k, v) -> (fromNull k, v))) $ diff'' (unpack d1) (unpack d2) diff' d1 d2 = mapT3 (map (\(k, v) -> (toDot k, v))) $ diff d1 d2 instance Ord a => OrdDictionary DictNoCase a where diff d1 d2 = mapT3 (map (\(k, v) -> (fromNull k, v))) $ diff'' (unpackNC d1) (unpackNC d2) diff' d1 d2 = mapT3 (map (\(k, v) -> (toDot k, v))) $ diff d1 d2 diff'' :: Ord a => Unpacked a -> Unpacked a -> ([(String, a)], [(String, a)], [(String, a)]) diff'' d1 d2 = partition [] [] [] (contents'' d1) (contents'' d2) partition :: Ord a => [(String, a)] -> [(String, a)] -> [(String, a)] -> [(String, a)] -> [(String, a)] -> ([(String, a)], [(String, a)], [(String, a)]) partition o1 o2 b [] [] = (o1, o2, b) partition o1 o2 b d1 [] = (o1++d1, o2, b) partition o1 o2 b [] d2 = (o1, o2++d2, b) partition o1 o2 b d1'@((k1, v1):d1) d2'@((k2, v2):d2) = case compare k1 k2 of LT -> partition (o1++[(k1, v1)]) o2 b d1 d2' GT -> partition o1 (o2++[(k2, v2)]) b d1' d2 EQ -> case compare v1 v2 of LT -> partition (o1++[(k1, v1)]) o2 b d1 d2' GT -> partition o1 (o2++[(k2, v2)]) b d1' d2 EQ -> partition o1 o2 (b++[(k1, v1)]) d1 d2 \end{code} \subsection{Substitution} A very simple ``language'' for substituting values from dictionaries into strings simplifies several parts of Halipeto. Clearly the dictionary must return string values for this to work. The syntax is simple: text within curly braces is taken as a path name and substituted for the corresponding text. If the key does not correspond to any value it is left as literal text. Braces can be nested (inner braces are necesarily evaluated first) and can be escaped using the ``$\backslash$'' character (which itself must be escaped if required as a literal). For example, given the dictionary: \begin{verbatim} foo.bar = baz foo.baz = hello \end{verbatim} the string ``\{foo.\{foo.bar\}\} $\backslash$$\backslash$ world'' will evaluate to ``hello $\backslash$ world''. %%Haddock: A dictionary that supports recursive substitution \begin{code} class (OrdDictionary d String) => SubDictionary d where substitute :: d String -> String -> String -- ^ Replace keys substitute d s = unescape $ txt d s subAll :: d String -> [String] -> [String] -- ^ Replace on all subAll d = map (substitute d) search'' :: d String -> [String] -> Maybe String -- ^ Replace & search search'' d s = search d $ subAll d s instance SubDictionary DictCase instance SubDictionary DictNoCase \end{code} The following code processes the text from left to right (the mutually recursive structure of the code, with no returns and multiple passes along the string was a surprise --- I was intending to write a traditional recursive descent parser, but I think this is different --- and any comments would be welcome). \begin{code} txt, txt' :: (Dictionary d String) => d String -> String -> String txt _ "" = "" txt d (c:s) | c == '\\' = c : (txt' d s) | c == '{' = pth d [] "" s | otherwise = c : (txt d s) txt' _ "" = error "end of string during character escape" txt' d (c:s) = c : (txt d s) pth, pth' :: (Dictionary d String) => d String -> [String] -> String -> String -> String pth _ _ _ "" = error $ "end of string during substitution\n" ++ " (probably missing '}')" pth d l p (c:s) | c == '\\' = pth' d l (p++[c]) s | c == '.' = pth d (l++[p]) "" s | c == '{' = pth d l p (pth d [] "" s) | c == '}' = case search d (l++[p]) of -- Nothing -> error $ "cannot find translation for " ++ (toDot (l++[p])) Nothing -> (toDot $ l++[p]) ++ (txt d s) Just x -> (txt d x) ++ (txt d s) | otherwise = pth d l (p++[c]) s pth' _ _ _ "" = error "end of string during character escape" pth' d l p (c:s) = pth d l (p++[c]) s unescape, unescape' :: String -> String unescape "" = "" unescape (c:s) | c == '\\' = unescape' s | otherwise = c : (unescape s) unescape' "" = error "end of string during character escape" unescape' (c:s) = c : (unescape s) \end{code} \subsection{Unpacked} The Unpacked type adds support to Dict for values associated with the empty string. \begin{code} type Unpacked a = (Dict a, Maybe a) \end{code} \subsection{Access} Insert and find are the two basic dictionary operations. \begin{code} insert :: Unpacked a -> (String, a) -> Unpacked a insert (d, _) ("", x) = (d, Just x) insert (d, v) sx = (copy f d sx, v) where f n v' = n {value = Just v'} find :: Unpacked a -> String -> Maybe a find (_, v) "" = v find (d, _) s = apply f d s where f Empty = Nothing f n = value n \end{code} The keys available in a dictionary can be listed with keys''. \begin{code} contents'' :: Unpacked a -> [(String, a)] contents'' (d, Just x) = ("", x) : (contents'' (d, Nothing)) contents'' (d, Nothing) = map rev $ foldD f (\_ -> []) d "" where f c (Just v) l m r s = [(c:s, v)] ++ (l s) ++ (m $ c:s) ++ (r s) f c Nothing l m r s = (l s) ++ (m $ c:s) ++ (r s) rev (s, x) = (reverse s, x) \end{code} Merge adds values from the second dictionary into the first (I suspect there's a more efficient version of this that works on nodes directly). \begin{code} merge'' :: Unpacked a -> Unpacked a -> Unpacked a merge'' d1 d2 = foldl insert d1 (contents'' d2) \end{code} \subsection{Subtrees} The children and adopt functions make explicit use of this namespace. Children returns a labelled forest under a given key. For example, if a dictionary included \begin{verbatim} mytext.1.a = "A1" mytext.2.a = "A2" mytext.2.b = "B2" \end{verbatim} then the children of mytext would be two dictionaries, associated with the lablels ``1'' and ``2'': \begin{verbatim} 1: a = "A1" 2: a = "A2"; b = "B2" \end{verbatim} The labels are sorted so that they will be correctly ordered when they are integer values, as above. In addition, empty keys are handled correctly (not consistently, necesarily, but in a way that is intuitively correct when iterating over values in a template). So, in the example above, if \begin{verbatim} mytext.3 = "3 with no key" \end{verbatim} then the forest would also contain a dictionary with label ``3'' that associates the empty string with the value ``3 with no key''. A sub--tree can be re--inserted into the dictionary at a different path using adopt. This provides a natural way of iterating over data in the dictionary (note that a level of hierarchy --- the values that are returned as lables by children --- is removed by adopting a child). The SU type is a wrapper for (String, Unpacked a) that allows sorting via the Ord class. \begin{code} children'' :: Unpacked a -> String -> [Unpacked a] children'' (d, Just x) "" = combine $ (SU "" (Empty, Just x)):(sort $ subTree d [null]) children'' (d, _) s = combine . sort $ subTree d (s ++ [null]) subTree :: Dict a -> String -> [SU a] subTree = apply f where f Empty = [] f n = collect (match n) \end{code} Collect should gather each key and the associated subtree. So it should collect each node below a null. \begin{code} collect :: Dict a -> [(SU a)] collect d = foldD' f g d ("", Nothing) where f n c v l m r pre'@(pre, vp) | c == null = (l pre') ++ [SU (reverse pre) (match n, vp)] ++ (r pre') | otherwise = (l pre') ++ (m $ (c:pre, v)) ++ (r pre') g (pre, Just vp) = [SU (reverse pre) (Empty, Just vp)] g (_, Nothing) = [] data SU a = SU String (Unpacked a) instance Ord (SU a) where compare (SU s1 _) (SU s2 _) = compare' s1 s2 EQ instance Eq (SU a) where (==) a b = EQ == compare a b compare' :: String -> String -> Ordering -> Ordering compare' [] [] def = def compare' _ [] _ = GT compare' [] _ _ = LT compare' (a:as) (b:bs) EQ = compare' as bs (compare a b) compare' (_:as) (_:bs) def = compare' as bs def combine :: [SU a] -> [Unpacked a] combine l = map (\(SU _ x) -> x) $ foldr f [] l where f x [] = [x] f a@(SU _ (_, Just _)) (b@(SU _ (_, Just _)):bs) = a:b:bs f (SU sa (_, Just xa)) ( (SU sb (db, _)):bs) | sa == sb = (SU sa (db, Just xa)):bs f (SU sa (da, _)) ( (SU sb (_, Just xb)):bs) | sa == sb = (SU sa (da, Just xb)):bs f a@(SU _ _) (b@(SU _ _):bs) = a:b:bs adopt'' :: Unpacked a -> String -> Unpacked a -> Unpacked a adopt'' d s (d', Just x) = adopt'' (insert d (s, x)) s (d', Nothing) adopt'' (d, x) s (d', Nothing) = (copy f d (s ++ [null], d'), x) where f n1 n2 = n1 {match = fst $ merge'' (match n1, Nothing) (n2, Nothing)} \end{code} \subsection{Tree Implementation} The underlying implementation for all this is a simple ternary tree. It's not very efficient, but will do for now. Here we define the structure and basic folds. The second fold includes the node itself, which saves us from having to reconstruct sub-trees (an efficiency hack). \begin{code} data Dict a = Node {char :: Char, value :: Maybe a, left :: Dict a, match :: Dict a, right :: Dict a} | Empty foldD :: (Char -> Maybe b -> a -> a -> a -> a) -> a -> Dict b -> a foldD _ a Empty = a foldD f a (Node c v l m r) = f c v (foldD f a l) (foldD f a m) (foldD f a r) foldD' :: (Dict b -> Char -> Maybe b -> a -> a -> a -> a) -> a -> Dict b -> a foldD' _ a Empty = a foldD' f a n@(Node c v l m r) = f n c v (foldD' f a l) (foldD' f a m) (foldD' f a r) instance (Show a) => Show (Dict a) where show x = showDict x showDict :: (Show t) => Dict t -> [Char] showDict Empty = "-" showDict (Node c v l m r) = "[" ++ [c] ++ ":" ++ (show v) ++ "," ++ (show l) ++ "," ++ (show m) ++ "," ++ (show r) ++ "]" \end{code} The support functions copy and apply descend the tree to the point specified by the string and then apply a function. On the way down, copy copies the tree. \begin{code} apply :: (Dict a -> b) -> Dict a -> String -> b apply fn = foldD' f (\_ -> fn Empty) where f n c _ l _ r s'@[s] | s == c = fn n | s < c = l s' | s > c = r s' f _ c _ l m r s'@(s:ss) | s == c = m ss | s < c = l s' | s > c = r s' copy :: (Dict a -> b -> Dict a) -> Dict a -> (String, b) -> Dict a copy fn = foldD' f g where f n c _ l m r a@((s:ss), v') | s == c && ss == [] = fn n v' | s == c = n {match = m (ss, v')} | s < c = n {left = l a} | s > c = n {right = r a} g ((s:ss), v') | ss == [] = fn (Node s Nothing Empty Empty Empty) v' | otherwise = Node s Nothing Empty (g (ss, v')) Empty \end{code}