% % Halipeto -- Haskell static web page generator % Copyright 2004 Andrew Cooke (andrew@acooke.org) % Copyright 2007-2010 Peter Simons (simons@cryp.to) % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA % \section{Simple Database} This is a very simple database, implemented on top of the file system using the Dictionary class. The Dictionary namespace reflects the file path to the text (plus, in some cases, an additional name that depends on the file type as described below). There is currently no support for writing data. Some care must be taken with the cases of characters in filenames on Windows systems. It may be wise to force all filenames to lower case via the translate function, or to use a dictionary that is case insensitive. The main disadvantage of the SimpleDB implementation, to my mind, is that there is no formal specification of the structure that verifies that the information in related directories is consistent. Nothing warns the user that the second image directory is missing a details.haldx file, for example. This could be fixed, but it's difficult to find the energy to make a quick fix'' better. A full SQL interface should be developed instead. \begin{code}
module Halipeto.SimpleDB (
) where

import Halipeto.Template
import Halipeto.Dictionary
import Halipeto.Utilities
import Text.Pandoc
import System.IO
import System.Directory
import Data.List
import Data.Char

\end{code} \subsection{File Formats} Three different file formats are supported, giving three ways of associating the content with the dictionary namespace. The formats are distinguished by the file extension. \begin{itemize} \item {\bf hal} Files ending in .hal'' have all their contents stored under the file's main name (with the associated directory path). For example, the contents of file diary/2004/jul/text.hal would be stored in diary.2004.jul.text''. \begin{code}
readHal :: Dictionary d String =>
Translate -> d String -> FilePath -> [String] -> IO (d String)
return $add dct (ky, txt)  \end{code} \item {\bf hals} Each paragraph (separated by one or more blank lines) in a file with extension .hals'' is numbered. So if the contents of file months.hals had the contents \begin{verbatim} jan feb mar .. \end{verbatim} then the dictionary would associate, for example, months.5'' with May'' (this could be used to order the months for display, for example --- a simpler solution might be available via the translation facility described below). More often this format is used to store several paragraphs of text that are iterated over in a template so that each paragraph is enlosed within its own$<$p$>$element. \begin{code} readHals :: Dictionary d String => Translate -> d String -> FilePath -> [String] -> IO (d String) readHals tr dct fp ky = do h <- openFile fp ReadMode dct' <- readParas h tr dct ky 0 hClose h return dct' readParas :: Dictionary d String => Handle -> Translate -> d String -> [String] -> Int -> IO (d String) readParas h tr dct ky n = do done <- hIsEOF h if done then return$ dct
else do txt <- hGetPara h
case txt of
Nothing   -> readParas h tr dct ky n
Just txt' ->
case tr $ky ++ [(show n)] of Nothing -> readParas h tr dct ky n Just k -> readParas h tr (add dct (k, txt')) ky (n+1) hGetPara :: Handle -> IO (Maybe String) hGetPara h = do done <- hIsEOF h if done then return Nothing else do txt <- hGetLine h if "" == dropSpace txt then hGetPara h else do txt' <- collect h txt return$ Just txt'

collect :: Handle -> String -> IO String
collect h txt = do done <- hIsEOF h
if done
then return txt
else do txt' <- hGetLine h
if "" == dropSpace txt'
then return txt
else collect h $txt ++ "\n" ++ txt'  \end{code} \item {\bf haldx} The contents of files ending in .haldx'' are read as key value pairs. The first word on each line is the key, subsequent text, starting with the first non--space character, is the value. For example, if the file diary/2004/highlights.haldx contains \begin{verbatim} 1.month Jan 1.day 2 1.p 4 2.month July 2.day 20 2.p 1 \end{verbatim} Then the dictionary would associate the value July'' with the key diary.2004.highlights.2.month''. \begin{code} readHaldx :: Dictionary d String => Translate -> d String -> FilePath -> [String] -> IO (d String) readHaldx tr dct fp ky = do h <- openFile fp ReadMode dct' <- readLines h tr dct ky hClose h return dct' readLines :: Dictionary d String => Handle -> Translate -> d String -> [String] -> IO (d String) readLines h tr dct ky = do done <- hIsEOF h if done then return$ dct
else do txt <- hGetLine h
txt' <- return $dropWindowsReturn txt case splitLine tr ky txt' of Nothing -> readLines h tr dct ky Just (k, v) -> readLines h tr (add dct (k, v)) ky dropWindowsReturn :: String -> String dropWindowsReturn "" = "" dropWindowsReturn (c:s) | c == '\r' = dropWindowsReturn s | otherwise = c:(dropWindowsReturn s) splitLine :: Translate -> [String] -> String -> Maybe ([String], String) splitLine tr ky txt = case keyVal txt of Nothing -> Nothing Just (k, v) -> case tr$ ky ++ k of
Nothing  -> Nothing
Just ky' -> Just (ky', v)

keyVal :: String -> Maybe ([String], String)
keyVal s = keyVal' "" $dropSpace s where keyVal' _ "" = Nothing keyVal' k (c:s) | isSpace c = Just (fromDot k, dropSpace s) | otherwise = keyVal' (k ++ [c]) s  \end{code} \end{itemize} Given those formats it is possible to define two different values with the same key. In such cases the final result in the dictionary is undefined (the key will be associated with one of the values, but the choice will depend on implementation details). The functions for parsing these file formats are stored in a dictionary, indexed by file extension. The database can be extended to handle other formats by adding to this dictionary. %%Haddock: Map file extension to file reading function \begin{code} type ReadDB d = Translate -> d String -> FilePath -> [String] -> IO (d String)  \end{code} %%Haddock: Add the default file functions (hal, hals, haldx) \begin{code} addDefaultsDB :: (Dictionary d String, Dictionary r (ReadDB d)) => r (ReadDB d) -> r (ReadDB d) addDefaultsDB d = addAll' d [ (hal, readHal) , (hal++"s", readHals) , (hal++"dx", readHaldx) , ("rst", readRstMsg) , ("mdwn", readMdwnMsg) , ("lst", readList) ]  \end{code} \subsection{Translation} Before a file is read, its full path (from the base directory of the database, but excluding the file extension) is passed to a translate function which returns either a (possibly modified) value or Nothing. A valid value is used as the base key for the contents, a value of Nothing will cause that file to be ignored. Directories are also translated --- their names are not used as keys, but a value of Nothing will prevent traversal of that directory. %%Haddock: Type of translation function to select or modify file names \begin{code} type Translate = [String] -> Maybe [String]  \end{code} %%Haddock: Block CVS directories \begin{code} noCVS :: Translate noCVS s | last s == "CVS" = Nothing | otherwise = Just s  \end{code} %%Haddock: Select all files \begin{code} allFiles :: Translate allFiles = Just  \end{code} \subsection{HTML} All file contents are read as plain text. Functions called during the generation of the site may parse some of these values as HTML, but that functionality is not part of this database. \subsection{Reading Data} So here's the code... \begin{code} safety :: Translate safety [] = Nothing safety l | last l == "" = Nothing | head (last l) == '.' = Nothing | head (last l) == '/' = Nothing | head (last l) == '\\' = Nothing | otherwise = Just l  \end{code} %%Haddock: Read a database from disk using the default file functions \begin{code} readDB' :: Dictionary d String => Translate -> d String -> FilePath -> IO (d String) readDB' = readDB (addDefaultsDB empty)  \end{code} %%Haddock: Read a database from disk \begin{code} readDB :: (Dictionary d String, Dictionary r (ReadDB d)) => r (ReadDB d) -> Translate -> d String -> FilePath -> IO (d String) readDB d tr dc dr = readFP d (safety thenMaybe tr) dc dr [] readFP :: (Dictionary d String, Dictionary r (ReadDB d)) => r (ReadDB d) -> ReadDB d readFP d tr dct fp ky = do isD <- doesDirectoryExist fp if isD then readDir d tr dct fp ky$ getDirectoryContents fp
else readHFile d tr dct fp ky

r (ReadDB d) -> Translate -> d String -> FilePath -> [String]
-> IO [FilePath] -> IO (d String)
readDir d tr dct fp ky l = do l' <- l
foldM fn dct l'
where
fn dct' f = case tr $ky ++ [beforeDot f] of Nothing -> do return dct' Just ky' -> readFP d tr dct' (fp slash f) ky' where beforeDot "" = "" beforeDot (c:s) | c == '.' = "" | otherwise = c:(beforeDot s) readHFile :: (Dictionary d String, Dictionary r (ReadDB d)) => r (ReadDB d) -> ReadDB d readHFile d tr dct fp ky = case search' d (suffix fp) of Nothing -> do putStrLn$ "no match for " ++ fp
return dct
Just fn -> do putStrLn $"reading " ++ fp fn tr dct fp ky suffix :: String -> String suffix = suffix' "" suffix' :: String -> String -> String suffix' x "" = x suffix' x (c:s) | c == '.' = suffix' s s | otherwise = suffix' x s  \end{code} Support for Internet Message style entries. \begin{code} readMessage :: FilePath -> IO ([(String,String)], String) readMessage fp = do (header,body) <- fmap (span (not . Data.List.null) . lines) (readFile fp) let headerLinesWords = map (words . concat) (groupBy (\l r -> isSpace (head r)) header) headerLines = map (\wrds -> (fixKeyword (head wrds), unwords (tail wrds))) headerLinesWords fixKeyword k | ":" isSuffixOf k = map toLower$ reverse . tail . reverse $k | otherwise = error$ "unknown header keyword " ++ show k ++ " in " ++ fp
return (headerLines, unlines . drop 1 $body)  \end{code} Support for ReStructured Text Messages. \begin{code} readRstMsg :: Dictionary d String => Translate -> d String -> FilePath -> [String] -> IO (d String) readRstMsg _ dct fp ky = do (header,body) <- readMessage fp let st = defaultParserState opt = defaultWriterOptions { writerStrictMarkdown = True } pdoc = readRST st body html = writeHtmlString opt pdoc dct' = foldl (\d (k,v) -> add d (ky ++ [k], v)) dct header return$ add dct' (ky ++ ["body"], html)

\end{code} Support for Markdown Text Messages. \begin{code}
readMdwnMsg :: Dictionary d String => Translate -> d String -> FilePath -> [String] -> IO (d String)
readMdwnMsg _ dct fp ky = do
return $add dct' (ky ++ ["body"], html)  \end{code} Support for simple entry-by-line lists. \begin{code} readList :: Dictionary d String => Translate -> d String -> FilePath -> [String] -> IO (d String) readList _ dct fp ky = do ls <- fmap lines (readFile fp) let dct' = foldl (\d (k,v) -> add d (ky ++ [show k], v)) dct (zip [1..] ls) return$ dct'