-- |A simple feed generator: Stanza reading and parsing -- -- Copyright (c) 2006 Manuel M T Chakravarty -- -- License: -- --- Description --------------------------------------------------------------- -- -- Language: Haskell 98 -- module Stanza ( -- * Stanza structure Stanza, StanzaAssoc, -- * Stanza reading readStanzas, -- * Stanza processors StanzaProcessor, Proc(..), Action(..), -- * Stanza parsing parseStanzas ) where -- hierachical libraries -- import Data.Char ( isDigit, isSpace, toLower) import Data.List ( (\\)) -- lambdaFeed import Error ( elementErrStr) -- Stanze file parsing -- ------------------- -- A stanza is a list of associations, where each association comprises -- -- (1) a tag string (without the terminating colon of the textual -- representation), -- (2) a range of source file lines (containing the association in the textual -- representation), and -- (3) a value string. -- type Stanza = [StanzaAssoc] type StanzaAssoc = (String, (Int, Int), String) -- Read a text file in stanza format (including line numbers for better error -- messages). -- -- * See top of file for details of the format. -- readStanzas :: FilePath -> IO [Stanza] readStanzas fname = do contents <- readFile fname let numbered = zip [1..] (filter notComment . lines $ contents) stanzas = splitIntoStanzas numbered return $ map splitIntoAssocs stanzas where splitIntoStanzas [] = [] splitIntoStanzas lines = let lines0 = dropWhile isEmptyLine lines (stanza, lines') = break isEmptyLine lines0 stanzas = splitIntoStanzas lines' in stanza:stanzas -- splitIntoAssocs [] = [] splitIntoAssocs ((start, line):lines) = let (values, lines') = break isTagLine lines assocs = splitIntoAssocs lines' -- (tag, value1) = break (== ':') line allValues = stripColonNClean value1 : map (clean . snd) values -- end | null values = start | otherwise = (fst . last) values in case extractTag tag of Just rawTag -> (rawTag, (start, end), unwords' allValues):assocs Nothing -> malformedTagErr fname start tag -- extractTag "" = Nothing extractTag tag | isSpace (head tag) = Nothing | any isSpace cleanedTag = Nothing | otherwise = Just cleanedTag where cleanedTag = cleanTrailing tag -- notComment ('-':'-':_) = False notComment _ = True -- stripColonNClean (':':str) = clean str -- strip leading colon & clean w/s stripColonNClean str = clean str -- isEmptyLine = all isSpace . snd isTagLine = not . isSpace . (!!0) . snd -- unwords' = dropWhile isSpace . unwords -- Complain about a malformed tag and abort -- malformedTagErr :: FilePath -> Int -> String -> a malformedTagErr fname line tag = error $ fname ++ ":" ++ show line ++ ": `" ++ cleanTrailing tag ++ "' is not a valid tag" -- Lose leading and trailing whitespace -- clean :: String -> String clean = reverse . dropWhile isSpace . reverse . dropWhile isSpace -- Lose trailing whitespace -- cleanTrailing :: String -> String cleanTrailing = reverse . dropWhile isSpace . reverse -- Stanza processing -- ----------------- -- Association of tag names with functions that add the associated value to -- some structure `a' (and may also return a list of error messages). -- -- * We ignore case when comparing tag names. -- type StanzaProcessor a = [(String, Proc a)] -- Determines whether the associated tag is required. -- data Proc a = Proc Bool (Action a) -- The actual processing functions come in two flavours: -- -- * Simple processors consume one stanza association. -- -- * Compound processors consume an entire stanza, where the first entry of -- the stanza is the one that triggered the processor and the others are -- attributes to that element. If they occur within a stanza, they get -- passed a singelton stanza with the one association. -- data Action a = Simple (StanzaAssoc -> (a -> (a, [String]))) | Compound (Stanza -> (a -> (a, [String]))) -- Apply a stanza processor to a stanza list. -- -- * The second argument indicates the source of the stanza list and is used -- in error messages. -- -- * If required tags are missing, we inject an the error message into the -- errors produced by the transformer. As this error is fatal in some -- context, we indicate the error also by producing a transformer that -- returns constant `Nothing' in its first component. -- parseStanzas :: StanzaProcessor a -- stanza processor for parsing -> String -- name of stanza source -> [Stanza] -- stanzas to parse -> (a -> (Maybe a, [String])) -- -- The following is a two step process: -- -- (1) The stanza processor computes from the input stanza a *function* that -- when applied to a value of type `a' (usually a record) adds the -- information from the stanza to that value. -- (2) We apply the function from Step (1) to a value of type `a' (usually a -- record with default values in all components) to get the final result -- of parsing. -- parseStanzas stanzaProc sname stanzas = \rec -> let (transformer, req) = processStanzas required stanzas (results, errs) = transformer rec in if null req then -- a required tags were found (Just results, errs) else -- some required tags were missing => no result (Nothing, [sname ++ ": required tags missing: " ++ show req, sname ++ ": invalidates the whole element" ] ++ errs) where required = [tag | (tag, Proc reqd _) <- stanzaProc, reqd] -- --processStanzas :: [String] -> [Stanza] -> (a -> (a, [String]), [String]) processStanzas reqd [] = (idNil, reqd) processStanzas reqd ([] :stanzas) = processStanzas reqd stanzas processStanzas reqd (stanza@((tag, range, _):_):stanzas) = let (trans1, reqd1) = case lookup (map toLower tag) stanzaProc of Just (Proc _ (Simple _ )) -> processAssocs reqd stanza Just (Proc _ (Compound act)) -> (act stanza, reqd \| tag) Nothing -> (addError (unknownTag range tag), reqd) (trans2, reqd2) = processStanzas reqd1 stanzas in (trans2 .++ trans1, reqd2) -- --processAssocs :: [String] -> Stanza -> (a -> (a, [String]), [String]) processAssocs reqd [] = (idNil, reqd) processAssocs reqd (assoc@(tag, range, _):assocs) = let (trans1, reqd1) = case lookup (map toLower tag) stanzaProc of Just (Proc _ (Simple act)) -> (act assoc , reqd \| tag) Just (Proc _ (Compound act)) -> (act [assoc], reqd \| tag) Nothing -> (addError (unknownTag range tag), reqd) (trans2, reqd2) = processAssocs reqd1 assocs in (trans2 .++ trans1, reqd2) -- idNil = \a -> (a, []) f1 .++ f2 = \a -> let (a1, e1) = f1 a; (a2, e2) = f2 a1 in (a2, e1 ++ e2) -- l \| tag = l \\ [map toLower tag] -- addError err = \a -> (a, [err]) -- unknownTag range tag = elementErrStr sname range $ "unknown tag `" ++ tag ++ "' (ignoring)"