module Text.Format ( parseFormat , renderFormat , renderFormatString , scanFormat , scanFormatString ) where import Text.Parsec data FormatParsePart = FRaw Char | FVar String data FormatPart = Var String (Parsec String () ()) | Raw String type Format = [FormatPart] mapLeft f (Left x) = Left (f x) mapLeft _ (Right x) = Right x {-| Parse a format string into a `Format` object ready to be used in `renderFormat` and `scanFormat`. The format string consists of raw tokens (ordinary characters), and variables, marked '$varname' or '${varname}'. The dollar sign may be used as a raw token by escaping it with another dollar sign, like so: '$$'. (If you want a variable named $, use '${$}'.) Not all syntactically valid parse strings are semantically valid. In particular, two variables must not occur consecutively without interleaving raw tokens. (If this were permitted, the resulting grammar would be ambiguous.) Variable names may be used twice; however, this will make the result of `scanFormat` somewhat difficult to deal with. The functions `renderFormatString` and `scanFormatString` are provided as conveniences to make doing this explicitly unnecessary. -} parseFormat :: String -> Either String Format parseFormat s = case runParser formatParser () "format-string" s of Left e -> Left $ show e Right x -> cleanFormat x where cleanFormat :: [FormatParsePart] -> Either String Format cleanFormat ps = mergeRaw "" ps >>= sepVars mergeRaw :: String -> [FormatParsePart] -> Either String Format mergeRaw s [] = case s of "" -> return [] _ -> return [Raw s] mergeRaw s (FRaw c : ps) = mergeRaw (s++[c]) ps mergeRaw s (FVar v : ps) = do r <- mergeRaw "" ps return $ case s of "" -> Var v eof : r _ -> Raw s : Var v eof : r sepVars :: [FormatPart] -> Either String Format sepVars [] = return [] sepVars (Raw s : ps) = do rs <- sepVars ps return $ Raw s : rs sepVars (Var v _ : ps) = case ps of [] -> return $ [Var v eof] Raw s : _ -> do rs <- sepVars ps return $ Var v (ignore $ string s) : rs Var _ _ : _ -> Left "Ambiguous format string" ignore = (>>= const (return ())) ident = many1 (alphaNum <|> char '_') >>= return . FVar variable = char '$' >> (ident <|> between (char '{') (char '}') ident) special = string "$$" >> (return $ FRaw '$') raw = return . FRaw =<< noneOf ['$'] formatParser :: Parsec String () [FormatParsePart] formatParser = do xs <- many (try raw <|> try special <|> variable) eof return xs renderFormat :: Format -> (String -> Maybe String) -> Either String String renderFormat fmt lu = return . concat =<< (sequence $ flip map fmt $ \x -> case x of Raw s -> return s Var v _ -> case lu v of Just o -> Right o Nothing -> Left ("No such variable " ++ v)) -- |A more convenient alternative to using `parseFormat` and `renderFormat`. renderFormatString :: String -> (String -> Maybe String) -> Either String String renderFormatString s lu = do f <- parseFormat s renderFormat f lu {-| Parses a string using the given format as a guide, generating a list of pairs of variable names and values. To determine where a variable ends, the entire subsequent string of raw tokens (until the next variable or the end of the string) is used as a terminator. It must occur verbatim in the scanned string or the parse will fail. The smallest match is used: if the format string is '${a}:' and the input string is '1:2:', the parse will exit with an error, as only the first character will be considered part of the variable 'a'. -} scanFormat :: Format -> String -> Either String [(String, String)] scanFormat fmt str = mapLeft show $ runParser scanner () "INPUT" str where scanner = do r <- compileScanner fmt eof return r compileScanner :: Format -> Parsec String () [(String, String)] compileScanner fmt = let ps = map compileScannerPart fmt in foldr (\a as -> do x <- a xs <- as return (x++xs)) (return []) ps compileScannerPart :: FormatPart -> Parsec String () [(String, String)] compileScannerPart p = case p of Raw s -> string s >> return [] Var name sep -> do d <- manyTill anyChar $ try $ lookAhead sep return [(name, d)] -- |A more convenient alternative to using `parseFormat` and `scanFormat`. scanFormatString :: String -> String -> Either String [(String, String)] scanFormatString s str = do f <- parseFormat s scanFormat f str