module Codec.Sexpr.Parser (readSexpr, sexpr, canonicalSexpr) where import Codec.Sexpr import Data.Char import Text.ParserCombinators.ReadP import qualified Codec.Binary.Base64.String as B64 {- From Rivest's documentation: :: | :: ? ; :: | | | | ; :: "[" "]" ; :: ":" ; :: + ; -- decimal numbers should have no unnecessary leading zeros -- any string of bytes, of the indicated length :: + ; :: ? "|" ( | )* "|" ; :: "#" ( | )* "#" ; :: ? :: "\"" "\"" :: "(" ( | )* ")" ; :: * ; :: | | ; :: | | ; :: "a" | ... | "z" ; :: "A" | ... | "Z" ; :: "0" | ... | "9" ; :: | "A" | ... | "F" | "a" | ... | "f" ; :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ; :: " " | "\t" | "\r" | "\n" ; :: | | "+" | "/" | "=" ; :: "" ; -} instance Read Sexpr where readsPrec n = readP_to_S sexpr readSexpr s = fst . head $ readP_to_S sexpr s sexpr = do skipSpaces s <- canonicalSexpr skipSpaces return s canonicalSexpr :: ReadP Sexpr canonicalSexpr = do s <- atomR <++ listR <++ basicTransport optional $ char '\NUL' return s basicTransport :: ReadP Sexpr basicTransport = do b64Octets <- between (char '{') (char '}') $ many1 b64char let parses = readP_to_S sexpr $ B64.decode b64Octets choice $ map (return.fst) $ filter ((=="") . snd) parses b64char = satisfy (\x -> isAlphaNum x || x `elem` "+/=") b64char' = skipSpaces >> b64char hexchar = satisfy isHexDigit hexchar' = skipSpaces >> hexchar listR :: ReadP Sexpr listR = do l <- between (char '(') (char ')') $ many sexpr return $ list l atomR :: ReadP Sexpr atomR = unhinted +++ hinted where unhinted = simpleString >>= (return . atom) hinted = do hint <- between (char '[' >> skipSpaces) (skipSpaces >> char ']') simpleString value <- simpleString return $ hintedAtom hint value simpleString :: ReadP String simpleString = raw +++ token +++ b64Atom +++ hexAtom +++ quotedString quotedString = withLength +++ withoutLength where withLength = do l <- decimal c <- between (char '"') (char '"') (many get) let s = read ('"':c ++ "\"") if (l == length s) then return s else fail "length error" withoutLength = do c <- between (char '"') (char '"') (many get) return $ read ('"':c ++ "\"") hexAtom = do s <- withLength +++ withoutLength return $ hexDecode s where withLength = do l <- decimal between (char '#') (char '#') (count (2*l) hexchar') withoutLength = between (char '#') (char '#') (many1 hexchar') hexDecode [] = "" hexDecode (h:o:cs) = chr (16*digitToInt h + digitToInt o) : (hexDecode cs) b64Atom = do s <- withLength +++ withoutLength return $ B64.decode s where withLength = do l <- decimal between (char '|') (char '|') (count (b64length l) b64char') withoutLength = between (char '|') (char '|') (many1 b64char') b64length l = 4 * (ceiling (fromIntegral l / 3)) token = do c <- satisfy isInitialTokenChar cs <- munch isTokenChar return (c:cs) raw :: ReadP String raw = do length <- decimal char ':' count length get decimal :: ReadP Int decimal = do s <- munch1 isNumber return $ read s