-- |All present parsers work on Strings, one character at a time. The canonical encoding is clearly susceptible to efficient parsing as a Lazy ByteString. -- -- This package also includes the Read instance for Sexprs. -- -- From Rivest's documentation: -- -- > :: | -- > :: ? ; -- > :: | | | | -- > ; -- > :: "[" "]" ; -- > :: ":" ; -- > :: + ; -- > -- decimal numbers should have no unnecessary leading zeros -- > -- any string of bytes, of the indicated length -- > :: + ; -- > :: ? "|" ( | )* "|" ; -- > :: "#" ( | )* "#" ; -- > :: ? -- > :: "\"" "\"" -- > :: "(" ( | )* ")" ; -- > :: * ; -- > :: | | ; -- > :: | | ; -- > :: "a" | ... | "z" ; -- > :: "A" | ... | "Z" ; -- > :: "0" | ... | "9" ; -- > :: | "A" | ... | "F" | "a" | ... | "f" ; -- > :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ; -- > :: " " | "\t" | "\r" | "\n" ; -- > :: | | "+" | "/" | "=" ; -- > :: "" ; module Codec.Sexpr.Parser (readSexpr, readSexprString, sexpr, canonicalSexpr) where import Codec.Sexpr -- import Data.Binary.Get -- import Data.ByteString import Data.Char import Text.ParserCombinators.ReadP import qualified Codec.Binary.Base64.String as B64 instance Read s => Read (Sexpr s) where readsPrec n s = map (\(a,b) -> (fmap read a, b)) s' where s' = readP_to_S sexpr s :: [(Sexpr String,String)] -- |Read a @'Sexpr' 'String'@ in any encoding: Canonical, Basic, or Advanced. readSexprString :: String -> Sexpr String readSexprString s = fst . head $ readP_to_S sexpr s -- |Read a @'Sexpr' a@ using the 'Read' instance for @a@. The Sexpr -- may be in any encoding: Canonical, Basic, or Advanced. readSexpr :: Read a => String -> Sexpr a readSexpr = fmap read . readSexprString -- |Parser for @'Sexpr' 'String'@s suitable for embedding in other -- @ReadP@ parsers. sexpr :: ReadP (Sexpr String) sexpr = do skipSpaces s <- canonicalSexpr skipSpaces return s {- getCanonicalAtom :: Get (Sexpr ByteString) getCanonicalAtom = do l <- getDecimal skip 1 -- ':' s <- getLazyByteString l -- FIXME doesn't handle hints return $ atom s getCanonicalList :: Get S getCanonicalList = do skip 1 -- '(' -- FIXME mostly missing -} -- |For some applications it is wise to accept only very carefully -- specified input. This is useful when you know you are receiving -- exactly a Canonical S-Expression. It will read only a Canonical -- S-expression (and optional terminating NUL), but not the Basic or -- Advanced encodings. canonicalSexpr :: ReadP (Sexpr String) canonicalSexpr = do s <- atomR <++ listR <++ basicTransport optional $ char '\NUL' return s basicTransport :: ReadP (Sexpr String) basicTransport = do b64Octets <- between (char '{') (char '}') $ many1 b64char let parses = readP_to_S sexpr $ B64.decode b64Octets choice $ map (return.fst) $ filter ((=="") . snd) parses b64char = satisfy (\x -> isAlphaNum x || x `elem` "+/=") b64char' = skipSpaces >> b64char hexchar = satisfy isHexDigit hexchar' = skipSpaces >> hexchar listR :: ReadP (Sexpr String) listR = do l <- between (char '(') (char ')') $ many sexpr return $ list l atomR :: ReadP (Sexpr String) atomR = unhinted +++ hinted where unhinted = simpleString >>= (return . atom) hinted = do hint <- between (char '[' >> skipSpaces) (skipSpaces >> char ']') simpleString value <- simpleString return $ hintedAtom hint value simpleString :: ReadP String simpleString = raw +++ token +++ b64Atom +++ hexAtom +++ quotedString quotedString = withLength +++ withoutLength where withLength = do l <- decimal c <- between (char '"') (char '"') (many get) let s = read ('"':c ++ "\"") if (l == length s) then return s else fail "length error" withoutLength = do c <- between (char '"') (char '"') (many get) return $ read ('"':c ++ "\"") hexAtom = do s <- withLength +++ withoutLength return $ hexDecode s where withLength = do l <- decimal between (char '#') (char '#') (count (2*l) hexchar') withoutLength = between (char '#') (char '#') (many1 hexchar') hexDecode [] = "" hexDecode (h:o:cs) = chr (16*digitToInt h + digitToInt o) : (hexDecode cs) b64Atom = do s <- withLength +++ withoutLength return $ B64.decode s where withLength = do l <- decimal between (char '|') (char '|') (count (b64length l) b64char') withoutLength = between (char '|') (char '|') (many1 b64char') b64length l = 4 * (ceiling (fromIntegral l / 3)) token = do c <- satisfy isInitialTokenChar cs <- munch isTokenChar return (c:cs) raw :: ReadP String raw = do length <- decimal char ':' count length get decimal :: ReadP Int decimal = do s <- munch1 isNumber return $ read s