-- |All present parsers work on Strings, one character at a time.  The canonical encoding is clearly susceptible to efficient parsing as a Lazy ByteString.
-- 
-- This package also includes the Read instance for Sexprs.
-- 
-- From Rivest's documentation:
-- 
-- > <sexpr>    	:: <string> | <list>
-- > <string>   	:: <display>? <simple-string> ;
-- > <simple-string>	:: <raw> | <token> | <base-64> | <hexadecimal> | 
-- > 		           <quoted-string> ;
-- > <display>  	:: "[" <simple-string> "]" ;
-- > <raw>      	:: <decimal> ":" <bytes> ;
-- > <decimal>  	:: <decimal-digit>+ ;
-- > 		-- decimal numbers should have no unnecessary leading zeros
-- > <bytes> 	-- any string of bytes, of the indicated length
-- > <token>    	:: <tokenchar>+ ;
-- > <base-64>  	:: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
-- > <hexadecimal>      :: "#" ( <hex-digit> | <white-space> )* "#" ;
-- > <quoted-string>    :: <decimal>? <quoted-string-body>  
-- > <quoted-string-body> :: "\"" <bytes> "\""
-- > <list>     	:: "(" ( <sexp> | <whitespace> )* ")" ;
-- > <whitespace> 	:: <whitespace-char>* ;
-- > <token-char>  	:: <alpha> | <decimal-digit> | <simple-punc> ;
-- > <alpha>       	:: <upper-case> | <lower-case> | <digit> ;
-- > <lower-case>  	:: "a" | ... | "z" ;
-- > <upper-case>  	:: "A" | ... | "Z" ;
-- > <decimal-digit>    :: "0" | ... | "9" ;
-- > <hex-digit>        :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
-- > <simple-punc> 	:: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
-- > <whitespace-char>  :: " " | "\t" | "\r" | "\n" ;
-- > <base-64-char> 	:: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
-- > <null>        	:: "" ;

module Codec.Sexpr.Parser (readSexpr,
                           readSexprString,
                           sexpr,
                           canonicalSexpr) where

import Codec.Sexpr

-- import Data.Binary.Get
-- import Data.ByteString

import Data.Char
import Text.ParserCombinators.ReadP
import qualified Codec.Binary.Base64.String as B64


instance Read s => Read (Sexpr s) where
    readsPrec n s = map (\(a,b) -> (fmap read a, b)) s'
        where 
          s' = readP_to_S sexpr s :: [(Sexpr String,String)]

-- |Read a @'Sexpr' 'String'@ in any encoding: Canonical, Basic, or Advanced.
readSexprString :: String -> Sexpr String
readSexprString s = fst . head $ readP_to_S sexpr s

-- |Read a @'Sexpr' a@ using the 'Read' instance for @a@.  The Sexpr
-- may be in any encoding: Canonical, Basic, or Advanced.
readSexpr :: Read a => String -> Sexpr a
readSexpr = fmap read . readSexprString

-- |Parser for @'Sexpr' 'String'@s suitable for embedding in other 
-- @ReadP@ parsers.
sexpr :: ReadP (Sexpr String)
sexpr = do
  skipSpaces
  s <- canonicalSexpr
  skipSpaces
  return s

{-
getCanonicalAtom :: Get (Sexpr ByteString)
getCanonicalAtom = do
  l <- getDecimal
  skip 1 -- ':'
  s <- getLazyByteString l -- FIXME doesn't handle hints
  return $ atom s

getCanonicalList :: Get S
getCanonicalList = do
  skip 1 -- '('
  -- FIXME mostly missing
-}

-- |For some applications it is wise to accept only very carefully
-- specified input.  This is useful when you know you are receiving
-- exactly a Canonical S-Expression.  It will read only a Canonical
-- S-expression (and optional terminating NUL), but not the Basic or
-- Advanced encodings.
canonicalSexpr :: ReadP (Sexpr String)
canonicalSexpr = do
  s <- atomR <++ listR <++ basicTransport
  optional $ char '\NUL'
  return s

basicTransport :: ReadP (Sexpr String)
basicTransport = do
  b64Octets <- between (char '{') (char '}') $ many1 b64char
  let parses = readP_to_S sexpr $ B64.decode b64Octets
  choice $ map (return.fst) $ filter ((=="") . snd) parses

b64char = satisfy (\x -> isAlphaNum x || x `elem` "+/=")
b64char' = skipSpaces >> b64char

hexchar = satisfy isHexDigit
hexchar' = skipSpaces >> hexchar

listR :: ReadP (Sexpr String)
listR = do
  l <- between (char '(') (char ')') $ many sexpr
  return $ list l

atomR :: ReadP (Sexpr String)
atomR = unhinted +++ hinted
  where 
    unhinted = simpleString >>= (return . atom)
    hinted = do
      hint <- between (char '[' >> skipSpaces) 
                      (skipSpaces >> char ']') 
                      simpleString
      value <- simpleString
      return $ hintedAtom hint value
      
simpleString :: ReadP String
simpleString = raw +++ token +++ b64Atom +++ hexAtom +++ quotedString

quotedString = withLength +++ withoutLength
  where
    withLength = do
      l <- decimal
      c <- between (char '"') (char '"') (many get)
      let s = read ('"':c ++ "\"")
      if (l == length s)
       then return s
       else fail "length error"              
    withoutLength = do
                 c <- between (char '"') (char '"') (many get)
                 return $ read ('"':c ++ "\"")

hexAtom = do
  s <- withLength +++ withoutLength
  return $ hexDecode s
    where
      withLength = do
            l <- decimal
            between (char '#') (char '#') (count (2*l) hexchar')
      withoutLength = between (char '#') (char '#') (many1 hexchar')

hexDecode [] = ""
hexDecode (h:o:cs) = chr (16*digitToInt h + digitToInt o) : (hexDecode cs)

b64Atom = do
  s <- withLength +++ withoutLength
  return $ B64.decode s
  where
    withLength = do
      l <- decimal
      between (char '|') (char '|') (count (b64length l) b64char')
    withoutLength = 
      between (char '|') (char '|') (many1 b64char')
    b64length l = 4 * (ceiling (fromIntegral l / 3))

token = do
  c <- satisfy isInitialTokenChar
  cs <- munch isTokenChar
  return (c:cs)

raw :: ReadP String
raw = do
  length <- decimal
  char ':'
  count length get

decimal :: ReadP Int
decimal = do
  s <- munch1 isNumber
  return $ read s