module Codec.Sexpr.Parser (readSexpr,
                           sexpr,
                           canonicalSexpr) where

import Codec.Sexpr

import Data.Char
import Text.ParserCombinators.ReadP
import qualified Codec.Binary.Base64.String as B64

{-
From Rivest's documentation:

<sexpr>    	:: <string> | <list>
<string>   	:: <display>? <simple-string> ;
<simple-string>	:: <raw> | <token> | <base-64> | <hexadecimal> | 
		           <quoted-string> ;
<display>  	:: "[" <simple-string> "]" ;
<raw>      	:: <decimal> ":" <bytes> ;
<decimal>  	:: <decimal-digit>+ ;
		-- decimal numbers should have no unnecessary leading zeros
<bytes> 	-- any string of bytes, of the indicated length
<token>    	:: <tokenchar>+ ;
<base-64>  	:: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
<hexadecimal>   :: "#" ( <hex-digit> | <white-space> )* "#" ;
<quoted-string> :: <decimal>? <quoted-string-body>  
<quoted-string-body> :: "\"" <bytes> "\""
<list>     	:: "(" ( <sexp> | <whitespace> )* ")" ;
<whitespace> 	:: <whitespace-char>* ;
<token-char>  	:: <alpha> | <decimal-digit> | <simple-punc> ;
<alpha>       	:: <upper-case> | <lower-case> | <digit> ;
<lower-case>  	:: "a" | ... | "z" ;
<upper-case>  	:: "A" | ... | "Z" ;
<decimal-digit> :: "0" | ... | "9" ;
<hex-digit>     :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
<simple-punc> 	:: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
<base-64-char> 	:: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
<null>        	:: "" ;
-}

instance Read Sexpr where
    readsPrec n = readP_to_S sexpr

readSexpr s = fst . head $ readP_to_S sexpr s

sexpr = do
  skipSpaces
  s <- canonicalSexpr
  skipSpaces
  return s

canonicalSexpr :: ReadP Sexpr
canonicalSexpr = do
  s <- atomR <++ listR <++ basicTransport
  optional $ char '\NUL'
  return s

basicTransport :: ReadP Sexpr
basicTransport = do
  b64Octets <- between (char '{') (char '}') $ many1 b64char
  let parses = readP_to_S sexpr $ B64.decode b64Octets
  choice $ map (return.fst) $ filter ((=="") . snd) parses

b64char = satisfy (\x -> isAlphaNum x || x `elem` "+/=")
b64char' = skipSpaces >> b64char

hexchar = satisfy isHexDigit
hexchar' = skipSpaces >> hexchar

listR :: ReadP Sexpr
listR = do
  l <- between (char '(') (char ')') $ many sexpr
  return $ list l

atomR :: ReadP Sexpr
atomR = unhinted +++ hinted
  where 
    unhinted = simpleString >>= (return . atom)
    hinted = do
      hint <- between (char '[' >> skipSpaces) 
                      (skipSpaces >> char ']') 
                      simpleString
      value <- simpleString
      return $ hintedAtom hint value
      
simpleString :: ReadP String
simpleString = raw +++ token +++ b64Atom +++ hexAtom +++ quotedString

quotedString = withLength +++ withoutLength
  where
    withLength = do
      l <- decimal
      c <- between (char '"') (char '"') (many get)
      let s = read ('"':c ++ "\"")
      if (l == length s)
       then return s
       else fail "length error"              
    withoutLength = do
                 c <- between (char '"') (char '"') (many get)
                 return $ read ('"':c ++ "\"")

hexAtom = do
  s <- withLength +++ withoutLength
  return $ hexDecode s
    where
      withLength = do
            l <- decimal
            between (char '#') (char '#') (count (2*l) hexchar')
      withoutLength = between (char '#') (char '#') (many1 hexchar')

hexDecode [] = ""
hexDecode (h:o:cs) = chr (16*digitToInt h + digitToInt o) : (hexDecode cs)

b64Atom = do
  s <- withLength +++ withoutLength
  return $ B64.decode s
  where
    withLength = do
      l <- decimal
      between (char '|') (char '|') (count (b64length l) b64char')
    withoutLength = 
      between (char '|') (char '|') (many1 b64char')
    b64length l = 4 * (ceiling (fromIntegral l / 3))

token = do
  c <- satisfy isInitialTokenChar
  cs <- munch isTokenChar
  return (c:cs)

raw :: ReadP String
raw = do
  length <- decimal
  char ':'
  count length get

decimal :: ReadP Int
decimal = do
  s <- munch1 isNumber
  return $ read s