module Data.ProtoLens.TextFormat.Parser
    ( Message
    , Field(..)
    , Key(..)
    , Value(..)
    , parser
    ) where
import Data.ByteString (ByteString, pack)
import Data.Char (ord, isSpace)
import Data.Functor.Identity (Identity)
import Data.List (intercalate)
import Data.Maybe (catMaybes)
import Data.Text.Lazy (Text)
import qualified Data.Text as StrictText
import Data.Word (Word8)
import Numeric (readOct, readHex)
import Text.Parsec ((<?>))
import Text.Parsec.Char
  (alphaNum, char, hexDigit, letter, octDigit, oneOf, satisfy)
import Text.Parsec.Text.Lazy (Parser)
import Text.Parsec.Combinator (choice, eof, many1, optionMaybe, sepBy1)
import Text.Parsec.Token hiding (octal)
import Control.Applicative ((<|>), many)
import Control.Monad (liftM, liftM2, mzero)
ptp :: GenTokenParser Text () Identity
ptp = makeTokenParser protobufLangDef
protobufLangDef :: GenLanguageDef Text () Identity
protobufLangDef = LanguageDef
  { commentStart = ""
  , commentEnd = ""
  , commentLine = "#"
  , nestedComments = False
  , identStart = letter <|> char '_'
  , identLetter = alphaNum <|> oneOf "_'"
  , opStart = mzero
  , opLetter = mzero
  , reservedNames = []
  , reservedOpNames = []
  , caseSensitive = True
  }
type Message = [Field]
data Field = Field Key Value
  deriving (Show,Ord,Eq)
data Key = Key String  
  | UnknownKey Integer  
  | ExtensionKey [String]  
  | UnknownExtensionKey Integer  
                                 
  deriving (Ord,Eq)
data Value = IntValue Integer  
  | DoubleValue Double  
  | ByteStringValue ByteString    
  | MessageValue (Maybe StrictText.Text) Message  
  | EnumValue String  
  deriving (Show,Ord,Eq)
instance Show Key
  where
    show (Key name) = show name  
                                 
    show (UnknownKey k) = show k
    show (ExtensionKey name) = "[" ++ intercalate "." name ++ "]"
    show (UnknownExtensionKey k) = "[" ++ show k ++ "]"
parser :: Parser Message
parser = whiteSpace ptp *> parseMessage <* eof
  where
    parseMessage = many parseField
    parseField = liftM2 Field parseKey parseValue
    parseKey =
        liftM Key (identifier ptp) <|>
        liftM UnknownKey (natural ptp) <|>
        liftM ExtensionKey (brackets ptp (identifier ptp `sepBy1` dot ptp)) <|>
        liftM UnknownExtensionKey (brackets ptp (natural ptp))
    parseValue =
        colon ptp *> choice
            [parseNumber, parseString, parseEnumValue, parseMessageValue] <|>
        parseMessageValue
    parseNumber = do
        negative <- (symbol ptp "-" >> return True) <|> return False
        value <- naturalOrFloat ptp
        return $ makeNumberValue negative value
    parseString = liftM (ByteStringValue . mconcat)
        $ many1 $ lexeme ptp $ protoStringLiteral
    parseEnumValue = liftM EnumValue (identifier ptp)
    parseMessageValue =
        braces ptp (parseAny <|>
                    liftM (MessageValue Nothing) parseMessage) <|>
        angles ptp (liftM (MessageValue Nothing) parseMessage)
    typeUri = liftM StrictText.pack (many (satisfy (\c -> c /= ']' && not (isSpace c)))) <?>
              "type URI"
    parseAny = liftM2 MessageValue (liftM Just (brackets ptp typeUri))
                                   (braces ptp parseMessage)
    makeNumberValue :: Bool -> Either Integer Double -> Value
    makeNumberValue True (Left intValue) = IntValue (negate intValue)
    makeNumberValue False (Left intValue) = IntValue intValue
    makeNumberValue True (Right doubleValue) = DoubleValue (negate doubleValue)
    makeNumberValue False (Right doubleValue) = DoubleValue doubleValue
protoStringLiteral :: Parser ByteString
protoStringLiteral = do
    initialQuoteChar <- char '\'' <|> char '\"'
    word8s <- many stringChar
    _ <- char initialQuoteChar
    return $ pack word8s
  where
    stringChar :: Parser Word8
    stringChar = nonEscape <|> stringEscape
    nonEscape  = fmap (fromIntegral . ord)
        $ satisfy (\c -> c `notElem` "\\\'\"" && ord c < 256)
    stringEscape = char '\\' >> (octal <|> hex <|> unicode <|> simple)
    octal = do d0 <- octDigit
               d1 <- optionMaybe octDigit
               d2 <- optionMaybe octDigit
               readMaybeDigits readOct [Just d0, d1, d2]
    readMaybeDigits :: ReadS Word8 -> [Maybe Char] -> Parser Word8
    readMaybeDigits reader
        = return . (\str -> let [(v, "")] = reader str in v) . catMaybes
    hex = do _ <- oneOf "xX"
             d0 <- hexDigit
             d1 <- optionMaybe hexDigit
             readMaybeDigits readHex [Just d0, d1]
    unicode = oneOf "uU" >> fail "Unicode in string literals not yet supported"
    simple = choice $ map charRet [ ('a', '\a')
                                  , ('b', '\b')
                                  , ('f', '\f')
                                  , ('n', '\n')
                                  , ('r', '\r')
                                  , ('t', '\t')
                                  , ('v', '\v')
                                  , ('\\', '\\')
                                  , ('\'', '\'')
                                  , ('\"', '\"')
                                  ]
      where
        charRet :: (Char, Char) -> Parser Word8
        charRet (escapeCh, ch) = do _ <- char escapeCh
                                    return $ fromIntegral $ ord ch