{-# LANGUAGE BangPatterns, RankNTypes, ScopedTypeVariables, CPP #-}
-- | This "Parser" module takes a filename and its contents as a
-- bytestring, and uses Lexer.hs to make a stream of tokens that it
-- parses. No IO is performed and the error function is not used.
-- Since the Lexer should also avoid such errors this should be a
-- reliably total function of the input.
--
-- The internals have been updated to handle Google's protobuf version
-- 2.0.3 formats, including EnumValueOptions.
module Text.ProtocolBuffers.ProtoCompile.Parser(parseProto,isValidPacked) where

import qualified Text.DescriptorProtos.DescriptorProto                as D(DescriptorProto)
import qualified Text.DescriptorProtos.DescriptorProto                as D.DescriptorProto(DescriptorProto(..))
-- import qualified Text.DescriptorProtos.DescriptorProto.ExtensionRange as D(ExtensionRange)
import qualified Text.DescriptorProtos.DescriptorProto.ExtensionRange as D.ExtensionRange(ExtensionRange(..))
import qualified Text.DescriptorProtos.EnumDescriptorProto            as D(EnumDescriptorProto)
import qualified Text.DescriptorProtos.EnumDescriptorProto            as D.EnumDescriptorProto(EnumDescriptorProto(..))
-- import qualified Text.DescriptorProtos.EnumOptions                    as D(EnumOptions)
import qualified Text.DescriptorProtos.EnumOptions                    as D.EnumOptions(EnumOptions(..))
import qualified Text.DescriptorProtos.EnumValueDescriptorProto       as D(EnumValueDescriptorProto)
import qualified Text.DescriptorProtos.EnumValueDescriptorProto       as D.EnumValueDescriptorProto(EnumValueDescriptorProto(..))
-- import qualified Text.DescriptorProtos.EnumValueOptions               as D(EnumValueOptions)
import qualified Text.DescriptorProtos.EnumValueOptions               as D.EnumValueOptions(EnumValueOptions(..))
import qualified Text.DescriptorProtos.FieldDescriptorProto           as D(FieldDescriptorProto)
import qualified Text.DescriptorProtos.FieldDescriptorProto           as D.FieldDescriptorProto(FieldDescriptorProto(..))
import           Text.DescriptorProtos.FieldDescriptorProto.Label
-- import qualified Text.DescriptorProtos.FieldDescriptorProto.Type      as D.FieldDescriptorProto(Type)
import           Text.DescriptorProtos.FieldDescriptorProto.Type         (Type(..))
-- import qualified Text.DescriptorProtos.FieldOptions                   as D(FieldOptions)
import qualified Text.DescriptorProtos.FieldOptions                   as D.FieldOptions(FieldOptions(..))
import qualified Text.DescriptorProtos.FileDescriptorProto            as D(FileDescriptorProto)
import qualified Text.DescriptorProtos.FileDescriptorProto            as D.FileDescriptorProto(FileDescriptorProto(..))
-- import qualified Text.DescriptorProtos.FileOptions                    as D(FileOptions)
import qualified Text.DescriptorProtos.FileOptions                    as D.FileOptions(FileOptions(..))
-- import qualified Text.DescriptorProtos.MessageOptions                 as D(MessageOptions)
import qualified Text.DescriptorProtos.MessageOptions                 as D.MessageOptions(MessageOptions(..))
import qualified Text.DescriptorProtos.MethodDescriptorProto          as D(MethodDescriptorProto)
import qualified Text.DescriptorProtos.MethodDescriptorProto          as D.MethodDescriptorProto(MethodDescriptorProto(..))
-- import qualified Text.DescriptorProtos.MethodOptions                  as D(MethodOptions)
import qualified Text.DescriptorProtos.MethodOptions                  as D.MethodOptions(MethodOptions(..))
import qualified Text.DescriptorProtos.OneofDescriptorProto           as D(OneofDescriptorProto)
import qualified Text.DescriptorProtos.OneofDescriptorProto           as D.OneofDescriptorProto(OneofDescriptorProto(..))
import qualified Text.DescriptorProtos.ServiceDescriptorProto         as D(ServiceDescriptorProto)
import qualified Text.DescriptorProtos.ServiceDescriptorProto         as D.ServiceDescriptorProto(ServiceDescriptorProto(..))
-- import qualified Text.DescriptorProtos.ServiceOptions                 as D(ServiceOptions)
import qualified Text.DescriptorProtos.ServiceOptions                 as D.ServiceOptions(ServiceOptions(..))
import qualified Text.DescriptorProtos.UninterpretedOption            as D(UninterpretedOption)
import qualified Text.DescriptorProtos.UninterpretedOption            as D.UninterpretedOption(UninterpretedOption(..))
-- import qualified Text.DescriptorProtos.UninterpretedOption.NamePart   as D(NamePart)
import qualified Text.DescriptorProtos.UninterpretedOption.NamePart   as D.NamePart(NamePart(..))

import Text.ProtocolBuffers.Basic
import Text.ProtocolBuffers.Identifiers
import Text.ProtocolBuffers.Header(ReflectEnum(reflectEnumInfo),enumName)
import Text.ProtocolBuffers.ProtoCompile.Lexer(Lexed(..),alexScanTokens,getLinePos)
import Text.ProtocolBuffers.ProtoCompile.Instances(parseLabel,parseType)
-- import Text.ProtocolBuffers.Reflections()

import Control.Monad(when,liftM2,liftM3)
import qualified Data.ByteString.Lazy as L(unpack)
import qualified Data.ByteString.Lazy.Char8 as LC(notElem,head)
import qualified Data.ByteString.Lazy.UTF8 as U(fromString,toString)
import Data.Char(isUpper,toLower)
import Data.Ix(inRange)
import Data.Maybe(fromMaybe)
import Data.Monoid(mconcat)
import Data.Sequence((|>),(><))
import qualified Data.Sequence as Seq(fromList,length,empty)
import Data.Word(Word8)
import Numeric(showOct)
--import System.FilePath(takeFileName)
import Text.ParserCombinators.Parsec(GenParser,ParseError,runParser,sourceName,anyToken,many1,lookAhead,try
                                    ,getInput,setInput,getPosition,setPosition,getState,setState
                                    ,(<?>),(<|>),token,choice,between,eof,unexpected,skipMany)
import Text.ParserCombinators.Parsec.Pos(newPos)

default ()

#if MIN_VERSION_parsec(3,0,0)
type P st = GenParser Lexed st
#else
type P = GenParser Lexed
#endif

parseProto :: String -> ByteString -> Either ParseError D.FileDescriptorProto
parseProto filename fileContents = do
  let initial_line_number = case lexed of
                              [] -> setPosition (newPos filename 0 0)
                              (l:_) -> setPosition (newPos filename (getLinePos l) 0)
      initState = defaultValue {D.FileDescriptorProto.name=Just (uFromString filename)}
      lexed = alexScanTokens fileContents
  runParser (initial_line_number >> parser) initState filename lexed


{-# INLINE mayRead #-}
mayRead :: ReadS a -> String -> Maybe a
mayRead f s = case f s of [(a,"")] -> Just a; _ -> Nothing

true,false :: ByteString
true = U.fromString "true"
false = U.fromString "false"

-- Use 'token' via 'tok' to make all the parsers for the Lexed values
tok :: (Lexed -> Maybe a) -> P s a
tok f = token show (\lexed -> newPos "" (getLinePos lexed) 0) f

pChar :: Char -> P s ()
pChar c = tok (\l-> case l of L _ x -> if (x==c) then return () else Nothing
                              _ -> Nothing) <?> ("character "++show c)

eol,eols :: P s ()
eol = pChar ';'
eols = skipMany eol

pName :: ByteString -> P s Utf8
pName name = tok (\l-> case l of L_Name _ x -> if (x==name) then return (Utf8 x) else Nothing
                                 _ -> Nothing) <?> ("name "++show (U.toString name))

rawStrMany :: P s (ByteString,ByteString) -- used for any and all access to L_String
rawStrMany = fmap mconcat (many1 singleStringLit)
  where singleStringLit :: P s (ByteString,ByteString)
        singleStringLit = tok (\l-> case l of L_String _ raw x -> return (raw,x)
                                              _ -> Nothing) <?> "expected string literal in single or double quotes"

-- In Google's version 2.4.0 there can be default message values which are curly-brace delimited
-- aggregates.  The lexer eats these fine, and this parser routine should recognized a balanced
-- expression.  Used with 'undoLexer'.
--
-- This assumes the initial (L _ '{' ) has already been parsed.
getAggregate :: P s [Lexed]
getAggregate = do
  input <- getInput
  let count :: Int -> Int -> P s [Lexed]
      count !n !depth = do
        -- Not using getNextToken so that the value of 'n' in count is correct.
        t <- anyToken
        case t  of
          L _ '{' -> count (succ n) (succ depth)
          L _ '}' -> let n' = succ n
                         depth' = pred depth
                     in if 0==depth' then return (take n' input)
                          else count n' depth'
          _ -> count (succ n) depth
  ls <- count 0 1
  return ls

getNextToken :: P s Lexed -- used in storing value for UninterpretedOption
getNextToken = do
  l <- lookAhead anyToken
  case l of
    L_String line _ _ -> rawStrMany >>= \(raw,bs) -> return (L_String line raw bs)
    _ -> anyToken

bsLit :: P s ByteString
bsLit = fmap fst rawStrMany <?> "quoted bytes literal, raw form"

strLit :: P s Utf8
strLit = fmap snd rawStrMany >>= \ x -> case isValidUTF8 x of
                                          Nothing -> return (Utf8 x)
                                          Just n -> fail $ "bad utf-8 byte in string literal position # "++show n

intLit,fieldInt,enumInt :: (Num a) => P s a
intLit = tok (\l-> case l of L_Integer _ x -> return (fromInteger x)
                             _ -> Nothing) <?> "integer literal"

fieldInt = tok (\l-> case l of L_Integer _ x | inRange validRange x && not (inRange reservedRange x) -> return (fromInteger x)
                               _ -> Nothing) <?> "field number (from 0 to 2^29-1 and not in 19000 to 19999)"
  where validRange = (0,(2^(29::Int))-1)
        reservedRange = (19000,19999)

enumInt = tok (\l-> case l of L_Integer _ x | inRange validRange x -> return (fromInteger x)
                              _ -> Nothing) <?> "enum value (from -2^31 to 2^31-1)"
  where validRange = (toInteger (minBound :: Int32), toInteger (maxBound :: Int32))

doubleLit :: P s Double
doubleLit = tok (\l-> case l of L_Double _ x -> return x
                                L_Integer _ x -> return (fromInteger x)
                                L_Name _ s | s == U.fromString "inf" -> return (1/0)
                                           | s == U.fromString "-inf" -> return (-1/0)
                                           | s == U.fromString "nan" -> return (0/0)
                                _ -> Nothing) <?> "double (or integer) literal or nan, inf, -inf"

floatLit :: P s Float
floatLit = tok (\l-> case l of L_Double _ x -> return . fromRational . toRational $ x
                               L_Integer _ x -> return (fromInteger x)
                               L_Name _ s | s == U.fromString "inf" -> return (1/0)
                                          | s == U.fromString "-inf" -> return (-1/0)
                                          | s == U.fromString "nan" -> return (0/0)
                               _ -> Nothing) <?> "float (or integer) literal or nan, inf, -inf"

ident,ident1,ident_package :: P s Utf8
ident = tok (\l-> case l of L_Name _ x -> return (Utf8 x)
                            _ -> Nothing) <?> "identifier (perhaps dotted)"

ident1 = tok (\l-> case l of L_Name _ x | LC.notElem '.' x -> return (Utf8 x)
                             _ -> Nothing) <?> "identifier (not dotted)"

ident_package = tok (\l-> case l of L_Name _ x | LC.head x /= '.' -> return (Utf8 x)
                                    _ -> Nothing) <?> "package name (no leading dot)"

boolLit :: P s Bool
boolLit = tok (\l-> case l of L_Name _ x | x == true -> return True
                                         | x == false -> return False
                              _ -> Nothing) <?> "boolean literal ('true' or 'false')"

enumLit :: forall s a. (Read a,ReflectEnum a) => P s a -- This is very polymorphic, and with a good error message
enumLit = do
  s <- fmap' uToString ident1
  case mayRead reads s of
    Just x -> return x
    Nothing -> let self = enumName (reflectEnumInfo (undefined :: a))
               in unexpected $ "Enum value not recognized: "++show s++", wanted enum value of type "++show self

-- -------------------------------------------------------------------
-- subParser changes the user state. It is a bit of a hack and is used
-- to define an interesting style of parsing.
subParser :: forall t sSub a s. Show t => GenParser t sSub a -> sSub -> GenParser t s sSub
subParser doSub inSub = do
  in1 <- getInput
  pos1 <- getPosition
  let out = runParser (setPosition pos1 >> doSub >> getStatus) inSub (sourceName pos1) in1
  case out of
    Left pe -> do
      let anyTok :: Int -> GenParser t s [t]
          anyTok i | i<=0 = return []
                   | otherwise = try (liftM2 (:) anyToken (anyTok (pred i))) <|> (return [])
      context <- anyTok 10
      fail ( unlines [ "The error message from the nested subParser was:\n"++indent (show pe)
                     , "  The next 10 tokens were "++show context ] )
    Right (outSub,in2,pos2) -> setInput in2 >> setPosition pos2 >> return outSub
 where getStatus = liftM3 (,,) getState getInput getPosition
       indent = unlines . map (\s -> ' ':' ':s) . lines

{-# INLINE return' #-}
return' :: (Monad m) => a -> m a
return' a = return $! a

{-# INLINE fmap' #-}
fmap' :: (Monad m) => (a->b) -> m a -> m b
fmap' f m = m >>= \a -> seq a (return $! (f a))

{-# INLINE update' #-}
update' :: (s -> s) -> P s ()
update' f = getState >>= \s -> setState $! (f s)

parser :: P D.FileDescriptorProto D.FileDescriptorProto
parser = proto >> getState
  where proto = eof <|> (choice [ eol
                                , importFile
                                , package
                                , fileOption
                                , message upTopMsg
                                , enum upTopEnum
                                , extend upTopMsg upTopExt
                                , service
                                , syntax
                                ] >> proto)
        upTopMsg msg = update' (\s -> s {D.FileDescriptorProto.message_type=D.FileDescriptorProto.message_type s |> msg})
        upTopEnum e  = update' (\s -> s {D.FileDescriptorProto.enum_type=D.FileDescriptorProto.enum_type s |> e})
        upTopExt f   = update' (\s -> s {D.FileDescriptorProto.extension=D.FileDescriptorProto.extension s |> f})

importFile,package,fileOption,service :: P D.FileDescriptorProto.FileDescriptorProto ()
importFile = pName (U.fromString "import") >> strLit >>= \p -> eol >> update' (\s -> s {D.FileDescriptorProto.dependency=(D.FileDescriptorProto.dependency s) |> p})

package = pName (U.fromString "package") >> do
  p <- ident_package
  eol
  update' (\s -> s {D.FileDescriptorProto.package=Just p})

-- This parses the new extensible option name format of Google's protobuf verison 2.0.2
-- "foo.(bar.baz).qux" goes to Left [("foo",False),("bar.baz",True),("qux",False)]
pOptionE :: P s (Either D.UninterpretedOption String)
pOptionE = do
  let pieces = withParens <|> withoutParens
      withParens = do
        part <- between (pChar '(') (pChar ')') ident
        fmap ((part,True) :) ( choice [ pChar '=' >> return []
                                      , pChar '.' >> withParens
                                      , withoutParens ] )
      withoutParens = do
        parts <- fmap split ident
        let prepend rest = foldr (\part xs -> (part,False):xs) rest parts
        fmap prepend ( choice [ pChar '=' >> return []
                              , pChar '.' >> withParens ] )
  nameParts <- pieces
  case nameParts of
    [(optName,False)] -> return (Right (uToString optName))
    _ -> do uno <- pUnValue (makeUninterpetedOption nameParts)
            return (Left uno)

pOptionWith :: P s t -> P s (Either D.UninterpretedOption String, t)
pOptionWith = liftM2 (,) (pName (U.fromString "option") >> pOptionE)

-- This does not handle D.UninterpretedOption.aggregate_value yet
pUnValue :: D.UninterpretedOption -> P s D.UninterpretedOption
pUnValue uno = getNextToken >>= storeLexed where
  storeLexed (L_Name _ bs) = return $ uno {D.UninterpretedOption.identifier_value = Just (Utf8 bs)}
  storeLexed (L_Integer _ i) | i >= 0 =
    return $ uno { D.UninterpretedOption.positive_int_value = Just (fromInteger i) }
                             | otherwise =
    return $ uno { D.UninterpretedOption.negative_int_value = Just (fromInteger i) }
  storeLexed (L_Double _ d) = return $ uno {D.UninterpretedOption.double_value = Just d }
  storeLexed (L_String _ _raw bs) = return $ uno {D.UninterpretedOption.string_value = Just bs }
  storeLexed l@(L _ '{') = do ls <- getAggregate
                              let bs = uFromString . concatMap undoLexer $ l:ls
                              return $ uno {D.UninterpretedOption.aggregate_value = Just bs }
  storeLexed _ = fail $ "Could not the parse value of an custom (uninterpreted) option"

makeUninterpetedOption :: [(Utf8,Bool)] -> D.UninterpretedOption
makeUninterpetedOption nameParts = defaultValue { D.UninterpretedOption.name = Seq.fromList . map makeNamePart $ nameParts }
  where makeNamePart (name_part,is_extension) = defaultValue { D.NamePart.name_part = name_part
                                                             , D.NamePart.is_extension =  is_extension }

fileOption = pOptionWith getOld >>= setOption >>= setNew >> eol where
  getOld = fmap (fromMaybe defaultValue . D.FileDescriptorProto.options) getState
  setNew p = update' (\s -> s {D.FileDescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' (old {D.FileOptions.uninterpreted_option = D.FileOptions.uninterpreted_option old |> uno})
  setOption (Right optName,old) =
    case optName of
      "java_package"          -> strLit  >>= \p -> return' (old {D.FileOptions.java_package        =Just p})
      "java_outer_classname"  -> strLit  >>= \p -> return' (old {D.FileOptions.java_outer_classname=Just p})
      "java_multiple_files"   -> boolLit >>= \p -> return' (old {D.FileOptions.java_multiple_files =Just p})
      "java_generate_equals_and_hash" -> boolLit >>= \p -> return' (old {D.FileOptions.java_generate_equals_and_hash =Just p})
      "java_string_check_utf8"-> boolLit >>= \p -> return' (old {D.FileOptions.java_string_check_utf8 = Just p})
      "optimize_for"          -> enumLit >>= \p -> return' (old {D.FileOptions.optimize_for        =Just p})
      "go_package"            -> strLit  >>= \p -> return' (old {D.FileOptions.go_package          =Just p})
      "cc_generic_services"   -> boolLit >>= \p -> return' (old {D.FileOptions.cc_generic_services =Just p})
      "java_generic_services" -> boolLit >>= \p -> return' (old {D.FileOptions.java_generic_services =Just p})
      "py_generic_services"   -> boolLit >>= \p -> return' (old {D.FileOptions.py_generic_services =Just p})
      "deprecated"            -> boolLit >>= \p -> return' (old {D.FileOptions.deprecated          =Just p})
      "cc_enable_arenas"      -> boolLit >>= \p -> return' (old {D.FileOptions.cc_enable_arenas      =Just p})
      "objc_class_prefix"     -> strLit  >>= \p -> return' (old {D.FileOptions.objc_class_prefix   =Just p})
      "csharp_namespace"      -> strLit  >>= \p -> return' (old {D.FileOptions.csharp_namespace    =Just p})
      "javanano_use_deprecated_package" -> boolLit >>= \p -> return' (old {D.FileOptions.javanano_use_deprecated_package =Just p})
      _ -> unexpected $ "FileOptions has no option named " ++ optName

oneof :: (D.OneofDescriptorProto -> Seq D.FieldDescriptorProto -> P s ()) -> P s ()
oneof up = pName (U.fromString "oneof") >> do
  self <- ident1
  (o,fs) <- subParser (pChar '{' >> subOneof) (defaultValue {D.OneofDescriptorProto.name=Just self}, Seq.empty)
  up o fs

subOneof :: P (D.OneofDescriptorProto,Seq D.FieldDescriptorProto) ()
subOneof = pChar '}' <|> (choice [ eof
                                 , fieldOneof >>= upMsgField] >> subOneof)
  where upMsgField f = update' (\(o,fs) -> (o,fs |> f))


fieldOneof :: P s D.FieldDescriptorProto
fieldOneof = do
  sType <- ident
  -- parseType may return Nothing, this is fixed up in Text.ProtocolBuffers.ProtoCompile.Resolve.fqField
  let (maybeTypeCode,maybeTypeName) = case parseType (uToString sType) of
                                        Just t -> (Just t,Nothing)
                                        Nothing -> (Nothing, Just sType)
  name <- ident1
  number <- pChar '=' >> fieldInt
  let v1 = defaultValue { D.FieldDescriptorProto.name = Just name
                        , D.FieldDescriptorProto.number = Just number
                        , D.FieldDescriptorProto.label = Just LABEL_OPTIONAL
                        , D.FieldDescriptorProto.type' = maybeTypeCode
                        , D.FieldDescriptorProto.type_name = maybeTypeName
                        }
  eol >> return v1

message :: (D.DescriptorProto -> P s ()) -> P s ()
message up = pName (U.fromString "message") >> do
  self <- ident1
  up =<< subParser (pChar '{' >> subMessage) (defaultValue {D.DescriptorProto.name=Just self})

-- subMessage is also used to parse group declarations
subMessage,messageOption,extensions :: P D.DescriptorProto.DescriptorProto ()
subMessage = (pChar '}') <|> (choice [ eol
                                     , field upNestedMsg Nothing >>= upMsgField
                                     , message upNestedMsg
                                     , enum upNestedEnum
                                     , oneof upMsgOneof
                                     , extensions
                                     , extend upNestedMsg upExtField
                                     , messageOption] >> subMessage
                                     )
  where upNestedMsg msg = update' (\s -> s {D.DescriptorProto.nested_type=D.DescriptorProto.nested_type s |> msg})
        upNestedEnum e  = update' (\s -> s {D.DescriptorProto.enum_type=D.DescriptorProto.enum_type s |> e})
        upMsgField f    = update' (\s -> s {D.DescriptorProto.field=D.DescriptorProto.field s |> f})
        upMsgOneof o xs  = update' $ \s ->
          let n = Seq.length (D.DescriptorProto.oneof_decl s)
              xs' = fmap (\s -> s { D.FieldDescriptorProto.oneof_index = Just (fromIntegral n) }) xs
          in s {D.DescriptorProto.oneof_decl=D.DescriptorProto.oneof_decl s |> o
               ,D.DescriptorProto.field=D.DescriptorProto.field s >< xs'
               }

        upExtField f    = update' (\s -> s {D.DescriptorProto.extension=D.DescriptorProto.extension s |> f})

messageOption = pOptionWith getOld >>= setOption >>= setNew >> eol where
  getOld = fmap (fromMaybe defaultValue . D.DescriptorProto.options) getState
  setNew p = update' (\s -> s {D.DescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' (old {D.MessageOptions.uninterpreted_option = D.MessageOptions.uninterpreted_option old |> uno })
  setOption (Right optName,old) =
    case optName of
      "message_set_wire_format" -> boolLit >>= \p -> return' (old {D.MessageOptions.message_set_wire_format=Just p})
      "no_standard_descriptor_accessor" -> boolLit >>= \p -> return' (old {D.MessageOptions.no_standard_descriptor_accessor=Just p})
      _ -> unexpected $ "MessageOptions has no option named "++optName

extend :: (D.DescriptorProto -> P s ()) -> (D.FieldDescriptorProto -> P s ()) -> P s ()
extend upGroup upField = pName (U.fromString "extend") >> do
  typeExtendee <- ident
  pChar '{'
  let rest = (field upGroup (Just typeExtendee) >>= upField) >> eols >> (pChar '}' <|> rest)
  eols >> rest

field :: (D.DescriptorProto -> P s ()) -> Maybe Utf8 -> P s D.FieldDescriptorProto
field upGroup maybeExtendee = do
  let allowedLabels = case maybeExtendee of
                        Nothing -> ["optional","repeated","required"]
                        Just {} -> ["optional","repeated"] -- cannot declare a required extension and an oneof extension. 
  sLabel <- choice . map (pName . U.fromString) $ allowedLabels
  theLabel <- maybe (fail ("not a valid Label :"++show sLabel)) return (parseLabel (uToString sLabel))
  sType <- ident
  -- parseType may return Nothing, this is fixed up in Text.ProtocolBuffers.ProtoCompile.Resolve.fqField
  let (maybeTypeCode,maybeTypeName) = case parseType (uToString sType) of
                                        Just t -> (Just t,Nothing)
                                        Nothing -> (Nothing, Just sType)
  name <- ident1
  number <- pChar '=' >> fieldInt
  let v1 = defaultValue { D.FieldDescriptorProto.name = Just name
                        , D.FieldDescriptorProto.number = Just number
                        , D.FieldDescriptorProto.label = Just theLabel
                        , D.FieldDescriptorProto.type' = maybeTypeCode
                        , D.FieldDescriptorProto.type_name = maybeTypeName
                        , D.FieldDescriptorProto.extendee = maybeExtendee }
  if maybeTypeCode == Just TYPE_GROUP
    then do let nameString = uToString name
            when (null nameString) (fail "Impossible? ident1 for field name was empty")
            when (not (isUpper (head nameString))) (fail $ "Group names must start with an upper case letter: "++show name)
            upGroup =<< subParser (pChar '{' >> subMessage) (defaultValue {D.DescriptorProto.name=Just name})
            let fieldName = Just $ uFromString (map toLower nameString)  -- down-case the whole name
                v = v1 { D.FieldDescriptorProto.name = fieldName
                       , D.FieldDescriptorProto.type_name = Just name }
            return v
    else (eol >> return v1) <|> (subParser (pChar '[' >> subField theLabel maybeTypeCode) v1)

subField,defaultConstant :: Label -> Maybe Type -> P D.FieldDescriptorProto ()
subField label mt = do
  (defaultConstant label mt <|> fieldOption label mt) <?> "expected \"default\" or a fieldOption"
  (pChar ']' >> eol) <|> (pChar ',' >> subField label mt)

defaultConstant LABEL_REPEATED _ = pName (U.fromString "default") >> fail "Repeated fields cannot have a default value"
defaultConstant _ mt = do
  _ <- pName (U.fromString "default")
  maybeDefault <- pChar '=' >> fmap Just (constant mt)
  -- XXX Hack: we lie about Utf8 for the default_value below
  update' (\s -> s { D.FieldDescriptorProto.default_value = fmap Utf8 maybeDefault })

-- This does a type and range safe parsing of the default value,
-- except for enum constants which cannot be checked (the definition
-- may not have been parsed yet).
--
-- Double and Float are checked to be not-Nan and not-Inf.  The
-- int-like types are checked to be within the corresponding range.
constant :: Maybe Type -> P s ByteString
-- With Nothing the next item may be an enum constant or a '{' and an aggregate.
constant Nothing = enumIdent <?> "expected the name of an enum or a curly-brace-enclosed aggregate value"
  where enumIdent = fmap utf8 ident1 -- hopefully a matching enum; forget about Utf8
constant (Just t) =
  case t of
    TYPE_DOUBLE  -> do d <- doubleLit
--                       when (isNaN d || isInfinite d)
--                            (fail $ "default floating point literal "++show d++" is out of range for type "++show t)
                       return' (utf8 . uFromString . showRF $ d)
    TYPE_FLOAT   -> do fl <- floatLit
{-
                       let fl :: Float
                           fl = read (show d)
--                       when (isNaN fl || isInfinite fl || (d==0) /= (fl==0))
--                            (fail $ "default floating point literal "++show d++" is out of range for type "++show t)
                       when (isNaN fl /= isNaN d || isInfinite fl /= isInfinite d  || (d==0) /= (fl==0))
                            (fail $ "default floating point literal "++show d++" is out of range for type "++show t)
-}
                       return' (utf8 . uFromString . showRF $ fl)
    TYPE_BOOL    -> boolLit >>= \b -> return' $ if b then true else false
    TYPE_STRING  -> strLit >>= return . utf8
    TYPE_BYTES   -> bsLit
    TYPE_GROUP   -> unexpected $ "cannot have a default for field of "++show t
    TYPE_MESSAGE -> unexpected $ "cannot have a default for field of "++show t
    TYPE_ENUM    -> fmap utf8 ident1 -- IMPOSSIBLE : SHOULD HAVE HAD Maybe Type PARAMETER match Nothing
    TYPE_SFIXED32 -> f (undefined :: Int32)
    TYPE_SINT32   -> f (undefined :: Int32)
    TYPE_INT32    -> f (undefined :: Int32)
    TYPE_SFIXED64 -> f (undefined :: Int64)
    TYPE_SINT64   -> f (undefined :: Int64)
    TYPE_INT64    -> f (undefined :: Int64)
    TYPE_FIXED32  -> f (undefined :: Word32)
    TYPE_UINT32   -> f (undefined :: Word32)
    TYPE_FIXED64  -> f (undefined :: Word64)
    TYPE_UINT64   -> f (undefined :: Word64)
  where f :: (Bounded a,Integral a) => a -> P s ByteString
        f u = do let range = (toInteger (minBound `asTypeOf` u),toInteger (maxBound `asTypeOf` u))
                 i <- intLit
                 when (not (inRange range i))
                      (fail $ "default integer value "++show i++" is out of range for type "++show t)
                 return' (utf8 . uFromString . show $ i)

fieldOption :: Label -> Maybe Type -> P D.FieldDescriptorProto ()
fieldOption label mt = liftM2 (,) pOptionE getOld >>= setOption >>= setNew where
  getOld = fmap (fromMaybe defaultValue . D.FieldDescriptorProto.options) getState
  setNew p = update' (\s -> s { D.FieldDescriptorProto.options = Just p })
  setOption (Left uno,old) =
    return' (old {D.FieldOptions.uninterpreted_option = D.FieldOptions.uninterpreted_option old |> uno })
  setOption (Right optName,old) =
    case optName of
      "ctype" | (Just TYPE_STRING) == mt -> do
        enumLit >>= \p -> return' (old {D.FieldOptions.ctype=Just p})
              | otherwise -> unexpected $ "field option cyte is only defined for string fields"
      -- "experimental_map_key" | Nothing == mt -> do
       -- strLit >>= \p -> return' (old {D.FieldOptions.experimental_map_key=Just p})
       --                      | otherwise -> unexpected $ "field option experimental_map_key is only defined for messages"
      "packed" | isValidPacked label mt -> do
        boolLit >>= \p -> return' (old {D.FieldOptions.packed=Just p})
               | otherwise -> unexpected $ "field option packed is not defined for this kind of field"
      "deprecated" -> boolLit >>= \p -> return' (old {D.FieldOptions.deprecated=Just p})
      _ -> unexpected $ "FieldOptions has no option named "++optName

isValidPacked :: Label -> Maybe Type -> Bool
isValidPacked LABEL_REPEATED Nothing = True -- provisional, okay if Enum but wrong if Message, checked in Resolve.fqField
isValidPacked LABEL_REPEATED (Just typeCode) =
  case typeCode of
    TYPE_STRING -> False
    TYPE_GROUP -> False
    TYPE_BYTES -> False
    TYPE_MESSAGE -> False -- Impossible value for typeCode from parseType, but here for completeness
    TYPE_ENUM -> True     -- Impossible value for typeCode from parseType, but here for completeness
    _ -> True
isValidPacked _ _ = False

enum :: (D.EnumDescriptorProto -> P s ()) -> P s ()
enum up = pName (U.fromString "enum") >> do
  self <- ident1
  up =<< subParser (pChar '{' >> subEnum) (defaultValue {D.EnumDescriptorProto.name=Just self})

subEnum,enumOption :: P D.EnumDescriptorProto.EnumDescriptorProto ()
subEnum = eols >> rest -- Note: Must check enumOption before enumVal
  where rest = (enumOption <|> enumVal) >> eols >> (pChar '}' <|> rest)

enumOption = pOptionWith getOld >>= setOption >>= setNew >> eol where
  getOld = fmap (fromMaybe defaultValue . D.EnumDescriptorProto.options) getState
  setNew p = update' (\s -> s {D.EnumDescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' $  (old {D.EnumOptions.uninterpreted_option = D.EnumOptions.uninterpreted_option old |> uno })
  setOption (Right optName,_old) =
    case optName of
      _ -> unexpected $ "EnumOptions has no option named "++optName

enumVal :: P D.EnumDescriptorProto ()
enumVal = do
  name <- ident1
  number <- pChar '=' >> enumInt
  let v1 = defaultValue { D.EnumValueDescriptorProto.name = Just name
                        , D.EnumValueDescriptorProto.number = Just number }
  v <- (eol >> return v1) <|> subParser (pChar '[' >> subEnumValue) v1
  update' (\s -> s {D.EnumDescriptorProto.value=D.EnumDescriptorProto.value s |> v})

subEnumValue,enumValueOption :: P D.EnumValueDescriptorProto ()
subEnumValue = enumValueOption >> ( (pChar ']' >> eol) <|> (pChar ',' >> subEnumValue) )

enumValueOption = liftM2 (,) pOptionE getOld >>= setOption >>= setNew where
  getOld = fmap (fromMaybe defaultValue . D.EnumValueDescriptorProto.options) getState
  setNew p = update' (\s -> s {D.EnumValueDescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' $  (old {D.EnumValueOptions.uninterpreted_option = D.EnumValueOptions.uninterpreted_option old |> uno })
  setOption (Right optName,_old) =
    case optName of
      _ -> unexpected $ "EnumValueOptions has no option named "++optName

extensions = pName (U.fromString "extensions") >> do
  start <- fieldInt
  let noEnd = eol >> return (succ start)
      toEnd = pName (U.fromString "to") >> (fieldInt <|> (pName (U.fromString "max") >> return (getFieldId maxBound)))
  end <- choice [ noEnd, toEnd ]
  let e = defaultValue { D.ExtensionRange.start = Just start
                       , D.ExtensionRange.end = Just (succ end) }  -- One _past_ the end!
  update' (\s -> s {D.DescriptorProto.extension_range=D.DescriptorProto.extension_range s |> e})

service = pName (U.fromString "service") >> do
  name <- ident1
  f <- subParser (pChar '{' >> subService) (defaultValue {D.ServiceDescriptorProto.name=Just name})
  update' (\s -> s {D.FileDescriptorProto.service=D.FileDescriptorProto.service s |> f})

 where subService = pChar '}' <|> (choice [ eol, rpc, serviceOption ] >> subService)

syntax = pName (U.fromString "syntax") >> do
  pChar '='
  p <- strLit
  update' (\s -> s {D.FileDescriptorProto.syntax=Just p})

serviceOption,rpc :: P D.ServiceDescriptorProto ()
serviceOption = pOptionWith getOld >>= setOption >>= setNew >> eol where
  getOld = fmap (fromMaybe defaultValue . D.ServiceDescriptorProto.options) getState
  setNew p = update' (\s -> s {D.ServiceDescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' (old {D.ServiceOptions.uninterpreted_option = D.ServiceOptions.uninterpreted_option old |> uno })
  setOption (Right optName,_old) =
    case optName of
      _ -> unexpected $ "ServiceOptions has no option named "++optName

rpc = pName (U.fromString "rpc") >> do
  name <- ident1
  input <- between (pChar '(') (pChar ')') ident
  _ <- pName (U.fromString "returns")
  output <- between (pChar '(') (pChar ')') ident
  let m1 = defaultValue { D.MethodDescriptorProto.name=Just name
                        , D.MethodDescriptorProto.input_type=Just input
                        , D.MethodDescriptorProto.output_type=Just output }
  m <- (eol >> return m1) <|> subParser (pChar '{' >> subRpc) m1
  update' (\s -> s {D.ServiceDescriptorProto.method=D.ServiceDescriptorProto.method s |> m})

subRpc,rpcOption :: P D.MethodDescriptorProto ()
subRpc = pChar '}' <|> (choice [ eol, rpcOption ] >> subRpc)

rpcOption = pOptionWith getOld >>= setOption >>= setNew >> eol where
  getOld = fmap (fromMaybe defaultValue . D.MethodDescriptorProto.options) getState
  setNew p = update' (\s -> s {D.MethodDescriptorProto.options=Just p})
  setOption (Left uno,old) =
    return' $  (old {D.MethodOptions.uninterpreted_option = D.MethodOptions.uninterpreted_option old |> uno })
  setOption (Right optName,_old) =
    case optName of
      _ -> unexpected $ "MethodOptions has no option named "++optName

-- see google's stubs/strutil.cc lines 398-449/1121 and C99 specification
-- This mainly targets three digit octal codes
cEncode :: [Word8] -> [Char]
cEncode = concatMap one where
  one :: Word8 -> [Char]
  -- special non-octal escaped values
  one 9 = sl  't'
  one 10 = sl 'n'
  one 13 = sl 'r'
  one 34 = sl '"'
  one 39 = sl '\''
  one 92 = sl '\\'
  -- main case of unescaped value
  one x | (32 <= x) && (x < 127) = [toEnum .  fromEnum $  x]
  -- below are the octal escaped values.  This always emits 3 digits.
  one 0 = '\\':"000"
  one x | x < 8 = '\\':'0':'0':(showOct x "")
        | x < 64 = '\\':'0':(showOct x "")
        | otherwise = '\\':(showOct x "")
  sl c = ['\\',c]

showRF :: (Show a, RealFloat a) => a -> String
showRF x | isNaN x = "nan"
         | isInfinite x = if 0 < x then "inf" else "-inf"
         | otherwise = show x

-- Aggregate
{-
data Lexed = L_Integer !Int !Integer
           | L_Double !Int !Double
           | L_Name !Int !L.ByteString
           | L_String !Int !L.ByteString !L.ByteString
           | L !Int !Char
           | L_Error !Int !String
           -}

undoLexer :: Lexed -> String
undoLexer (L_Integer _ integer) = ' ':show integer
undoLexer (L_Double _ double) = ' ':showRF double
undoLexer (L_Name _ bs) = ' ':U.toString bs
undoLexer (L_String _ _ bs) = let middle = L.unpack bs
                                  encoded = cEncode middle  -- escapes both quote and double-quote
                                  s = '\'' : encoded ++ "'"
                              in ' ':s
undoLexer (L _ '{') = " {\n"
undoLexer (L _ '}') = " }\n"
undoLexer (L _ ';') = ";\n"
undoLexer (L _ char) = ' ':[char]
undoLexer (L_Error _ errorMessage) = error ("Lexer failure found when parsing aggregate default value\n:"++errorMessage) -- XXX improve error reporting?