-- | This module contains a near-direct translation of the proto3 grammar -- It uses String for easier compatibility with DotProto.Generator, which needs it for not very good reasons {-# LANGUAGE CPP #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-unused-do-bind #-} module Proto3.Suite.DotProto.Parsing ( parseProto , parseProtoFile ) where import Prelude hiding (fail) import Control.Applicative hiding (empty) import Control.Monad hiding (fail) #if MIN_VERSION_base(4,13,0) import Control.Monad (fail) #endif #if !MIN_VERSION_base(4,13,0) import Control.Monad.Fail #endif import qualified Data.List.NonEmpty as NE import Data.Functor import qualified Data.Text as T import qualified Filesystem.Path.CurrentOS as FP import Proto3.Suite.DotProto.AST import Proto3.Wire.Types (FieldNumber(..)) import Text.Parsec (parse, ParseError) import Text.Parsec.String (Parser) import Text.Parser.Char import Text.Parser.Combinators import Text.Parser.LookAhead import Text.Parser.Token import qualified Text.Parser.Token.Style as TokenStyle import qualified Turtle ---------------------------------------- -- interfaces -- | @parseProto mp inp@ attempts to parse @inp@ as a 'DotProto'. @mp@ is the -- module path to be injected into the AST as part of 'DotProtoMeta' metadata on -- a successful parse. parseProto :: Path -> String -> Either ParseError DotProto parseProto modulePath = parseProtoWithFile modulePath "" parseProtoWithFile :: Path -> String -> String -> Either ParseError DotProto parseProtoWithFile modulePath filePath = parse (runProtoParser (topLevel modulePath)) filePath -- | @parseProtoFile mp fp@ reads and parses the .proto file found at @fp@. @mp@ -- is used downstream during code generation when we need to generate names -- which are a function of the source .proto file's filename and its path -- relative to some @--includeDir@. parseProtoFile :: Turtle.MonadIO m => Path -> Turtle.FilePath -> m (Either ParseError DotProto) parseProtoFile modulePath (FP.encodeString -> fp) = parseProtoWithFile modulePath fp <$> Turtle.liftIO (readFile fp) ---------------------------------------- -- convenience -- | Wrapper around @Text.Parsec.String.Parser@, overriding whitespace lexing. newtype ProtoParser a = ProtoParser { runProtoParser :: Parser a } deriving ( Functor, Applicative, Alternative, Monad, MonadFail, MonadPlus , Parsing, CharParsing, LookAheadParsing) instance TokenParsing ProtoParser where someSpace = TokenStyle.buildSomeSpaceParser (ProtoParser someSpace) TokenStyle.javaCommentStyle -- use the default implementation for other methods: -- nesting, semi, highlight, token empty :: ProtoParser () empty = textSymbol ";" >> return () fieldNumber :: ProtoParser FieldNumber fieldNumber = FieldNumber . fromInteger <$> integer ---------------------------------------- -- identifiers identifierName :: ProtoParser String identifierName = do h <- letter t <- many (alphaNum <|> char '_') return $ h:t -- Parses a full identifier, without consuming trailing space. _identifier :: ProtoParser DotProtoIdentifier _identifier = identifierName `sepBy1` string "." >>= \case [] -> fail "impossible" [i] -> pure (Single i) (i:is) -> pure (Dots (Path (i NE.:| is))) singleIdentifier :: ProtoParser DotProtoIdentifier singleIdentifier = Single <$> token identifierName -- Parses a full identifier, consuming trailing space. identifier :: ProtoParser DotProtoIdentifier identifier = token _identifier -- [note] message and enum types are defined by the proto3 spec to have an optional leading period (messageType and enumType in the spec) -- what this indicates is, as far as i can tell, not documented, and i haven't found this syntax used in practice -- it's ommitted but can be fairly easily added if there is in fact a use for it -- [update] the leading dot denotes that the identifier path starts in global scope -- i still haven't seen a use case for this but i can add it upon request -- Parses a nested identifier, consuming trailing space. nestedIdentifier :: ProtoParser DotProtoIdentifier nestedIdentifier = token $ do h <- parens _identifier string "." t <- _identifier return $ Qualified h t ---------------------------------------- -- values -- [issue] these string parsers are weak to \" and \000 octal codes stringLit :: ProtoParser String stringLit = stringLiteral <|> stringLiteral' bool :: ProtoParser Bool bool = token $ lit "true" True <|> lit "false" False where -- used to distinguish "true_" (Identifier) from "true" (BoolLit) lit s c = string s >> notFollowedBy (alphaNum <|> char '_') >> pure c -- the `parsers` package actually does not expose a parser for signed fractional values floatLit :: ProtoParser Double floatLit = do sign <- char '-' $> negate <|> char '+' $> id <|> pure id sign <$> double value :: ProtoParser DotProtoValue value = try (BoolLit <$> bool) <|> try (StringLit <$> stringLit) <|> try (FloatLit <$> floatLit) <|> try (IntLit . fromInteger <$> integer) <|> try (Identifier <$> identifier) ---------------------------------------- -- types primType :: ProtoParser DotProtoPrimType primType = try (symbol "double" $> Double) <|> try (symbol "float" $> Float) <|> try (symbol "int32" $> Int32) <|> try (symbol "int64" $> Int64) <|> try (symbol "sint32" $> SInt32) <|> try (symbol "sint64" $> SInt64) <|> try (symbol "uint32" $> UInt32) <|> try (symbol "uint64" $> UInt64) <|> try (symbol "fixed32" $> Fixed32) <|> try (symbol "fixed64" $> Fixed64) <|> try (symbol "sfixed32" $> SFixed32) <|> try (symbol "sfixed64" $> SFixed64) <|> try (symbol "string" $> String) <|> try (symbol "bytes" $> Bytes) <|> try (symbol "bool" $> Bool) <|> Named <$> identifier -------------------------------------------------------------------------------- -- top-level parser and version annotation syntaxSpec :: ProtoParser () syntaxSpec = void $ do symbol "syntax" symbol "=" symbol "'proto3'" <|> symbol "\"proto3\"" semi data DotProtoStatement = DPSOption DotProtoOption | DPSPackage DotProtoPackageSpec | DPSImport DotProtoImport | DPSDefinition DotProtoDefinition | DPSEmpty deriving Show sortStatements :: Path -> [DotProtoStatement] -> DotProto sortStatements modulePath statements = DotProto { protoOptions = [ x | DPSOption x <- statements] , protoImports = [ x | DPSImport x <- statements] , protoPackage = adapt [ x | DPSPackage x <- statements] , protoDefinitions = [ x | DPSDefinition x <- statements] , protoMeta = DotProtoMeta modulePath } where adapt (x:_) = x adapt _ = DotProtoNoPackage topLevel :: Path -> ProtoParser DotProto topLevel modulePath = do whiteSpace syntaxSpec sortStatements modulePath <$> many topStatement -------------------------------------------------------------------------------- -- top-level statements topStatement :: ProtoParser DotProtoStatement topStatement = DPSImport <$> import_ <|> DPSPackage <$> package <|> DPSOption <$> topOption <|> DPSDefinition <$> definition <|> DPSEmpty <$ empty import_ :: ProtoParser DotProtoImport import_ = do symbol "import" qualifier <- option DotProtoImportDefault $ symbol "weak" $> DotProtoImportWeak <|> symbol "public" $> DotProtoImportPublic target <- FP.fromText . T.pack <$> stringLit semi return $ DotProtoImport qualifier target package :: ProtoParser DotProtoPackageSpec package = do symbol "package" p <- identifier semi return $ DotProtoPackageSpec p definition :: ProtoParser DotProtoDefinition definition = message <|> enum <|> service -------------------------------------------------------------------------------- -- options inlineOption :: ProtoParser DotProtoOption inlineOption = DotProtoOption <$> (optionName <* symbol "=") <*> value where optionName = nestedIdentifier <|> identifier optionAnnotation :: ProtoParser [DotProtoOption] optionAnnotation = brackets (commaSep1 inlineOption) <|> pure [] topOption :: ProtoParser DotProtoOption topOption = symbol "option" *> inlineOption <* semi -------------------------------------------------------------------------------- -- service statements servicePart :: ProtoParser DotProtoServicePart servicePart = DotProtoServiceRPCMethod <$> rpc <|> DotProtoServiceOption <$> topOption <|> DotProtoServiceEmpty <$ empty rpcOptions :: ProtoParser [DotProtoOption] rpcOptions = braces $ many topOption rpcClause :: ProtoParser (DotProtoIdentifier, Streaming) rpcClause = do let sid ctx = (,ctx) <$> identifier -- NB: Distinguish "stream stream.foo" from "stream.foo" try (symbol "stream" *> sid Streaming) <|> sid NonStreaming rpc :: ProtoParser RPCMethod rpc = do symbol "rpc" rpcMethodName <- singleIdentifier (rpcMethodRequestType, rpcMethodRequestStreaming) <- parens rpcClause symbol "returns" (rpcMethodResponseType, rpcMethodResponseStreaming) <- parens rpcClause rpcMethodOptions <- rpcOptions <|> (semi $> []) return RPCMethod{..} service :: ProtoParser DotProtoDefinition service = do symbol "service" name <- singleIdentifier statements <- braces (many servicePart) return $ DotProtoService mempty name statements -------------------------------------------------------------------------------- -- message definitions message :: ProtoParser DotProtoDefinition message = do symbol "message" name <- singleIdentifier body <- braces (many messagePart) return $ DotProtoMessage mempty name body messageOneOf :: ProtoParser DotProtoMessagePart messageOneOf = do symbol "oneof" name <- singleIdentifier body <- braces $ many (messageField <|> empty $> DotProtoEmptyField) return $ DotProtoMessageOneOf name body messagePart :: ProtoParser DotProtoMessagePart messagePart = try (DotProtoMessageDefinition <$> enum) <|> try (DotProtoMessageReserved <$> reservedField) <|> try (DotProtoMessageDefinition <$> message) <|> try messageOneOf <|> try (DotProtoMessageField <$> messageField) messageType :: ProtoParser DotProtoType messageType = try mapType <|> try repType <|> (Prim <$> primType) where mapType = do symbol "map" angles $ Map <$> (primType <* comma) <*> primType repType = do symbol "repeated" Repeated <$> primType messageField :: ProtoParser DotProtoField messageField = do mtype <- messageType mname <- identifier symbol "=" mnumber <- fieldNumber moptions <- optionAnnotation semi return $ DotProtoField mnumber mtype mname moptions mempty -------------------------------------------------------------------------------- -- enumerations enumField :: ProtoParser DotProtoEnumPart enumField = do fname <- identifier symbol "=" fpos <- fromInteger <$> integer opts <- optionAnnotation semi return $ DotProtoEnumField fname fpos opts enumStatement :: ProtoParser DotProtoEnumPart enumStatement = try (DotProtoEnumOption <$> topOption) <|> enumField <|> empty $> DotProtoEnumEmpty enum :: ProtoParser DotProtoDefinition enum = do symbol "enum" ename <- singleIdentifier ebody <- braces (many enumStatement) return $ DotProtoEnum mempty ename ebody -------------------------------------------------------------------------------- -- field reservations range :: ProtoParser DotProtoReservedField range = do lookAhead (integer >> symbol "to") -- [note] parsec commits to this parser too early without this lookahead s <- fromInteger <$> integer symbol "to" e <- fromInteger <$> integer return $ FieldRange s e ranges :: ProtoParser [DotProtoReservedField] ranges = commaSep1 (try range <|> (SingleField . fromInteger <$> integer)) reservedField :: ProtoParser [DotProtoReservedField] reservedField = do symbol "reserved" v <- ranges <|> commaSep1 (ReservedIdentifier <$> stringLit) semi return v