{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
module Proto3.Suite.DotProto.Parsing
( parseProto
, parseProtoFile
) where
import Control.Applicative hiding (empty)
import Control.Monad
import qualified Data.List.NonEmpty as NE
import Data.Functor
import qualified Data.Text as T
import qualified Filesystem.Path.CurrentOS as FP
import Proto3.Suite.DotProto.AST
import Proto3.Wire.Types (FieldNumber(..))
import Text.Parsec (parse, ParseError)
import Text.Parsec.String (Parser)
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.LookAhead
import Text.Parser.Token
import qualified Text.Parser.Token.Style as TokenStyle
import qualified Turtle
parseProto :: Path -> String -> Either ParseError DotProto
parseProto modulePath = parseProtoWithFile modulePath ""
parseProtoWithFile :: Path -> String -> String -> Either ParseError DotProto
parseProtoWithFile modulePath filePath = parse (runProtoParser (topLevel modulePath)) filePath
parseProtoFile :: Turtle.MonadIO m
=> Path -> Turtle.FilePath -> m (Either ParseError DotProto)
parseProtoFile modulePath (FP.encodeString -> fp) =
parseProtoWithFile modulePath fp <$> Turtle.liftIO (readFile fp)
newtype ProtoParser a = ProtoParser { runProtoParser :: Parser a }
deriving ( Functor, Applicative, Alternative, Monad, MonadPlus
, Parsing, CharParsing, LookAheadParsing)
instance TokenParsing ProtoParser where
someSpace = TokenStyle.buildSomeSpaceParser
(ProtoParser someSpace)
TokenStyle.javaCommentStyle
empty :: ProtoParser ()
empty = textSymbol ";" >> return ()
fieldNumber :: ProtoParser FieldNumber
fieldNumber = FieldNumber . fromInteger <$> integer
identifierName :: ProtoParser String
identifierName = do h <- letter
t <- many (alphaNum <|> char '_')
return $ h:t
_identifier :: ProtoParser DotProtoIdentifier
_identifier = identifierName `sepBy1` string "." >>= \case
[] -> fail "impossible"
[i] -> pure (Single i)
(i:is) -> pure (Dots (Path (i NE.:| is)))
singleIdentifier :: ProtoParser DotProtoIdentifier
singleIdentifier = Single <$> token identifierName
identifier :: ProtoParser DotProtoIdentifier
identifier = token _identifier
nestedIdentifier :: ProtoParser DotProtoIdentifier
nestedIdentifier = token $ do
h <- parens _identifier
string "."
t <- _identifier
return $ Qualified h t
stringLit :: ProtoParser String
stringLit = stringLiteral <|> stringLiteral'
bool :: ProtoParser Bool
bool = token $ lit "true" True <|> lit "false" False
where
lit s c = string s >> notFollowedBy (alphaNum <|> char '_') >> pure c
floatLit :: ProtoParser Double
floatLit = do sign <- char '-' $> negate <|> char '+' $> id <|> pure id
sign <$> double
value :: ProtoParser DotProtoValue
value = try (BoolLit <$> bool)
<|> try (StringLit <$> stringLit)
<|> try (FloatLit <$> floatLit)
<|> try (IntLit . fromInteger <$> integer)
<|> try (Identifier <$> identifier)
primType :: ProtoParser DotProtoPrimType
primType = try (symbol "double" $> Double)
<|> try (symbol "float" $> Float)
<|> try (symbol "int32" $> Int32)
<|> try (symbol "int64" $> Int64)
<|> try (symbol "sint32" $> SInt32)
<|> try (symbol "sint64" $> SInt64)
<|> try (symbol "uint32" $> UInt32)
<|> try (symbol "uint64" $> UInt64)
<|> try (symbol "fixed32" $> Fixed32)
<|> try (symbol "fixed64" $> Fixed64)
<|> try (symbol "sfixed32" $> SFixed32)
<|> try (symbol "sfixed64" $> SFixed64)
<|> try (symbol "string" $> String)
<|> try (symbol "bytes" $> Bytes)
<|> try (symbol "bool" $> Bool)
<|> Named <$> identifier
syntaxSpec :: ProtoParser ()
syntaxSpec = void $ do
symbol "syntax"
symbol "="
symbol "'proto3'" <|> symbol "\"proto3\""
semi
data DotProtoStatement
= DPSOption DotProtoOption
| DPSPackage DotProtoPackageSpec
| DPSImport DotProtoImport
| DPSDefinition DotProtoDefinition
| DPSEmpty
deriving Show
sortStatements :: Path -> [DotProtoStatement] -> DotProto
sortStatements modulePath statements
= DotProto { protoOptions = [ x | DPSOption x <- statements]
, protoImports = [ x | DPSImport x <- statements]
, protoPackage = adapt [ x | DPSPackage x <- statements]
, protoDefinitions = [ x | DPSDefinition x <- statements]
, protoMeta = DotProtoMeta modulePath
}
where
adapt (x:_) = x
adapt _ = DotProtoNoPackage
topLevel :: Path -> ProtoParser DotProto
topLevel modulePath = do whiteSpace
syntaxSpec
sortStatements modulePath <$> many topStatement
topStatement :: ProtoParser DotProtoStatement
topStatement = DPSImport <$> import_
<|> DPSPackage <$> package
<|> DPSOption <$> topOption
<|> DPSDefinition <$> definition
<|> DPSEmpty <$ empty
import_ :: ProtoParser DotProtoImport
import_ = do symbol "import"
qualifier <- option DotProtoImportDefault $
symbol "weak" $> DotProtoImportWeak
<|> symbol "public" $> DotProtoImportPublic
target <- FP.fromText . T.pack <$> stringLit
semi
return $ DotProtoImport qualifier target
package :: ProtoParser DotProtoPackageSpec
package = do symbol "package"
p <- identifier
semi
return $ DotProtoPackageSpec p
definition :: ProtoParser DotProtoDefinition
definition = message
<|> enum
<|> service
inlineOption :: ProtoParser DotProtoOption
inlineOption = DotProtoOption <$> (optionName <* symbol "=") <*> value
where
optionName = nestedIdentifier <|> identifier
optionAnnotation :: ProtoParser [DotProtoOption]
optionAnnotation = brackets (commaSep1 inlineOption) <|> pure []
topOption :: ProtoParser DotProtoOption
topOption = symbol "option" *> inlineOption <* semi
servicePart :: ProtoParser DotProtoServicePart
servicePart = DotProtoServiceRPCMethod <$> rpc
<|> DotProtoServiceOption <$> topOption
<|> DotProtoServiceEmpty <$ empty
rpcOptions :: ProtoParser [DotProtoOption]
rpcOptions = braces $ many topOption
rpcClause :: ProtoParser (DotProtoIdentifier, Streaming)
rpcClause = do
let sid ctx = (,ctx) <$> identifier
try (symbol "stream" *> sid Streaming) <|> sid NonStreaming
rpc :: ProtoParser RPCMethod
rpc = do symbol "rpc"
rpcMethodName <- singleIdentifier
(rpcMethodRequestType, rpcMethodRequestStreaming) <- parens rpcClause
symbol "returns"
(rpcMethodResponseType, rpcMethodResponseStreaming) <- parens rpcClause
rpcMethodOptions <- rpcOptions <|> (semi $> [])
return RPCMethod{..}
service :: ProtoParser DotProtoDefinition
service = do symbol "service"
name <- singleIdentifier
statements <- braces (many servicePart)
return $ DotProtoService mempty name statements
message :: ProtoParser DotProtoDefinition
message = do symbol "message"
name <- singleIdentifier
body <- braces (many messagePart)
return $ DotProtoMessage mempty name body
messageOneOf :: ProtoParser DotProtoMessagePart
messageOneOf = do symbol "oneof"
name <- singleIdentifier
body <- braces $ many (messageField <|> empty $> DotProtoEmptyField)
return $ DotProtoMessageOneOf name body
messagePart :: ProtoParser DotProtoMessagePart
messagePart = try (DotProtoMessageDefinition <$> enum)
<|> try (DotProtoMessageReserved <$> reservedField)
<|> try (DotProtoMessageDefinition <$> message)
<|> try messageOneOf
<|> try (DotProtoMessageField <$> messageField)
messageType :: ProtoParser DotProtoType
messageType = try mapType <|> try repType <|> (Prim <$> primType)
where
mapType = do symbol "map"
angles $ Map <$> (primType <* comma)
<*> primType
repType = do symbol "repeated"
Repeated <$> primType
messageField :: ProtoParser DotProtoField
messageField = do mtype <- messageType
mname <- identifier
symbol "="
mnumber <- fieldNumber
moptions <- optionAnnotation
semi
return $ DotProtoField mnumber mtype mname moptions mempty
enumField :: ProtoParser DotProtoEnumPart
enumField = do fname <- identifier
symbol "="
fpos <- fromInteger <$> integer
opts <- optionAnnotation
semi
return $ DotProtoEnumField fname fpos opts
enumStatement :: ProtoParser DotProtoEnumPart
enumStatement = try (DotProtoEnumOption <$> topOption)
<|> enumField
<|> empty $> DotProtoEnumEmpty
enum :: ProtoParser DotProtoDefinition
enum = do symbol "enum"
ename <- singleIdentifier
ebody <- braces (many enumStatement)
return $ DotProtoEnum mempty ename ebody
range :: ProtoParser DotProtoReservedField
range = do lookAhead (integer >> symbol "to")
s <- fromInteger <$> integer
symbol "to"
e <- fromInteger <$> integer
return $ FieldRange s e
ranges :: ProtoParser [DotProtoReservedField]
ranges = commaSep1 (try range <|> (SingleField . fromInteger <$> integer))
reservedField :: ProtoParser [DotProtoReservedField]
reservedField = do symbol "reserved"
v <- ranges <|> commaSep1 (ReservedIdentifier <$> stringLit)
semi
return v