{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
module Proto3.Suite.DotProto.Parsing
( parseProto
, parseProtoFile
) where
import Control.Applicative hiding (empty)
import Control.Monad
import Data.Functor
import qualified Data.Text as T
import qualified Filesystem.Path.CurrentOS as FP
import Proto3.Suite.DotProto.AST
import Proto3.Wire.Types (FieldNumber(..))
import Text.Parsec (parse, ParseError)
import Text.Parsec.String (Parser)
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.LookAhead
import Text.Parser.Token
import qualified Text.Parser.Token.Style as TokenStyle
import qualified Turtle
parseProto :: Path -> String -> Either ParseError DotProto
parseProto modulePath = parse (runProtoParser (topLevel modulePath)) ""
parseProtoFile :: Turtle.MonadIO m
=> Path -> Turtle.FilePath -> m (Either ParseError DotProto)
parseProtoFile modulePath =
fmap (parseProto modulePath) . Turtle.liftIO . readFile . FP.encodeString
newtype ProtoParser a = ProtoParser { runProtoParser :: Parser a }
deriving ( Functor, Applicative, Alternative, Monad, MonadPlus
, Parsing, CharParsing, LookAheadParsing)
instance TokenParsing ProtoParser where
someSpace = TokenStyle.buildSomeSpaceParser
(ProtoParser someSpace)
TokenStyle.javaCommentStyle
empty :: ProtoParser ()
empty = textSymbol ";" >> return ()
fieldNumber :: ProtoParser FieldNumber
fieldNumber = FieldNumber . fromInteger <$> integer
identifierName :: ProtoParser String
identifierName = do h <- letter
t <- many (alphaNum <|> char '_')
return $ h:t
_identifier :: ProtoParser DotProtoIdentifier
_identifier = do is <- identifierName `sepBy1` string "."
return $ case is of
[i] -> Single i
_ -> Dots (Path is)
singleIdentifier :: ProtoParser DotProtoIdentifier
singleIdentifier = Single <$> token identifierName
identifier :: ProtoParser DotProtoIdentifier
identifier = token _identifier
nestedIdentifier :: ProtoParser DotProtoIdentifier
nestedIdentifier = token $ do
h <- parens _identifier
string "."
t <- _identifier
return $ Qualified h t
stringLit :: ProtoParser String
stringLit = stringLiteral <|> stringLiteral'
bool :: ProtoParser Bool
bool = token $ lit "true" True <|> lit "false" False
where
lit s c = string s >> notFollowedBy (alphaNum <|> char '_') >> pure c
floatLit :: ProtoParser Double
floatLit = do sign <- char '-' $> negate <|> char '+' $> id <|> pure id
sign <$> double
value :: ProtoParser DotProtoValue
value = try (BoolLit <$> bool)
<|> try (StringLit <$> stringLit)
<|> try (FloatLit <$> floatLit)
<|> try (IntLit . fromInteger <$> integer)
<|> try (Identifier <$> identifier)
primType :: ProtoParser DotProtoPrimType
primType = try (symbol "double" $> Double)
<|> try (symbol "float" $> Float)
<|> try (symbol "int32" $> Int32)
<|> try (symbol "int64" $> Int64)
<|> try (symbol "sint32" $> SInt32)
<|> try (symbol "sint64" $> SInt64)
<|> try (symbol "uint32" $> UInt32)
<|> try (symbol "uint64" $> UInt64)
<|> try (symbol "fixed32" $> Fixed32)
<|> try (symbol "fixed64" $> Fixed64)
<|> try (symbol "sfixed32" $> SFixed32)
<|> try (symbol "sfixed64" $> SFixed64)
<|> try (symbol "string" $> String)
<|> try (symbol "bytes" $> Bytes)
<|> try (symbol "bool" $> Bool)
<|> Named <$> identifier
syntaxSpec :: ProtoParser ()
syntaxSpec = void $ do
symbol "syntax"
symbol "="
symbol "'proto3'" <|> symbol "\"proto3\""
semi
data DotProtoStatement
= DPSOption DotProtoOption
| DPSPackage DotProtoPackageSpec
| DPSImport DotProtoImport
| DPSDefinition DotProtoDefinition
| DPSEmpty
deriving Show
sortStatements :: Path -> [DotProtoStatement] -> DotProto
sortStatements modulePath statements
= DotProto { protoOptions = [ x | DPSOption x <- statements]
, protoImports = [ x | DPSImport x <- statements]
, protoPackage = adapt [ x | DPSPackage x <- statements]
, protoDefinitions = [ x | DPSDefinition x <- statements]
, protoMeta = DotProtoMeta modulePath
}
where
adapt (x:_) = x
adapt _ = DotProtoNoPackage
topLevel :: Path -> ProtoParser DotProto
topLevel modulePath = do whiteSpace
syntaxSpec
sortStatements modulePath <$> many topStatement
topStatement :: ProtoParser DotProtoStatement
topStatement = DPSImport <$> import_
<|> DPSPackage <$> package
<|> DPSOption <$> topOption
<|> DPSDefinition <$> definition
<|> DPSEmpty <$ empty
import_ :: ProtoParser DotProtoImport
import_ = do symbol "import"
qualifier <- option DotProtoImportDefault $
symbol "weak" $> DotProtoImportWeak
<|> symbol "public" $> DotProtoImportPublic
target <- FP.fromText . T.pack <$> stringLit
semi
return $ DotProtoImport qualifier target
package :: ProtoParser DotProtoPackageSpec
package = do symbol "package"
p <- identifier
semi
return $ DotProtoPackageSpec p
definition :: ProtoParser DotProtoDefinition
definition = message
<|> enum
<|> service
inlineOption :: ProtoParser DotProtoOption
inlineOption = DotProtoOption <$> (optionName <* symbol "=") <*> value
where
optionName = nestedIdentifier <|> identifier
optionAnnotation :: ProtoParser [DotProtoOption]
optionAnnotation = brackets (commaSep1 inlineOption) <|> pure []
topOption :: ProtoParser DotProtoOption
topOption = symbol "option" *> inlineOption <* semi
servicePart :: ProtoParser DotProtoServicePart
servicePart = rpc
<|> (DotProtoServiceOption <$> topOption)
<|> empty $> DotProtoServiceEmpty
rpcOptions :: ProtoParser [DotProtoOption]
rpcOptions = braces $ many topOption
rpcClause :: ProtoParser (DotProtoIdentifier, Streaming)
rpcClause = do
let sid ctx = (,ctx) <$> identifier
try (symbol "stream" *> sid Streaming) <|> sid NonStreaming
rpc :: ProtoParser DotProtoServicePart
rpc = do symbol "rpc"
name <- singleIdentifier
subjecttype <- parens rpcClause
symbol "returns"
returntype <- parens rpcClause
options <- rpcOptions <|> (semi $> [])
return $ DotProtoServiceRPC name subjecttype returntype options
service :: ProtoParser DotProtoDefinition
service = do symbol "service"
name <- singleIdentifier
statements <- braces (many servicePart)
return $ DotProtoService name statements
message :: ProtoParser DotProtoDefinition
message = do symbol "message"
name <- singleIdentifier
body <- braces (many messagePart)
return $ DotProtoMessage name body
messageOneOf :: ProtoParser DotProtoMessagePart
messageOneOf = do symbol "oneof"
name <- singleIdentifier
body <- braces $ many (messageField <|> empty $> DotProtoEmptyField)
return $ DotProtoMessageOneOf name body
messagePart :: ProtoParser DotProtoMessagePart
messagePart = try (DotProtoMessageDefinition <$> enum)
<|> try (DotProtoMessageReserved <$> reservedField)
<|> try (DotProtoMessageDefinition <$> message)
<|> try messageOneOf
<|> try (DotProtoMessageField <$> messageField)
messageType :: ProtoParser DotProtoType
messageType = try mapType <|> try repType <|> (Prim <$> primType)
where
mapType = do symbol "map"
angles $ Map <$> (primType <* comma)
<*> primType
repType = do symbol "repeated"
Repeated <$> primType
messageField :: ProtoParser DotProtoField
messageField = do mtype <- messageType
mname <- identifier
symbol "="
mnumber <- fieldNumber
moptions <- optionAnnotation
semi
return $ DotProtoField mnumber mtype mname moptions Nothing
enumField :: ProtoParser DotProtoEnumPart
enumField = do fname <- identifier
symbol "="
fpos <- fromInteger <$> integer
opts <- optionAnnotation
semi
return $ DotProtoEnumField fname fpos opts
enumStatement :: ProtoParser DotProtoEnumPart
enumStatement = try (DotProtoEnumOption <$> topOption)
<|> enumField
<|> empty $> DotProtoEnumEmpty
enum :: ProtoParser DotProtoDefinition
enum = do symbol "enum"
ename <- singleIdentifier
ebody <- braces (many enumStatement)
return $ DotProtoEnum ename ebody
range :: ProtoParser DotProtoReservedField
range = do lookAhead (integer >> symbol "to")
s <- fromInteger <$> integer
symbol "to"
e <- fromInteger <$> integer
return $ FieldRange s e
ranges :: ProtoParser [DotProtoReservedField]
ranges = commaSep1 (try range <|> (SingleField . fromInteger <$> integer))
reservedField :: ProtoParser [DotProtoReservedField]
reservedField = do symbol "reserved"
v <- ranges <|> commaSep1 (ReservedIdentifier <$> stringLit)
semi
return v