module Language.Thrift.Parser
( parseFromFile
, parse
, thriftIDL
, program
, header
, include
, namespace
, definition
, constant
, typeDefinition
, service
, typedef
, enum
, struct
, union
, exception
, senum
, typeReference
, constantValue
, docstring
, Parser
, runParser
, whiteSpace
) where
import Control.Applicative
import Control.Monad
import Control.Monad.Trans.State (StateT)
import Data.Scientific (floatingOrInteger)
import Data.Text (Text)
import qualified Control.Monad.Trans.State as State
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Lexer as PL
import qualified Language.Thrift.AST as T
data State = State
{ stateDocstring :: T.Docstring
}
deriving (Show, Eq)
type Parser s = StateT State (P.Parsec P.Dec s)
runParser
:: (P.Stream s, P.Token s ~ Char)
=> Parser s a -> P.Parsec P.Dec s a
runParser p = State.evalStateT p (State Nothing)
parseFromFile
:: FilePath
-> IO (Either (P.ParseError Char P.Dec) (T.Program P.SourcePos))
parseFromFile path = P.runParser thriftIDL path <$> Text.readFile path
parse
:: (P.Stream s, P.Token s ~ Char)
=> FilePath
-> s -> Either (P.ParseError Char P.Dec) (T.Program P.SourcePos)
parse = P.parse thriftIDL
thriftIDL
:: (P.Stream s, P.Token s ~ Char)
=> P.Parsec P.Dec s (T.Program P.SourcePos)
thriftIDL = runParser program
clearDocstring :: P.Stream s => Parser s ()
clearDocstring = State.modify' (\s -> s { stateDocstring = Nothing })
lastDocstring :: P.Stream s => Parser s T.Docstring
lastDocstring = do
s <- State.gets stateDocstring
clearDocstring
return s
whiteSpace :: (P.Stream s, P.Token s ~ Char) => Parser s ()
whiteSpace = someSpace <|> pure ()
someSpace :: (P.Stream s, P.Token s ~ Char) => Parser s ()
someSpace = P.skipSome $ readDocstring <|> skipComments <|> skipSpace
where
readDocstring = do
s <- docstring
unless (Text.null s) $
State.modify' (\st -> st { stateDocstring = Just s})
skipSpace = P.choice
[ P.newline *> clearDocstring
, P.skipSome P.spaceChar
]
skipComments = P.choice
[ P.char '#' *> skipLine
, P.try (P.string "//") *> skipLine
, P.try (P.string "/*") *> skipCStyleComment
] *> clearDocstring
skipLine = void P.eol <|> P.eof <|> (P.anyChar *> skipLine)
skipCStyleComment = P.choice
[ P.try (P.string "*/") *> pure ()
, P.skipSome (noneOf "/*") *> skipCStyleComment
, oneOf "/*" *> skipCStyleComment
]
oneOf :: (P.Stream s, P.Token s ~ Char) => String -> Parser s Char
oneOf = P.oneOf
noneOf :: (P.Stream s, P.Token s ~ Char) => String -> Parser s Char
noneOf = P.noneOf
skipUpTo
:: (P.Stream s, P.Token s ~ Char) => Parser s a -> Int -> Parser s ()
skipUpTo p = loop
where
loop 0 = return ()
loop n =
( do
void $ P.try p
loop $! n 1
) <|> return ()
hspace :: (P.Stream s, P.Token s ~ Char) => Parser s ()
hspace = void $ oneOf " \t"
docstring :: (P.Stream s, P.Token s ~ Char) => Parser s Text
docstring = do
P.try (P.string "/**") >> P.skipMany hspace
indent <- fromIntegral . P.unPos <$> PL.indentLevel
isNewLine <- maybeEOL
chunks <- loop isNewLine (indent 1) []
return $! Text.intercalate "\n" chunks
where
maybeEOL = (P.eol >> return True) <|> return False
commentChar =
noneOf "*\r\n" <|>
P.try (P.char '*' <* P.notFollowedBy (P.char '/'))
loop shouldDedent maxDedent chunks = do
when shouldDedent $
hspace `skipUpTo` maxDedent
finishComment <|> readDocLine
where
finishComment = do
P.try (P.skipMany hspace <* P.string "*/")
void $ optional P.spaceChar
return $! reverse chunks
readDocLine = do
when shouldDedent . void $
optional $ P.try (P.char '*' >> optional hspace)
line <- Text.pack <$> P.many commentChar
void (optional hspace >> maybeEOL)
loop True maxDedent (line:chunks)
symbolic :: (P.Stream s, P.Token s ~ Char) => Char -> Parser s ()
symbolic c = void $ PL.symbol whiteSpace [c]
token :: (P.Stream s, P.Token s ~ Char) => Parser s a -> Parser s a
token = PL.lexeme whiteSpace
braces, angles, parens
:: (P.Stream s, P.Token s ~ Char) => Parser s a -> Parser s a
braces = P.between (symbolic '{') (symbolic '}')
angles = P.between (symbolic '<') (symbolic '>')
parens = P.between (symbolic '(') (symbolic ')')
comma, semi, colon, equals :: (P.Stream s, P.Token s ~ Char) => Parser s ()
comma = symbolic ','
semi = symbolic ';'
colon = symbolic ':'
equals = symbolic '='
reserved :: (P.Stream s, P.Token s ~ Char) => String -> Parser s ()
reserved name = P.label name $ token $ P.try $ do
void (P.string name)
P.notFollowedBy (P.alphaNumChar <|> oneOf "_.")
literal :: (P.Stream s, P.Token s ~ Char) => Parser s Text
literal = P.label "string literal" $ token $
stringLiteral '"' <|> stringLiteral '\''
stringLiteral :: (P.Stream s, P.Token s ~ Char) => Char -> Parser s Text
stringLiteral q = fmap Text.pack $
P.char q >> P.manyTill PL.charLiteral (P.char q)
integer :: (P.Stream s, P.Token s ~ Char) => Parser s Integer
integer = token PL.integer
identifier :: (P.Stream s, P.Token s ~ Char) => Parser s Text
identifier = P.label "identifier" $ token $ do
name <- (:)
<$> (P.letterChar <|> P.char '_')
<*> many (P.alphaNumChar <|> oneOf "_.")
return (Text.pack name)
program :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Program P.SourcePos)
program = whiteSpace >>
T.Program
<$> many (header <* optionalSep)
<*> many (definition <* optionalSep)
<* P.eof
header :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Header P.SourcePos)
header = P.choice
[ T.HeaderInclude <$> include
, T.HeaderNamespace <$> namespace
]
include :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Include P.SourcePos)
include = reserved "include" >> withPosition (T.Include <$> literal)
namespace
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.Namespace P.SourcePos)
namespace = P.choice
[ reserved "namespace" >>
withPosition (T.Namespace <$> (star <|> identifier) <*> identifier)
, reserved "cpp_namespace" >>
withPosition (T.Namespace "cpp" <$> identifier)
, reserved "php_namespace" >>
withPosition (T.Namespace "php" <$> identifier)
, reserved "py_module" >>
withPosition (T.Namespace "py" <$> identifier)
, reserved "perl_package" >>
withPosition (T.Namespace "perl" <$> identifier)
, reserved "ruby_namespace" >>
withPosition (T.Namespace "rb" <$> identifier)
, reserved "java_package" >>
withPosition (T.Namespace "java" <$> identifier)
, reserved "cocoa_package" >>
withPosition (T.Namespace "cocoa" <$> identifier)
, reserved "csharp_namespace" >>
withPosition (T.Namespace "csharp" <$> identifier)
]
where
star = symbolic '*' >> pure "*"
withPosition
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (P.SourcePos -> a) -> Parser s a
withPosition p = P.getPosition >>= \pos -> p <*> pure pos
withDocstring
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (T.Docstring -> P.SourcePos -> a) -> Parser s a
withDocstring p = lastDocstring >>= \s -> do
pos <- P.getPosition
p <*> pure s <*> pure pos
definition
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.Definition P.SourcePos)
definition = whiteSpace >> P.choice
[ T.ConstDefinition <$> constant
, T.TypeDefinition <$> typeDefinition
, T.ServiceDefinition <$> service
]
typeDefinition
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.Type P.SourcePos)
typeDefinition = P.choice
[ T.TypedefType <$> typedef
, T.EnumType <$> enum
, T.SenumType <$> senum
, T.StructType <$> struct
, T.UnionType <$> union
, T.ExceptionType <$> exception
]
typedef :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Typedef P.SourcePos)
typedef = reserved "typedef" >> withDocstring
(T.Typedef <$> typeReference <*> identifier <*> typeAnnotations)
enum :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Enum P.SourcePos)
enum = reserved "enum" >> withDocstring
( T.Enum
<$> identifier
<*> braces (many enumDef)
<*> typeAnnotations
)
struct :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Struct P.SourcePos)
struct = reserved "struct" >> withDocstring
( T.Struct
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
union :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Union P.SourcePos)
union = reserved "union" >> withDocstring
( T.Union
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
exception
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.Exception P.SourcePos)
exception = reserved "exception" >> withDocstring
( T.Exception
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
fieldRequiredness
:: (P.Stream s, P.Token s ~ Char) => Parser s T.FieldRequiredness
fieldRequiredness = P.choice
[ reserved "required" *> pure T.Required
, reserved "optional" *> pure T.Optional
]
field :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Field P.SourcePos)
field = withDocstring $
T.Field
<$> optional (integer <* colon)
<*> optional fieldRequiredness
<*> typeReference
<*> identifier
<*> optional (equals *> constantValue)
<*> typeAnnotations
<* optionalSep
enumDef :: (P.Stream s, P.Token s ~ Char) => Parser s (T.EnumDef P.SourcePos)
enumDef = withDocstring $
T.EnumDef
<$> identifier
<*> optional (equals *> PL.signed whiteSpace integer)
<*> typeAnnotations
<* optionalSep
senum :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Senum P.SourcePos)
senum = reserved "senum" >> withDocstring
( T.Senum
<$> identifier
<*> braces (many (literal <* optionalSep))
<*> typeAnnotations
)
constant :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Const P.SourcePos)
constant = do
reserved "const"
withDocstring $
T.Const
<$> typeReference
<*> (identifier <* equals)
<*> constantValue
<* optionalSep
constantValue
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.ConstValue P.SourcePos)
constantValue = withPosition $ P.choice
[ P.try (P.string "0x") >> T.ConstInt <$> token PL.hexadecimal
, either T.ConstFloat T.ConstInt
<$> token signedNumber
, T.ConstLiteral <$> literal
, T.ConstIdentifier <$> identifier
, T.ConstList <$> constList
, T.ConstMap <$> constMap
]
where
signedNumber = floatingOrInteger <$> PL.signed whiteSpace PL.number
constList
:: (P.Stream s, P.Token s ~ Char) => Parser s [T.ConstValue P.SourcePos]
constList = symbolic '[' *> loop []
where
loop xs = P.choice
[ symbolic ']' *> return (reverse xs)
, (:) <$> (constantValue <* optionalSep)
<*> pure xs
>>= loop
]
constMap
:: (P.Stream s, P.Token s ~ Char)
=> Parser s [(T.ConstValue P.SourcePos, T.ConstValue P.SourcePos)]
constMap = symbolic '{' *> loop []
where
loop xs = P.choice [
symbolic '}' *> return (reverse xs)
, (:) <$> (constantValuePair <* optionalSep)
<*> pure xs
>>= loop
]
constantValuePair
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (T.ConstValue P.SourcePos, T.ConstValue P.SourcePos)
constantValuePair =
(,) <$> (constantValue <* colon)
<*> (constantValue <* optionalSep)
typeReference
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (T.TypeReference P.SourcePos)
typeReference = P.choice
[ baseType
, containerType
, withPosition (T.DefinedType <$> identifier)
]
baseType
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (T.TypeReference P.SourcePos)
baseType = withPosition $
P.choice [reserved s *> (v <$> typeAnnotations) | (s, v) <- bases]
where
bases =
[ ("string", T.StringType)
, ("binary", T.BinaryType)
, ("slist", T.SListType)
, ("bool", T.BoolType)
, ("byte", T.ByteType)
, ("i8", T.ByteType)
, ("i16", T.I16Type)
, ("i32", T.I32Type)
, ("i64", T.I64Type)
, ("double", T.DoubleType)
]
containerType
:: (P.Stream s, P.Token s ~ Char)
=> Parser s (T.TypeReference P.SourcePos)
containerType = withPosition $
P.choice [mapType, setType, listType] <*> typeAnnotations
where
mapType = reserved "map" >>
angles (T.MapType <$> (typeReference <* comma) <*> typeReference)
setType = reserved "set" >> angles (T.SetType <$> typeReference)
listType = reserved "list" >> angles (T.ListType <$> typeReference)
service :: (P.Stream s, P.Token s ~ Char) => Parser s (T.Service P.SourcePos)
service = do
reserved "service"
withDocstring $
T.Service
<$> identifier
<*> optional (reserved "extends" *> identifier)
<*> braces (many function)
<*> typeAnnotations
function
:: (P.Stream s, P.Token s ~ Char) => Parser s (T.Function P.SourcePos)
function = withDocstring $
T.Function
<$> ((reserved "oneway" *> pure True) <|> pure False)
<*> ((reserved "void" *> pure Nothing) <|> Just <$> typeReference)
<*> identifier
<*> parens (many field)
<*> optional (reserved "throws" *> parens (many field))
<*> typeAnnotations
<* optionalSep
typeAnnotations
:: (P.Stream s, P.Token s ~ Char) => Parser s [T.TypeAnnotation]
typeAnnotations = parens (many typeAnnotation) <|> pure []
typeAnnotation :: (P.Stream s, P.Token s ~ Char) => Parser s T.TypeAnnotation
typeAnnotation =
T.TypeAnnotation
<$> identifier
<*> (optional (equals *> literal) <* optionalSep)
optionalSep :: (P.Stream s, P.Token s ~ Char) => Parser s ()
optionalSep = void $ optional (comma <|> semi)