module Language.Thrift.Parser
( parseFromFile
, parse
, thriftIDL
, program
, header
, include
, namespace
, definition
, constant
, typeDefinition
, service
, typedef
, enum
, struct
, union
, exception
, senum
, typeReference
, constantValue
, docstring
, Parser
, runParser
, whiteSpace
) where
import Control.Applicative
import Control.Monad
import Control.Monad.Trans.State (StateT)
import Data.Text (Text)
import qualified Control.Monad.Trans.State as State
import qualified Data.Text as Text
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Lexer as PL
import Language.Thrift.Internal.Reserved (isReserved)
import qualified Language.Thrift.Types as T
data State = State
{ stateDocstring :: T.Docstring
}
deriving (Show, Eq)
type Parser s = StateT State (P.Parsec s)
runParser :: Parser s a -> P.Parsec s a
runParser p = State.evalStateT p (State Nothing)
parseFromFile :: FilePath -> IO (Either P.ParseError (T.Program P.SourcePos))
parseFromFile =
P.parseFromFile (thriftIDL :: P.Parsec Text (T.Program P.SourcePos))
parse
:: P.Stream s Char
=> FilePath -> s -> Either P.ParseError (T.Program P.SourcePos)
parse = P.parse thriftIDL
thriftIDL :: P.Stream s Char => P.Parsec s (T.Program P.SourcePos)
thriftIDL = runParser program
clearDocstring :: Parser s ()
clearDocstring = State.modify' (\s -> s { stateDocstring = Nothing })
lastDocstring :: Parser s T.Docstring
lastDocstring = do
s <- State.gets stateDocstring
clearDocstring
return s
whiteSpace :: P.Stream s Char => Parser s ()
whiteSpace = someSpace <|> pure ()
someSpace :: P.Stream s Char => Parser s ()
someSpace = P.skipSome $ readDocstring <|> skipComments <|> skipSpace
where
readDocstring = do
s <- docstring
unless (Text.null s) $
State.modify' (\st -> st { stateDocstring = Just s})
skipSpace = P.choice
[ P.newline *> clearDocstring
, P.skipSome P.spaceChar
]
skipComments = P.choice
[ P.char '#' *> skipLine
, P.try (P.string "//") *> skipLine
, P.try (P.string "/*") *> skipCStyleComment
] *> clearDocstring
skipLine = void P.eol <|> P.eof <|> (P.anyChar *> skipLine)
skipCStyleComment = P.choice
[ P.try (P.string "*/") *> pure ()
, P.skipSome (P.noneOf "/*") *> skipCStyleComment
, P.oneOf "/*" *> skipCStyleComment
]
skipUpTo :: P.Stream s Char => Parser s a -> Int -> Parser s ()
skipUpTo p = loop
where
loop 0 = return ()
loop n =
( do
void $ P.try p
loop $! n 1
) <|> return ()
hspace :: P.Stream s Char => Parser s ()
hspace = void $ P.oneOf " \t"
docstring :: P.Stream s Char => Parser s Text
docstring = do
P.try (P.string "/**") >> P.skipMany hspace
indent <- P.sourceColumn <$> P.getPosition
isNewLine <- maybeEOL
chunks <- loop isNewLine (indent 1) []
return $! Text.intercalate "\n" chunks
where
maybeEOL = (P.eol >> return True) <|> return False
commentChar =
P.noneOf "*\r\n" <|>
P.try (P.char '*' <* P.notFollowedBy (P.char '/'))
loop shouldDedent maxDedent chunks = do
when shouldDedent $
hspace `skipUpTo` maxDedent
finishComment <|> readDocLine
where
finishComment = do
P.try (P.skipMany hspace <* P.string "*/")
void $ optional P.spaceChar
return $! reverse chunks
readDocLine = do
when shouldDedent . void $
optional $ P.try (P.char '*' >> optional hspace)
line <- Text.pack <$> P.many commentChar
void (optional hspace >> maybeEOL)
loop True maxDedent (line:chunks)
symbolic :: P.Stream s Char => Char -> Parser s ()
symbolic c = void $ PL.symbol whiteSpace [c]
token :: P.Stream s Char => Parser s a -> Parser s a
token = PL.lexeme whiteSpace
braces, angles, parens :: P.Stream s Char => Parser s a -> Parser s a
braces = P.between (symbolic '{') (symbolic '}')
angles = P.between (symbolic '<') (symbolic '>')
parens = P.between (symbolic '(') (symbolic ')')
comma, semi, colon, equals :: P.Stream s Char => Parser s ()
comma = symbolic ','
semi = symbolic ';'
colon = symbolic ':'
equals = symbolic '='
errorUnlessReserved :: Monad m => String -> m ()
errorUnlessReserved name =
unless (isReserved name) $
error ("reserved called with unreserved identifier " ++ show name)
reserved :: P.Stream s Char => String -> Parser s ()
reserved name =
errorUnlessReserved name >>
P.label name $ token $ P.try $ do
void (P.string name)
P.notFollowedBy (P.alphaNumChar <|> P.oneOf "_.")
literal :: P.Stream s Char => Parser s Text
literal = P.label "string literal" $ token $
stringLiteral '"' <|> stringLiteral '\''
stringLiteral :: P.Stream s Char => Char -> Parser s Text
stringLiteral q = fmap Text.pack $
P.char q >> P.manyTill PL.charLiteral (P.char q)
integer :: P.Stream s Char => Parser s Integer
integer = token PL.integer
identifier :: P.Stream s Char => Parser s Text
identifier = P.label "identifier" $ token $ do
name <- (:)
<$> (P.letterChar <|> P.char '_')
<*> many (P.alphaNumChar <|> P.oneOf "_.")
when (isReserved name) $
P.unexpected name
return (Text.pack name)
program :: P.Stream s Char => Parser s (T.Program P.SourcePos)
program = whiteSpace >>
T.Program
<$> many (header <* optionalSep)
<*> many (definition <* optionalSep)
<* P.eof
header :: P.Stream s Char => Parser s (T.Header P.SourcePos)
header = P.choice
[ T.HeaderInclude <$> include
, T.HeaderNamespace <$> namespace
]
include :: P.Stream s Char => Parser s (T.Include P.SourcePos)
include = reserved "include" >> withPosition (T.Include <$> literal)
namespace :: P.Stream s Char => Parser s (T.Namespace P.SourcePos)
namespace = P.choice
[ reserved "namespace" >>
withPosition (T.Namespace <$> (star <|> identifier) <*> identifier)
, reserved "cpp_namespace" >>
withPosition (T.Namespace "cpp" <$> identifier)
, reserved "php_namespace" >>
withPosition (T.Namespace "php" <$> identifier)
, reserved "py_module" >>
withPosition (T.Namespace "py" <$> identifier)
, reserved "perl_package" >>
withPosition (T.Namespace "perl" <$> identifier)
, reserved "ruby_namespace" >>
withPosition (T.Namespace "rb" <$> identifier)
, reserved "java_package" >>
withPosition (T.Namespace "java" <$> identifier)
, reserved "cocoa_package" >>
withPosition (T.Namespace "cocoa" <$> identifier)
, reserved "csharp_namespace" >>
withPosition (T.Namespace "csharp" <$> identifier)
]
where
star = symbolic '*' >> pure "*"
withPosition :: P.Stream s Char => Parser s (P.SourcePos -> a) -> Parser s a
withPosition p = P.getPosition >>= \pos -> p <*> pure pos
withDocstring
:: P.Stream s Char
=> Parser s (T.Docstring -> P.SourcePos -> a) -> Parser s a
withDocstring p = lastDocstring >>= \s -> do
pos <- P.getPosition
p <*> pure s <*> pure pos
definition :: P.Stream s Char => Parser s (T.Definition P.SourcePos)
definition = whiteSpace >> P.choice
[ T.ConstDefinition <$> constant
, T.TypeDefinition <$> typeDefinition
, T.ServiceDefinition <$> service
]
typeDefinition :: P.Stream s Char => Parser s (T.Type P.SourcePos)
typeDefinition = P.choice
[ T.TypedefType <$> typedef
, T.EnumType <$> enum
, T.SenumType <$> senum
, T.StructType <$> struct
, T.UnionType <$> union
, T.ExceptionType <$> exception
]
typedef :: P.Stream s Char => Parser s (T.Typedef P.SourcePos)
typedef = reserved "typedef" >> withDocstring
(T.Typedef <$> typeReference <*> identifier <*> typeAnnotations)
enum :: P.Stream s Char => Parser s (T.Enum P.SourcePos)
enum = reserved "enum" >> withDocstring
( T.Enum
<$> identifier
<*> braces (many enumDef)
<*> typeAnnotations
)
struct :: P.Stream s Char => Parser s (T.Struct P.SourcePos)
struct = reserved "struct" >> withDocstring
( T.Struct
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
union :: P.Stream s Char => Parser s (T.Union P.SourcePos)
union = reserved "union" >> withDocstring
( T.Union
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
exception :: P.Stream s Char => Parser s (T.Exception P.SourcePos)
exception = reserved "exception" >> withDocstring
( T.Exception
<$> identifier
<*> braces (many field)
<*> typeAnnotations
)
fieldRequiredness :: P.Stream s Char => Parser s T.FieldRequiredness
fieldRequiredness = P.choice
[ reserved "required" *> pure T.Required
, reserved "optional" *> pure T.Optional
]
field :: P.Stream s Char => Parser s (T.Field P.SourcePos)
field = withDocstring $
T.Field
<$> optional (integer <* colon)
<*> optional fieldRequiredness
<*> typeReference
<*> identifier
<*> optional (equals *> constantValue)
<*> typeAnnotations
<* optionalSep
enumDef :: P.Stream s Char => Parser s (T.EnumDef P.SourcePos)
enumDef = withDocstring $
T.EnumDef
<$> identifier
<*> optional (equals *> PL.signed whiteSpace integer)
<*> typeAnnotations
<* optionalSep
senum :: P.Stream s Char => Parser s (T.Senum P.SourcePos)
senum = reserved "senum" >> withDocstring
( T.Senum
<$> identifier
<*> braces (many (literal <* optionalSep))
<*> typeAnnotations
)
constant :: P.Stream s Char => Parser s (T.Const P.SourcePos)
constant = do
reserved "const"
withDocstring $
T.Const
<$> typeReference
<*> (identifier <* equals)
<*> constantValue
<* optionalSep
constantValue :: P.Stream s Char => Parser s (T.ConstValue P.SourcePos)
constantValue = withPosition $ P.choice
[ P.try (P.string "0x") >> T.ConstInt <$> token PL.hexadecimal
, either T.ConstInt T.ConstFloat
<$> token (PL.signed whiteSpace PL.number)
, T.ConstLiteral <$> literal
, T.ConstIdentifier <$> identifier
, T.ConstList <$> constList
, T.ConstMap <$> constMap
]
constList :: P.Stream s Char => Parser s [T.ConstValue P.SourcePos]
constList = symbolic '[' *> loop []
where
loop xs = P.choice
[ symbolic ']' *> return (reverse xs)
, (:) <$> (constantValue <* optionalSep)
<*> pure xs
>>= loop
]
constMap
:: P.Stream s Char
=> Parser s [(T.ConstValue P.SourcePos, T.ConstValue P.SourcePos)]
constMap = symbolic '{' *> loop []
where
loop xs = P.choice [
symbolic '}' *> return (reverse xs)
, (:) <$> (constantValuePair <* optionalSep)
<*> pure xs
>>= loop
]
constantValuePair
:: P.Stream s Char
=> Parser s (T.ConstValue P.SourcePos, T.ConstValue P.SourcePos)
constantValuePair =
(,) <$> (constantValue <* colon)
<*> (constantValue <* optionalSep)
typeReference :: P.Stream s Char => Parser s (T.TypeReference P.SourcePos)
typeReference = P.choice
[ baseType
, containerType
, withPosition (T.DefinedType <$> identifier)
]
baseType
:: P.Stream s Char => Parser s (T.TypeReference P.SourcePos)
baseType = withPosition $
P.choice [reserved s *> (v <$> typeAnnotations) | (s, v) <- bases]
where
bases =
[ ("string", T.StringType)
, ("binary", T.BinaryType)
, ("slist", T.SListType)
, ("bool", T.BoolType)
, ("byte", T.ByteType)
, ("i8", T.ByteType)
, ("i16", T.I16Type)
, ("i32", T.I32Type)
, ("i64", T.I64Type)
, ("double", T.DoubleType)
]
containerType
:: P.Stream s Char => Parser s (T.TypeReference P.SourcePos)
containerType = withPosition $
P.choice [mapType, setType, listType] <*> typeAnnotations
where
mapType = reserved "map" >>
angles (T.MapType <$> (typeReference <* comma) <*> typeReference)
setType = reserved "set" >> angles (T.SetType <$> typeReference)
listType = reserved "list" >> angles (T.ListType <$> typeReference)
service :: P.Stream s Char => Parser s (T.Service P.SourcePos)
service = do
reserved "service"
withDocstring $
T.Service
<$> identifier
<*> optional (reserved "extends" *> identifier)
<*> braces (many function)
<*> typeAnnotations
function :: P.Stream s Char => Parser s (T.Function P.SourcePos)
function = withDocstring $
T.Function
<$> ((reserved "oneway" *> pure True) <|> pure False)
<*> ((reserved "void" *> pure Nothing) <|> Just <$> typeReference)
<*> identifier
<*> parens (many field)
<*> optional (reserved "throws" *> parens (many field))
<*> typeAnnotations
<* optionalSep
typeAnnotations :: P.Stream s Char => Parser s [T.TypeAnnotation]
typeAnnotations = parens (many typeAnnotation) <|> pure []
typeAnnotation :: P.Stream s Char => Parser s T.TypeAnnotation
typeAnnotation =
T.TypeAnnotation
<$> identifier
<*> (optional (equals *> literal) <* optionalSep)
optionalSep :: P.Stream s Char => Parser s ()
optionalSep = void $ optional (comma <|> semi)