-- Copyright 2016 Google Inc. All Rights Reserved. -- -- Use of this source code is governed by a BSD-style -- license that can be found in the LICENSE file or at -- https://developers.google.com/open-source/licenses/bsd -- | This module takes care of collecting all the definitions in a .proto file -- and assigning Haskell names to all of the defined things (messages, enums -- and field names). {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} module Data.ProtoLens.Compiler.Definitions ( Env , Definition(..) , MessageInfo(..) , ServiceInfo(..) , MethodInfo(..) , FieldInfo(..) , OneofInfo(..) , OneofCase(..) , FieldName(..) , Symbol , nameFromSymbol , promoteSymbol , EnumInfo(..) , EnumValueInfo(..) , qualifyEnv , unqualifyEnv , collectDefinitions , collectServices , definedFieldType , definedType , camelCase ) where import Data.Char (isUpper, toUpper) import Data.Int (Int32) import Data.List (mapAccumL) import qualified Data.Map as Map import Data.Maybe (fromMaybe) #if !MIN_VERSION_base(4,11,0) import Data.Monoid ((<>)) #endif import qualified Data.Semigroup as Semigroup import qualified Data.Set as Set import Data.String (IsString(..)) import Data.Text (Text, cons, splitOn, toLower, uncons, unpack) import qualified Data.Text as T import Lens.Family2 ((^.), (^..), toListOf) import Proto.Google.Protobuf.Descriptor ( DescriptorProto , EnumDescriptorProto , EnumValueDescriptorProto , FieldDescriptorProto , FileDescriptorProto , MethodDescriptorProto , ServiceDescriptorProto ) import Proto.Google.Protobuf.Descriptor_Fields ( clientStreaming , enumType , field , inputType , maybe'oneofIndex , messageType , method , name , nestedType , number , oneofDecl , outputType , package , serverStreaming , service , typeName , value ) import Data.ProtoLens.Compiler.Combinators ( Name , QName , ModuleName , Type , qual , tyPromotedString , unQual ) -- | 'Env' contains a mapping of proto names (as specified in the .proto file) -- to Haskell names. The keys are fully-qualified names, for example, -- ".package.Message.Submessage". (The protocol_compiler tool emits all -- message field types in this form, even if they refer to local definitions.) -- -- The type 'n' can be either a 'Name' (when talking about definitions within -- the current file) or a (qualified) 'QName' (when talking about definitions -- either from this or another file). type Env n = Map.Map Text (Definition n) data Definition n = Message (MessageInfo n) | Enum (EnumInfo n) deriving Functor -- | All the information needed to define or use a proto message type. data MessageInfo n = MessageInfo { messageName :: n -- ^ Haskell type name , messageDescriptor :: DescriptorProto , messageFields :: [FieldInfo] -- ^ Fields not belonging to a oneof. , messageOneofFields :: [OneofInfo] -- ^ The oneofs in this message, associated with the fields that -- belong to them. , messageUnknownFields :: Name -- ^ The name of the Haskell field in this message that holds the -- unknown fields. } deriving Functor data ServiceInfo = ServiceInfo { serviceName :: Text , servicePackage :: Text , serviceMethods :: [MethodInfo] } data MethodInfo = MethodInfo { methodName :: Text , methodIdent :: Text , methodInput :: Text , methodOutput :: Text , methodClientStreaming :: Bool , methodServerStreaming :: Bool } -- | Information about a single field of a proto message. data FieldInfo = FieldInfo { fieldDescriptor :: FieldDescriptorProto , plainFieldName :: FieldName } data OneofInfo = OneofInfo { oneofFieldName :: FieldName , oneofTypeName :: Name -- ^ The name of the sum type corresponding to this oneof. , oneofCases :: [OneofCase] -- ^ The individual fields that make up this oneof. } data OneofCase = OneofCase { caseField :: FieldInfo , caseConstructorName :: Name -- ^ The constructor for building a 'oneofTypeName' from the -- value in this field. , casePrismName :: Name -- ^ The name for building 'Prism' definition. } data FieldName = FieldName { overloadedName :: Symbol -- ^ The overloaded name of lenses that access this field. -- For example, if the field is called "foo_bar" in the .proto -- then @overloadedName == "fooBar"@ and we might generate -- @fooBar@ and/or @maybe'fooBar@ lenses to access the data. -- -- May be shared between two different message data types in the same -- module. , haskellRecordFieldName :: Name -- ^ The Haskell name of this internal record field; for example, -- "_Foo'Bar'baz. Unique within each module. } -- | A string that refers to the name (in Haskell) of a lens that accesses a -- field. -- -- For example, in the signature of the overloaded lens -- -- @ -- foo :: HasLens "foo" ... => Lens ... -- @ -- -- a 'Symbol' is used to construct both the type-level argument to -- @HasLens@ and the name of the function @foo@. newtype Symbol = Symbol String deriving (Eq, Ord, IsString, Semigroup.Semigroup, Monoid) nameFromSymbol :: Symbol -> Name nameFromSymbol (Symbol s) = fromString s -- | Construct a promoted, type-level string. promoteSymbol :: Symbol -> Type promoteSymbol (Symbol s) = tyPromotedString s -- | All the information needed to define or use a proto enum type. data EnumInfo n = EnumInfo { enumName :: n , enumUnrecognizedName :: n , enumUnrecognizedValueName :: n , enumDescriptor :: EnumDescriptorProto , enumValues :: [EnumValueInfo n] } deriving Functor -- | Information about a single value case of a proto enum. data EnumValueInfo n = EnumValueInfo { enumValueName :: n , enumValueDescriptor :: EnumValueDescriptorProto , enumAliasOf :: Maybe Name -- ^ If 'Nothing', we turn value into a normal constructor of the enum. -- If @'Just' n@, we're treating it as an alias of the constructor @n@ -- (a PatternSynonym in Haskell). This mirrors the behavior of the -- Java API. } deriving Functor mapEnv :: (n -> n') -> Env n -> Env n' mapEnv f = fmap $ fmap f -- Lift a set of local definitions into references to a specific module. qualifyEnv :: ModuleName -> Env Name -> Env QName qualifyEnv m = mapEnv (qual m) -- Lift a set of local definitions into references to the current module. unqualifyEnv :: Env Name -> Env QName unqualifyEnv = mapEnv unQual -- | Look up the Haskell name for the type of a given field (message or enum). definedFieldType :: FieldDescriptorProto -> Env QName -> Definition QName definedFieldType fd env = fromMaybe err $ Map.lookup (fd ^. typeName) env where err = error $ "definedFieldType: Field type " ++ unpack (fd ^. typeName) ++ " not found in environment." -- | Look up the Haskell name for the type of a given type. definedType :: Text -> Env QName -> Definition QName definedType ty = fromMaybe err . Map.lookup ty where err = error $ "definedType: Type " ++ unpack ty ++ " not found in environment." -- | Collect all the definitions in the given file (including definitions -- nested in other messages), and assign Haskell names to them. collectDefinitions :: FileDescriptorProto -> Env Name collectDefinitions fd = let protoPrefix = case fd ^. package of "" -> "." p -> "." <> p <> "." hsPrefix = "" in Map.fromList $ messageAndEnumDefs protoPrefix hsPrefix (fd ^. messageType) (fd ^. enumType) collectServices :: FileDescriptorProto -> [ServiceInfo] collectServices fd = fmap (toServiceInfo $ fd ^. package) $ fd ^. service where toServiceInfo :: Text -> ServiceDescriptorProto -> ServiceInfo toServiceInfo pkg sd = ServiceInfo { serviceName = sd ^. name , servicePackage = pkg , serviceMethods = fmap toMethodInfo $ sd ^. method } toMethodInfo :: MethodDescriptorProto -> MethodInfo toMethodInfo md = MethodInfo { methodName = md ^. name , methodIdent = camelCase $ md ^. name , methodInput = fromString . T.unpack $ md ^. inputType , methodOutput = fromString . T.unpack $ md ^. outputType , methodClientStreaming = md ^. clientStreaming , methodServerStreaming = md ^. serverStreaming } messageAndEnumDefs :: Text -> String -> [DescriptorProto] -> [EnumDescriptorProto] -> [(Text, Definition Name)] messageAndEnumDefs protoPrefix hsPrefix messages enums = concatMap (messageDefs protoPrefix hsPrefix) messages ++ map (enumDef protoPrefix hsPrefix) enums -- | Generate the definitions for a message and its nested types (if any). messageDefs :: Text -> String -> DescriptorProto -> [(Text, Definition Name)] messageDefs protoPrefix hsPrefix d = (protoName, thisDef) : messageAndEnumDefs (protoName <> ".") hsPrefix' (d ^. nestedType) (d ^. enumType) where protoName = protoPrefix <> d ^. name hsPrefix' = hsPrefix ++ hsName (d ^. name) ++ "'" allFields = groupFieldsByOneofIndex (d ^. field) thisDef = Message MessageInfo { messageName = fromString $ hsPrefix ++ hsName (d ^. name) , messageDescriptor = d , messageFields = map (fieldInfo hsPrefix') $ Map.findWithDefault [] Nothing allFields , messageOneofFields = collectOneofFields hsPrefix' d allFields , messageUnknownFields = fromString $ "_" ++ hsPrefix' ++ "_unknownFields" } fieldInfo :: String -> FieldDescriptorProto -> FieldInfo fieldInfo hsPrefix f = FieldInfo f $ mkFieldName hsPrefix $ f ^. name collectOneofFields :: String -> DescriptorProto -> Map.Map (Maybe Int32) [FieldDescriptorProto] -> [OneofInfo] collectOneofFields hsPrefix d allFields = zipWith oneofInfo [0..] $ d ^.. oneofDecl . traverse . name where oneofInfo idx n = OneofInfo { oneofFieldName = mkFieldName hsPrefix n , oneofTypeName = fromString $ hsPrefix ++ hsNameUnique subdefTypes n , oneofCases = map oneofCase $ Map.findWithDefault [] (Just idx) allFields } oneofCase f = let consName = hsPrefix ++ hsNameUnique subdefCons (f ^. name) in OneofCase { caseField = fieldInfo hsPrefix f , caseConstructorName = -- Note: oneof case constructors aren't prefixed -- by the oneof name; field names (even inside -- of a oneof) are unique within a message. fromString consName , casePrismName = fromString $ "_" ++ consName } -- Make a name that doesn't overlap with those already defined by submessages -- or subenums. hsNameUnique ns n | n' `elem` ns = n' ++ "'" | otherwise = n' where n' = hsName $ camelCase n -- The Haskell "type" namespace subdefTypes = Set.fromList $ map hsName $ toListOf (nestedType . traverse . name) d ++ toListOf (enumType . traverse . name) d -- The Haskell "expression" namespace (i.e., constructors) subdefCons = Set.fromList $ map hsName $ toListOf (nestedType . traverse . name) d ++ toListOf (enumType . traverse . value . traverse . name) d -- | Group fields by the index of the oneof field that they belong to. -- (Or 'Nothing' if they don't belong to a oneof.) groupFieldsByOneofIndex :: [FieldDescriptorProto] -> Map.Map (Maybe Int32) [FieldDescriptorProto] groupFieldsByOneofIndex = fmap reverse . Map.fromListWith (++) . fmap (\f -> (f ^. maybe'oneofIndex, [f])) hsName :: Text -> String hsName = unpack . capitalize mkFieldName :: String -> Text -> FieldName mkFieldName hsPrefix n = FieldName { overloadedName = fromString n' , haskellRecordFieldName = fromString $ "_" ++ hsPrefix ++ n' } where n' = fieldName n -- | Get the name in Haskell of a proto field, taking care of camel casing and -- clashes with language keywords. fieldName :: Text -> String fieldName = unpack . disambiguate . camelCase where disambiguate s | s `Set.member` reservedKeywords = s <> "'" | otherwise = s camelCase :: Text -> Text camelCase s = -- Preserve any initial underlines (e.g., "_foo_bar" -> "_fooBar"). let (underlines, rest) = T.span (== '_') s in case splitOn "_" rest of -- splitOn always returns a list with at least one element. [] -> error $ "camelCase: splitOn returned empty list: " ++ show rest [""] -> error $ "camelCase: name consists only of underscores: " ++ show s s':ss -> T.concat $ underlines : lowerInitialChars s' : map capitalize ss -- | Lower-case all initial upper-case characters. -- For example: "Foo" -> "foo", "FooBar" -> "fooBar", "FOObar" -> "foobar" lowerInitialChars :: Text -> Text lowerInitialChars s = toLower pre <> post where (pre, post) = T.span isUpper s -- | A list of reserved keywords that aren't valid as variable names. reservedKeywords :: Set.Set Text reservedKeywords = Set.fromList $ -- Haskell2010 keywords: -- https://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-180002.4 -- We don't include keywords that are allowed to be variable names, -- in particular: "as", "forall", and "hiding". [ "case" , "class" , "data" , "default" , "deriving" , "do" , "else" , "foreign" , "if" , "import" , "in" , "infix" , "infixl" , "infixr" , "instance" , "let" , "module" , "newtype" , "of" , "then" , "type" , "where" ] ++ -- Nonstandard extensions [ "mdo" -- RecursiveDo , "rec" -- Arrows, RecursiveDo , "pattern" -- PatternSynonyms , "proc" -- Arrows ] -- | Generate the definition for an enum type. enumDef :: Text -> String -> EnumDescriptorProto -> (Text, Definition Name) enumDef protoPrefix hsPrefix d = let mkText n = protoPrefix <> n mkHsName n = fromString $ hsPrefix ++ unpack n in (mkText (d ^. name) , Enum EnumInfo { enumName = mkHsName (d ^. name) , enumUnrecognizedName = mkHsName (d ^. name <> "'Unrecognized") , enumUnrecognizedValueName = mkHsName (d ^. name <> "'UnrecognizedValue") , enumDescriptor = d , enumValues = collectEnumValues mkHsName $ d ^. value }) -- | Generate the definitions for each enum value. In particular, decide -- whether it's a true constructor or a PatternSynonym to another -- constructor. -- -- Like Java, we treat the first case of each numeric value as the "real" -- constructor, and subsequent cases as synonyms. collectEnumValues :: (Text -> Name) -> [EnumValueDescriptorProto] -> [EnumValueInfo Name] collectEnumValues mkHsName = snd . mapAccumL helper Map.empty where helper :: Map.Map Int32 Name -> EnumValueDescriptorProto -> (Map.Map Int32 Name, EnumValueInfo Name) helper seenNames v | Just n' <- Map.lookup k seenNames = (seenNames, mkValue (Just n')) | otherwise = (Map.insert k n seenNames, mkValue Nothing) where mkValue = EnumValueInfo n v n = mkHsName (v ^. name) k = v ^. number -- Haskell types must start with an uppercase letter, so we capitalize message -- and enum names. -- Name collisions will show up as build errors in the generated haskell file. capitalize :: Text -> Text capitalize s | Just (c, s') <- uncons s = cons (toUpper c) s' | otherwise = s