-- Copyright 2016 Google Inc. All Rights Reserved. -- -- Use of this source code is governed by a BSD-style -- license that can be found in the LICENSE file or at -- https://developers.google.com/open-source/licenses/bsd -- | Functions for converting protocol buffers to a human-readable text format. {-# LANGUAGE CPP #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module Data.ProtoLens.TextFormat( showMessage, showMessageWithRegistry, showMessageShort, pprintMessage, pprintMessageWithRegistry, readMessage, readMessageWithRegistry, readMessageOrDie, ) where import Lens.Family2 ((&),(^.),(.~), set, over, view) import Control.Arrow (left) import Data.Bifunctor (first) import qualified Data.ByteString import Data.Char (isPrint, isAscii, chr) import Data.Foldable (foldlM, foldl') import qualified Data.Map as Map import Data.Maybe (catMaybes) import Data.Proxy (Proxy(Proxy)) import qualified Data.Set as Set import qualified Data.Text.Encoding as Text import qualified Data.Text.Lazy as Lazy import qualified Data.Text as Text (unpack) import Numeric (showOct) import Text.Parsec (parse) import Text.PrettyPrint #if MIN_VERSION_base(4,11,0) import Prelude hiding ((<>)) #endif import Data.ProtoLens.Encoding (decodeMessage, encodeMessage) import Data.ProtoLens.Encoding.Bytes (runParser) import Data.ProtoLens.Encoding.Wire import Data.ProtoLens.Message hiding (buildMessage, parseMessage) import qualified Data.ProtoLens.TextFormat.Parser as Parser -- TODO: This code is newer and missing some edge cases, -- including: -- - Serialize directly to Text -- - String/bytestring serialization -- - Strings delimited by single quotes -- - Concatenate multiple strings one after another -- - control characters and non-UTF8 text -- - characters in bytes fields should fit in Word8 -- - More output formats for floats like exponentials -- - Print/parse enums by textual name in addition to integer value -- - More compact printing/parsing for packed fields -- - Decide what to do for values that don't fit in the field (e.g., overflow) -- - Add more tests for: -- - edge cases of deserialization ("deserializeFrom") -- | Pretty-print the given message into a human-readable form. pprintMessage :: Message msg => msg -> Doc pprintMessage = pprintMessageWithRegistry mempty -- | Pretty-print the given message into human-readable form, using the given -- 'Registry' to decode @google.protobuf.Any@ values. pprintMessageWithRegistry :: Message msg => Registry -> msg -> Doc pprintMessageWithRegistry reg msg -- Either put all fields together on a single line, or use a separate line -- for each field. We use a single "sep" for all fields (and all elements -- of all the repeated fields) to avoid putting some repeated fields on one -- line and other fields on multiple lines, which is less readable. = sep $ concatMap (pprintField reg msg) allFields ++ map pprintTaggedValue (msg ^. unknownFields) -- | Convert the given message into a human-readable 'String'. showMessage :: Message msg => msg -> String showMessage = render . pprintMessage -- | Convert the given message into a human-readable 'String', using the -- 'Registry' to encode @google.protobuf.Any@ values. showMessageWithRegistry :: Message msg => Registry -> msg -> String showMessageWithRegistry reg = render . pprintMessageWithRegistry reg -- | Serializes a proto as a string on a single line. Useful for debugging -- and error messages like @.DebugString()@ in other languages. showMessageShort :: Message msg => msg -> String showMessageShort = renderStyle (Style OneLineMode maxBound 1.5) . pprintMessage pprintField :: Registry -> msg -> FieldDescriptor msg -> [Doc] pprintField reg msg (FieldDescriptor name typeDescr accessor) = map (pprintFieldValue reg name typeDescr) $ case accessor of PlainField d f | Optional <- d, val == fieldDefault -> [] | otherwise -> [val] where val = msg ^. f OptionalField f -> catMaybes [msg ^. f] -- TODO: better printing for packed fields RepeatedField _ f -> msg ^. f MapField k v f -> pairToMsg <$> Map.assocs (msg ^. f) where pairToMsg (x,y) = defMessage & k .~ x & v .~ y pprintFieldValue :: Registry -> String -> FieldTypeDescriptor value -> value -> Doc pprintFieldValue reg name field@(MessageField MessageType) m | Just AnyMessageDescriptor { anyTypeUrlLens, anyValueLens } <- matchAnyMessage field, typeUri <- view anyTypeUrlLens m, fieldData <- view anyValueLens m, Just (SomeMessageType (Proxy :: Proxy value')) <- lookupRegistered typeUri reg, Right (anyValue :: value') <- decodeMessage fieldData = pprintSubmessage name $ sep [ lbrack <> text (Text.unpack typeUri) <> rbrack <+> lbrace , nest 2 (pprintMessageWithRegistry reg anyValue) , rbrace ] | otherwise = pprintSubmessage name (pprintMessageWithRegistry reg m) pprintFieldValue reg name (MessageField GroupType) m = pprintSubmessage name (pprintMessageWithRegistry reg m) pprintFieldValue _ name (ScalarField f) x = named name $ pprintScalarValue f x named :: String -> Doc -> Doc named n x = text n <> colon <+> x pprintScalarValue :: ScalarField value -> value -> Doc pprintScalarValue EnumField x = text (showEnum x) pprintScalarValue Int32Field x = primField x pprintScalarValue Int64Field x = primField x pprintScalarValue UInt32Field x = primField x pprintScalarValue UInt64Field x = primField x pprintScalarValue SInt32Field x = primField x pprintScalarValue SInt64Field x = primField x pprintScalarValue Fixed32Field x = primField x pprintScalarValue Fixed64Field x = primField x pprintScalarValue SFixed32Field x = primField x pprintScalarValue SFixed64Field x = primField x pprintScalarValue FloatField x = primField x pprintScalarValue DoubleField x = primField x pprintScalarValue BoolField x = boolValue x pprintScalarValue StringField x = pprintByteString (Text.encodeUtf8 x) pprintScalarValue BytesField x = pprintByteString x pprintSubmessage :: String -> Doc -> Doc pprintSubmessage name contents = sep [text name <+> lbrace, nest 2 contents, rbrace] -- | Formats a string in a way that mostly matches the C-compatible escaping -- used by the Protocol Buffer distribution. We depart a bit by escaping all -- non-ASCII characters, which depending on the locale, the distribution might -- not do. -- -- This uses three-digit octal escapes, e.g. "\011" plus \n, \r,, \t, \', \", -- and \\ only. Note that Haskell string-literal syntax calls for "\011" to be -- interpreted as decimal 11, rather than the decimal 9 it actually represent, -- so you can't use Prelude.read to parse the strings created here. pprintByteString :: Data.ByteString.ByteString -> Doc pprintByteString x = char '\"' <> text (concatMap escape $ Data.ByteString.unpack x) <> char '\"' where escape w8 | ch == '\n' = "\\n" | ch == '\r' = "\\r" | ch == '\t' = "\\t" | ch == '\"' = "\\\"" | ch == '\'' = "\\\'" | ch == '\\' = "\\\\" | isPrint ch && isAscii ch = ch : "" | otherwise = "\\" ++ pad (showOct w8 "") where ch = chr $ fromIntegral w8 pad str = replicate (3 - length str) '0' ++ str primField :: Show value => value -> Doc primField x = text (show x) boolValue :: Bool -> Doc boolValue True = text "true" boolValue False = text "false" pprintTaggedValue :: TaggedValue -> Doc pprintTaggedValue (TaggedValue t wv) = case wv of VarInt x -> named name $ primField x Fixed64 x -> named name $ primField x Fixed32 x -> named name $ primField x Lengthy x -> case runParser parseFieldSet x of Left _ -> named name $ pprintByteString x Right ts -> pprintSubmessage name $ sep $ map pprintTaggedValue ts -- TODO: implement better printing for unknown groups StartGroup -> named name $ text "start_group" EndGroup -> named name $ text "end_group" where name = show (unTag t) -------------------------------------------------------------------------------- -- Parsing -- | Parse a 'Message' from the human-readable protocol buffer text format. readMessage :: Message msg => Lazy.Text -> Either String msg readMessage = readMessageWithRegistry mempty -- | Parse a 'Message' from the human-readable protocol buffer text format. -- Throws an error if the parse was not successful. readMessageOrDie :: Message msg => Lazy.Text -> msg readMessageOrDie str = case readMessage str of Left e -> error $ "readMessageOrDie: " ++ e Right x -> x -- | Parse a 'Message' from a human-readable protocol buffer text format, using -- the given 'Registry' to decode 'Any' fields readMessageWithRegistry :: Message msg => Registry -> Lazy.Text -> Either String msg readMessageWithRegistry reg str = left show (parse Parser.parser "" str) >>= buildMessage reg buildMessage :: forall msg . Message msg => Registry -> Parser.Message -> Either String msg buildMessage reg fields | missing <- missingFields (Proxy @msg) fields, not $ null missing = Left $ "Missing fields " ++ show missing | otherwise = reverseRepeatedFields fieldsByTag <$> buildMessageFromDescriptor reg defMessage fields missingFields :: forall msg . Message msg => Proxy msg -> Parser.Message -> [String] missingFields _ = Set.toList . foldl' deleteField requiredFieldNames where requiredFieldNames :: Set.Set String requiredFieldNames = Set.fromList $ Map.keys $ Map.filter isRequired $ fieldsByTextFormatName @msg deleteField :: Set.Set String -> Parser.Field -> Set.Set String deleteField fs (Parser.Field (Parser.Key name) _) = Set.delete name fs deleteField fs (Parser.Field (Parser.UnknownKey n) _) | Just d <- Map.lookup (Tag (fromIntegral n)) (fieldsByTag @msg) = Set.delete (fieldDescriptorName d) fs deleteField fs _ = fs buildMessageFromDescriptor :: Message msg => Registry -> msg -> Parser.Message -> Either String msg buildMessageFromDescriptor reg = foldlM (addField reg) addField :: forall msg . Message msg => Registry -> msg -> Parser.Field -> Either String msg addField reg msg (Parser.Field key rawValue) = do FieldDescriptor name typeDescriptor accessor <- getFieldDescriptor value <- makeValue name reg typeDescriptor rawValue return $ modifyField accessor value msg where getFieldDescriptor | Parser.Key name <- key, Just f <- Map.lookup name fieldsByTextFormatName = return f | Parser.UnknownKey tag <- key, Just f <- Map.lookup (fromIntegral tag) fieldsByTag = return f | otherwise = Left $ "Unrecognized field " ++ show key modifyField :: FieldAccessor msg value -> value -> msg -> msg modifyField (PlainField _ f) value = set f value modifyField (OptionalField f) value = set f (Just value) modifyField (RepeatedField _ f) value = over f (value :) modifyField (MapField key value f) mapElem = over f (Map.insert (mapElem ^. key) (mapElem ^. value)) makeValue :: forall value . String -- ^ name of field -> Registry -> FieldTypeDescriptor value -> Parser.Value -> Either String value makeValue name _ (ScalarField f) v = first (("Error parsing field " ++ show name ++ ": ") ++) $ makeScalarValue f v makeValue name reg field@(MessageField MessageType) (Parser.MessageValue (Just typeUri) x) | Just AnyMessageDescriptor { anyTypeUrlLens, anyValueLens } <- matchAnyMessage field = case lookupRegistered typeUri reg of Nothing -> Left $ "Could not decode google.protobuf.Any for field " ++ show name ++ ": unregistered type URI " ++ show typeUri Just (SomeMessageType (Proxy :: Proxy value')) -> case buildMessage reg x :: Either String value' of Left err -> Left err Right value' -> Right (defMessage & anyTypeUrlLens .~ typeUri & anyValueLens .~ encodeMessage value') | otherwise = Left ("Type mismatch parsing explicitly typed message. Expected " ++ show (messageName (Proxy @value)) ++ ", got " ++ show typeUri) makeValue _ reg (MessageField _) (Parser.MessageValue _ x) = buildMessage reg x makeValue name _ (MessageField _) val = Left $ "Type mismatch for field " ++ show name ++ ": expected message, found " ++ show val makeScalarValue :: ScalarField value -> Parser.Value -> Either String value makeScalarValue Int32Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue Int64Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue UInt32Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue UInt64Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue SInt32Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue SInt64Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue Fixed32Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue Fixed64Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue SFixed32Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue SFixed64Field (Parser.IntValue x) = Right (fromInteger x) makeScalarValue FloatField (Parser.IntValue x) = Right (fromInteger x) makeScalarValue DoubleField (Parser.IntValue x) = Right (fromInteger x) makeScalarValue BoolField (Parser.IntValue x) | x == 0 = Right False | x == 1 = Right True | otherwise = Left $ "Unrecognized bool value " ++ show x makeScalarValue DoubleField (Parser.DoubleValue x) = Right x makeScalarValue FloatField (Parser.DoubleValue x) = Right (realToFrac x) makeScalarValue BoolField (Parser.EnumValue x) | x `elem` ["true", "True", "t"] = Right True | x `elem` ["false", "False", "f"] = Right False | otherwise = Left $ "Unrecognized bool value " ++ show x makeScalarValue StringField (Parser.ByteStringValue x) = Right (Text.decodeUtf8 x) makeScalarValue BytesField (Parser.ByteStringValue x) = Right x makeScalarValue EnumField (Parser.IntValue x) = maybe (Left $ "Unrecognized enum value " ++ show x) Right (maybeToEnum $ fromInteger x) makeScalarValue EnumField (Parser.EnumValue x) = maybe (Left $ "Unrecognized enum value " ++ show x) Right (readEnum x) makeScalarValue f val = Left $ "Type mismatch: " ++ show (f, val)