-- To get the defaults working sanely, I need to encode at least some reflection information. -- The to-be-bootstrapped descriptor.proto structures are not parsed enough for sane default usage. -- So this is currently over-designed for the immediate need of DescriptorInfo -> number -> FieldInfo -> Maybe HsDefault. -- These data structures and API are quite likely to be rewritten. -- -- A strong feature of this is that it does not contain any structures defined by descriptor.proto! -- This prevents me hitting any circular dependencies. -- -- module Text.ProtocolBuffers.Reflections(ProtoName(..),DescriptorInfo(..),FieldInfo(..) ,HsDefault(..),EnumInfo(..),EnumInfoApp ,ReflectDescriptor(..),ReflectEnum(..),GetMessageInfo(..) ,parseDefDouble,parseDefFloat ,parseDefBool,parseDefInteger ,parseDefString,parseDefBytes ,cEncode,cDecode ) where import Text.ProtocolBuffers.Basic import qualified Data.ByteString.Lazy.UTF8 as U import qualified Data.ByteString.Lazy as BS (null,pack,unpack) import qualified Data.ByteString.Lazy.Char8 as BSC(pack,unpack) import Numeric(readHex,readOct,readDec,showOct) import Data.Char(ord,chr,isHexDigit,isOctDigit,toLower) import Data.List(sort,unfoldr) import qualified Data.Foldable as F(toList) import Data.Bits(Bits((.|.),shiftL)) import Data.Word(Word8) import Data.Set(Set) import qualified Data.Set as Set import Data.Generics(Data) import Data.Typeable(Typeable) import Test.QuickCheck(quickCheck) import Codec.Binary.UTF8.String(encode) data ProtoName = ProtoName { haskellPrefix :: String -- Haskell specific prefix to module hierarchy (e.g. Text) , parentModule :: String -- Proto specified namespace (like java) , baseName :: String -- unqualfied name of this thing } deriving (Show,Read,Eq,Ord,Data,Typeable) data DescriptorInfo = DescriptorInfo { descName :: ProtoName , fields :: Seq FieldInfo } deriving (Show,Read,Eq,Ord,Data,Typeable) data GetMessageInfo = GetMessageInfo { requiredTags :: Set WireTag , allowedTags :: Set WireTag } deriving (Show,Read,Eq,Ord,Data,Typeable) data FieldInfo = FieldInfo { fieldName :: String , fieldNumber :: FieldId , wireTag :: WireTag , wireTagLength :: Int64 -- ^ Bytes required in the Varint formatted wireTag , isRequired :: Bool , canRepeat :: Bool , typeCode :: FieldType -- ^ fromEnum of Text.DescriptorProtos.FieldDescriptorProto.Type , typeName :: Maybe String , hsRawDefault :: Maybe ByteString -- ^ crappy, perhaps escaped, thing , hsDefault :: Maybe HsDefault -- ^ nice parsed thing } deriving (Show,Read,Eq,Ord,Data,Typeable) -- | 'HsDefault' stores the parsed default from the proto file in a -- form that will make a nice literal in the -- Language.Haskell.Exts.Syntax sense. data HsDefault = HsDef'Bool Bool | HsDef'ByteString ByteString | HsDef'Rational Rational | HsDef'Integer Integer deriving (Show,Read,Eq,Ord,Data,Typeable) data EnumInfo = EnumInfo { enumName :: ProtoName , enumItems :: [(EnumCode,String)] } deriving (Show,Read,Eq,Ord,Data,Typeable) type EnumInfoApp e = [(EnumCode,String,e)] class ReflectDescriptor m where getMessageInfo :: m -> GetMessageInfo -- Must not inspect argument getMessageInfo x = cached where cached = makeMessageInfo (reflectDescriptorInfo (undefined `asTypeOf` x)) reflectDescriptorInfo :: m -> DescriptorInfo -- Must not inspect argument parentOfDescriptor :: m -> Maybe DescriptorInfo -- Must not inspect argument parentOfDescriptor _ = Nothing class ReflectEnum e where reflectEnum :: EnumInfoApp e reflectEnumInfo :: e -> EnumInfo -- Must not inspect argument parentOfEnum :: e -> Maybe DescriptorInfo -- Must not inspect argument parentOfEnum _ = Nothing makeMessageInfo :: DescriptorInfo -> GetMessageInfo makeMessageInfo di = GetMessageInfo { requiredTags = Set.fromDistinctAscList . sort $ [ wireTag f | f <- F.toList (fields di), isRequired f] , allowedTags = Set.fromDistinctAscList . sort $ [ wireTag f | f <- F.toList (fields di)] } --- From here down is code used to parse the format of the default values in the .proto files {-# INLINE mayRead #-} mayRead :: ReadS a -> String -> Maybe a mayRead f s = case f s of [(a,"")] -> Just a; _ -> Nothing parseDefDouble :: ByteString -> Maybe HsDefault parseDefDouble bs = fmap (HsDef'Rational . toRational) . mayRead reads' . U.toString $ bs where reads' :: ReadS Double reads' = readSigned' reads parseDefFloat :: ByteString -> Maybe HsDefault parseDefFloat bs = fmap (HsDef'Rational . toRational) . mayRead reads' . U.toString $ bs where reads' :: ReadS Float reads' = readSigned' reads parseDefString :: ByteString -> Maybe HsDefault parseDefString bs = Just (HsDef'ByteString bs) parseDefBytes :: ByteString -> Maybe HsDefault parseDefBytes bs = Just . HsDef'ByteString . BS.pack . cDecode . BSC.unpack $ bs parseDefInteger :: ByteString -> Maybe HsDefault parseDefInteger bs = fmap HsDef'Integer . mayRead checkSign . U.toString $ bs where checkSign = readSigned' checkBase checkBase ('0':'x':xs) = readHex xs checkBase ('0':xs) = readOct xs checkBase xs = readDec xs parseDefBool :: ByteString -> Maybe HsDefault parseDefBool bs | bs == BSC.pack "true" = Just (HsDef'Bool True) | bs == BSC.pack "false" = Just (HsDef'Bool False) | otherwise = Nothing -- The Numeric.readSigned does not handle '+' for some odd reason readSigned' f ('-':xs) = map (\(v,s) -> (-v,s)) . f $ xs readSigned' f ('+':xs) = f xs readSigned' f xs = f xs -- see google's stubs/strutil.cc lines 398-449/1121 and C99 specification -- This mainly targets three digit octal codes cEncode :: [Word8] -> [Char] cEncode = concatMap one where one :: Word8 -> [Char] one x | (32 < x) && (x < 127) = [toEnum . fromEnum $ x] -- main case of unescaped value one 9 = sl 't' one 10 = sl 'n' one 13 = sl 'r' one 34 = sl '"' one 39 = sl '\'' one 92 = sl '\\' one 0 = '\\':"000" one x | x < 8 = '\\':'0':'0':(showOct x "") | x < 32 = '\\':'0':(showOct x "") | otherwise = '\\':(showOct x "") sl c = ['\\',c] -- Takes backslash encoded junk an retrieves the underlying bytes -- The \x is kosher, but \X is non-standard. I recognize \X because google does -- Convert backslash-escaped \u and \U crap into UTF-8 (this is why we have to use concat!) -- It only works on characters and values in the range 0..255 (the Char8's returned by ByteString.Char.unpack) -- It ONLY consumes up to 3 octal digits when reading an escape sequence, the rest are left alone. cDecode :: [Char] -> [Word8] cDecode = concat . unfoldr one where one :: [Char] -> Maybe ([Word8],[Char]) one (x:xs) | x /= '\\' = Just (checkChar8 x,xs) -- main case of unescaped value one [] = Nothing one ('\\':[]) = error "Text.ProtocolBuffers.Reflections.cDecode cannot understand ending with a backslash" one ('\\':ys) | 1 <= len = case mayRead readOct oct of Just w -> Just (checkByte w,rest) Nothing -> error "Text.ProtocolBuffers.Reflections.cDecode failed to decode octal sequence" where oct = takeWhile isOctDigit (take 3 ys) len = length oct rest = drop len ys one ('\\':x:ys) | 'x' == toLower x && ok = case mayRead readHex hex of Just w -> Just (checkByte w,rest) Nothing -> error "Text.ProtocolBuffers.Reflections.cDecode failed to decode hex sequence" where ok = 1 <= length hex (hex,rest) = span isHexDigit ys one ('\\':'u':ys) | ok = case mayRead readHex hex of Just w -> Just (checkUnicode w,rest) Nothing -> error "Text.ProtocolBuffers.Reflections.cDecode failed to decode 4 char unicode sequence" where ok = all isHexDigit hex && 4 == length hex (hex,rest) = splitAt 4 ys one ('\\':'U':ys) | ok = case mayRead readHex hex of Just w -> Just (checkUnicode w,rest) Nothing -> error "Text.ProtocolBuffers.Reflections.cDecode failed to decode 8 char unicode sequence" where ok = all isHexDigit hex && 8 == length hex (hex,rest) = splitAt 8 ys one ('\\':(x:xs)) = Just ([decode x],xs) decode :: Char -> Word8 decode 'a' = 7 decode 'b' = 8 decode 't' = 9 decode 'n' = 10 decode 'v' = 11 decode 'f' = 12 decode 'r' = 13 decode '\"' = 34 decode '\'' = 39 decode '?' = 63 -- C99 rule : "\?" is '?' decode '\\' = 92 decode x | toLower x == 'x' = error "Text.ProtocolBuffers.Reflections.cDecode cannot understand your 'xX' hexadecimal escaped value" decode x | toLower x == 'u' = error "Text.ProtocolBuffers.Reflections.cDecode cannot understand your 'uU' unicode UTF-8 hexadecimal escaped value" decode _ = error "Text.ProtocolBuffers.Reflections.cDecode cannot understand your backslash-escaped value" checkChar8 :: Char -> [Word8] checkChar8 c | (0 <= i) && (i <= 255) = [toEnum i] | otherwise = error "Text.ProtocolBuffers.Reflections.cDecode found Char out of range 0..255" where i = fromEnum c checkByte :: Integer -> [Word8] checkByte i | (0 <= i) && (i <= 255) = [fromInteger i] | otherwise = error "Text.ProtocolBuffers.Reflections.cDecode found Oct/Hex Int out of range 0..255" checkUnicode :: Integer -> [Word8] checkUnicode i | (0 <= i) && (i <= 127) = [fromInteger i] | i <= maxChar = encode [ toEnum . fromInteger $ i ] | otherwise = error "Text.ProtocolBuffers.Reflections.cDecode found Unicode Char out of range 0..0x10FFFF" where maxChar = toInteger (fromEnum (maxBound ::Char)) -- 0x10FFFF testEncodeDecode = let q :: [Int] -> Bool q = (\y -> let x = map (\z->abs z `mod` 255) y in x == (map fromEnum . cDecode . cEncode . map toEnum$ x)) in quickCheck q