{-# LANGUAGE CPP #-} {-# LANGUAGE DefaultSignatures #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} #if __GLASGOW_HASKELL__ >= 711 {-# LANGUAGE PatternSynonyms #-} #endif #if __GLASGOW_HASKELL__ >= 800 {-# LANGUAGE TypeInType #-} #endif -- | -- -- Copyright: (c) 2019 Oleg Grenrus -- -- Structurally tag binary serialisaton stream. -- Useful when most 'Binary' instances are 'Generic' derived. -- -- Say you have a data type -- -- @ -- data Record = Record -- { _recordFields :: HM.HashMap Text (Integer, ByteString) -- , _recordEnabled :: Bool -- } -- deriving (Eq, Show, Generic) -- -- instance 'Binary' Record -- instance 'Structured' Record -- @ -- -- then you can serialise and deserialise @Record@ values with a structure tag by simply -- -- @ -- 'structuredEncode' record :: 'LBS.ByteString' -- 'structuredDecode' lbs :: IO Record -- @ -- -- If structure of @Record@ changes in between, deserialisation will fail early. -- -- Technically, 'Structured' is not related to 'Binary', and may -- be useful in other uses. -- module Distribution.Utils.Structured ( -- * Encoding and decoding -- | These functions operate like @binary@'s counterparts, -- but the serialised version has a structure hash in front. structuredEncode, structuredEncodeFile, structuredDecode, structuredDecodeOrFailIO, structuredDecodeFileOrFail, -- * Structured class Structured (structure), MD5, structureHash, structureBuilder, genericStructure, GStructured, nominalStructure, containerStructure, -- * Structure type Structure (..), TypeName, ConstructorName, TypeVersion, SopStructure, hashStructure, typeVersion, typeName, ) where import Data.Int (Int16, Int32, Int64, Int8) import Data.List.NonEmpty (NonEmpty) import Data.Proxy (Proxy (..)) import Data.Ratio (Ratio) import Data.Word (Word, Word16, Word32, Word64, Word8) import qualified Control.Monad.Trans.State.Strict as State import Control.Exception (ErrorCall (..), catch, evaluate) import GHC.Generics import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy.Builder as Builder import qualified Data.IntMap as IM import qualified Data.IntSet as IS import qualified Data.Map as Map import qualified Data.Sequence as Seq import qualified Data.Set as Set import qualified Data.Text as T import qualified Data.Text.Lazy as LT import qualified Data.Time as Time import qualified Distribution.Compat.Binary as Binary #ifdef MIN_VERSION_aeson import qualified Data.Aeson as Aeson #endif #if __GLASGOW_HASKELL__ >= 800 import Data.Kind (Type) #else #define Type * #endif import Distribution.Compat.Typeable (Typeable, TypeRep, typeRep) import Distribution.Utils.MD5 import Data.Monoid (mconcat) import qualified Data.Semigroup import qualified Data.Foldable #if !MIN_VERSION_base(4,8,0) import Control.Applicative (pure) import Data.Traversable (traverse) #endif #if !MIN_VERSION_base(4,7,0) import Data.Typeable (Typeable1, typeOf1) #endif ------------------------------------------------------------------------------- -- Types ------------------------------------------------------------------------------- type TypeName = String type ConstructorName = String -- | A sematic version of a data type. Usually 0. type TypeVersion = Word32 -- | Structure of a datatype. -- -- It can be infinite, as far as 'TypeRep's involved are finite. -- (e.g. polymorphic recursion might cause troubles). -- data Structure = Nominal !TypeRep !TypeVersion TypeName [Structure] -- ^ nominal, yet can be parametrised by other structures. | Newtype !TypeRep !TypeVersion TypeName Structure -- ^ a newtype wrapper | Structure !TypeRep !TypeVersion TypeName SopStructure -- ^ sum-of-products structure deriving (Eq, Ord, Show, Generic) type SopStructure = [(ConstructorName, [Structure])] -- | A MD5 hash digest of 'Structure'. hashStructure :: Structure -> MD5 hashStructure = md5 . LBS.toStrict . Builder.toLazyByteString . structureBuilder -- | A van-Laarhoven lens into 'TypeVersion' of 'Structure' -- -- @ -- 'typeVersion' :: Lens' 'Structure' 'TypeVersion' -- @ typeVersion :: Functor f => (TypeVersion -> f TypeVersion) -> Structure -> f Structure typeVersion f (Nominal t v n s) = fmap (\v' -> Nominal t v' n s) (f v) typeVersion f (Newtype t v n s) = fmap (\v' -> Newtype t v' n s) (f v) typeVersion f (Structure t v n s) = fmap (\v' -> Structure t v' n s) (f v) -- | A van-Laarhoven lens into 'TypeName' of 'Structure' -- -- @ -- 'typeName' :: Lens' 'Structure' 'TypeName' -- @ typeName :: Functor f => (TypeName -> f TypeName) -> Structure -> f Structure typeName f (Nominal t v n s) = fmap (\n' -> Nominal t v n' s) (f n) typeName f (Newtype t v n s) = fmap (\n' -> Newtype t v n' s) (f n) typeName f (Structure t v n s) = fmap (\n' -> Structure t v n' s) (f n) ------------------------------------------------------------------------------- -- Builder ------------------------------------------------------------------------------- -- | Flatten 'Structure' into something we can calculate hash of. -- -- As 'Structure' can be potentially infinite. For mutually recursive types, -- we keep track of 'TypeRep's, and put just 'TypeRep' name when it's occurred -- another time. structureBuilder :: Structure -> Builder.Builder structureBuilder s0 = State.evalState (go s0) Map.empty where go :: Structure -> State.State (Map.Map String (NonEmpty TypeRep)) Builder.Builder go (Nominal t v n s) = withTypeRep t $ do s' <- traverse go s return $ mconcat $ Builder.word8 1 : Builder.word32LE v : Builder.stringUtf8 n : s' go (Newtype t v n s) = withTypeRep t $ do s' <- go s return $ mconcat [Builder.word8 2, Builder.word32LE v, Builder.stringUtf8 n, s'] go (Structure t v n s) = withTypeRep t $ do s' <- goSop s return $ mconcat [Builder.word8 3, Builder.word32LE v, Builder.stringUtf8 n, s'] withTypeRep t k = do acc <- State.get case insert t acc of Nothing -> return $ mconcat [ Builder.word8 0, Builder.stringUtf8 (show t) ] Just acc' -> do State.put acc' k goSop :: SopStructure -> State.State (Map.Map String (NonEmpty TypeRep)) Builder.Builder goSop sop = do parts <- traverse part sop return $ mconcat parts part (cn, s) = do s' <- traverse go s return $ Data.Monoid.mconcat [ Builder.stringUtf8 cn, mconcat s' ] insert :: TypeRep -> Map.Map String (NonEmpty TypeRep) -> Maybe (Map.Map String (NonEmpty TypeRep)) insert tr m = case Map.lookup trShown m of Nothing -> inserted Just ne | tr `Data.Foldable.elem` ne -> Nothing | otherwise -> inserted where inserted = Just (Map.insertWith (Data.Semigroup.<>) trShown (pure tr) m) trShown = show tr ------------------------------------------------------------------------------- -- Classes ------------------------------------------------------------------------------- -- | Class of types with a known 'Structure'. -- -- For regular data types 'Structured' can be derived generically. -- -- @ -- data Record = Record { a :: Int, b :: Bool, c :: [Char] } deriving ('Generic') -- instance 'Structured' Record -- @ -- -- @since 3.2.0.0 -- class Typeable a => Structured a where structure :: Proxy a -> Structure default structure :: (Generic a, GStructured (Rep a)) => Proxy a -> Structure structure = genericStructure -- This member is hidden. It's there to precalc structureHash' :: Tagged a MD5 structureHash' = Tagged (hashStructure (structure (Proxy :: Proxy a))) -- private Tagged newtype Tagged a b = Tagged { untag :: b } -- | Semantically @'hashStructure' . 'structure'@. structureHash :: forall a. Structured a => Proxy a -> MD5 structureHash _ = untag (structureHash' :: Tagged a MD5) ------------------------------------------------------------------------------- -- Functions ------------------------------------------------------------------------------- -- | Structured 'Binary.encode'. -- Encode a value to using binary serialisation to a lazy 'LBS.ByteString'. -- Encoding starts with 16 byte large structure hash. structuredEncode :: forall a. (Binary.Binary a, Structured a) => a -> LBS.ByteString structuredEncode x = Binary.encode (Tag :: Tag a, x) -- | Lazily serialise a value to a file structuredEncodeFile :: (Binary.Binary a, Structured a) => FilePath -> a -> IO () structuredEncodeFile f = LBS.writeFile f . structuredEncode -- | Structured 'Binary.decode'. -- Decode a value from a lazy 'LBS.ByteString', reconstructing the original structure. -- Throws pure exception on invalid inputs. structuredDecode :: forall a. (Binary.Binary a, Structured a) => LBS.ByteString -> a structuredDecode lbs = snd (Binary.decode lbs :: (Tag a, a)) structuredDecodeOrFailIO :: (Binary.Binary a, Structured a) => LBS.ByteString -> IO (Either String a) structuredDecodeOrFailIO bs = catch (evaluate (structuredDecode bs) >>= return . Right) handler where #if MIN_VERSION_base(4,9,0) handler (ErrorCallWithLocation str _) = return $ Left str #else handler (ErrorCall str) = return $ Left str #endif -- | Lazily reconstruct a value previously written to a file. structuredDecodeFileOrFail :: (Binary.Binary a, Structured a) => FilePath -> IO (Either String a) structuredDecodeFileOrFail f = structuredDecodeOrFailIO =<< LBS.readFile f ------------------------------------------------------------------------------- -- Helper data ------------------------------------------------------------------------------- data Tag a = Tag instance Structured a => Binary.Binary (Tag a) where get = do actual <- binaryGetMD5 if actual == expected then return Tag else fail $ concat [ "Non-matching structured hashes: " , showMD5 actual , "; expected: " , showMD5 expected ] where expected = untag (structureHash' :: Tagged a MD5) put _ = binaryPutMD5 expected where expected = untag (structureHash' :: Tagged a MD5) ------------------------------------------------------------------------------- -- Smart constructors ------------------------------------------------------------------------------- -- | Use 'Typeable' to infer name nominalStructure :: Typeable a => Proxy a -> Structure nominalStructure p = Nominal tr 0 (show tr) [] where tr = typeRep p #if MIN_VERSION_base(4,7,0) containerStructure :: forall f a. (Typeable f, Structured a) => Proxy (f a) -> Structure containerStructure _ = Nominal faTypeRep 0 (show fTypeRep) [ structure (Proxy :: Proxy a) ] where fTypeRep = typeRep (Proxy :: Proxy f) faTypeRep = typeRep (Proxy :: Proxy (f a)) #else containerStructure :: forall f a. (Typeable1 f, Structured a) => Proxy (f a) -> Structure containerStructure _ = Nominal faTypeRep 0 (show fTypeRep) [ structure (Proxy :: Proxy a) ] where fTypeRep = typeOf1 (undefined :: f ()) faTypeRep = typeRep (Proxy :: Proxy (f a)) #endif ------------------------------------------------------------------------------- -- Generic ------------------------------------------------------------------------------- -- | Derive 'structure' genrically. genericStructure :: forall a. (Typeable a, Generic a, GStructured (Rep a)) => Proxy a -> Structure genericStructure _ = gstructured (typeRep (Proxy :: Proxy a)) (Proxy :: Proxy (Rep a)) 0 -- | Used to implement 'genericStructure'. class GStructured (f :: Type -> Type) where gstructured :: TypeRep -> Proxy f -> TypeVersion -> Structure instance (i ~ D, Datatype c, GStructuredSum f) => GStructured (M1 i c f) where gstructured tr _ v = case sop of #if MIN_VERSION_base(4,7,0) [(_, [s])] | isNewtype p -> Newtype tr v name s #endif _ -> Structure tr v name sop where p = undefined :: M1 i c f () name = datatypeName p sop = gstructuredSum (Proxy :: Proxy f) [] class GStructuredSum (f :: Type -> Type) where gstructuredSum :: Proxy f -> SopStructure -> SopStructure instance (i ~ C, Constructor c, GStructuredProd f) => GStructuredSum (M1 i c f) where gstructuredSum _ xs = (name, prod) : xs where name = conName (undefined :: M1 i c f ()) prod = gstructuredProd (Proxy :: Proxy f) [] instance (GStructuredSum f, GStructuredSum g) => GStructuredSum (f :+: g) where gstructuredSum _ xs = gstructuredSum (Proxy :: Proxy f) $ gstructuredSum (Proxy :: Proxy g) xs instance GStructuredSum V1 where gstructuredSum _ = id class GStructuredProd (f :: Type -> Type) where gstructuredProd :: Proxy f -> [Structure] -> [Structure] instance (i ~ S, GStructuredProd f) => GStructuredProd (M1 i c f) where gstructuredProd _ = gstructuredProd (Proxy :: Proxy f) instance Structured c => GStructuredProd (K1 i c) where gstructuredProd _ xs = structure (Proxy :: Proxy c) : xs instance GStructuredProd U1 where gstructuredProd _ = id instance (GStructuredProd f, GStructuredProd g) => GStructuredProd (f :*: g) where gstructuredProd _ xs = gstructuredProd (Proxy :: Proxy f) $ gstructuredProd (Proxy :: Proxy g) xs ------------------------------------------------------------------------------- -- instances ------------------------------------------------------------------------------- instance Structured () instance Structured Bool instance Structured Ordering instance Structured Char where structure = nominalStructure instance Structured Int where structure = nominalStructure instance Structured Integer where structure = nominalStructure instance Structured Data.Word.Word where structure = nominalStructure instance Structured Int8 where structure = nominalStructure instance Structured Int16 where structure = nominalStructure instance Structured Int32 where structure = nominalStructure instance Structured Int64 where structure = nominalStructure instance Structured Word8 where structure = nominalStructure instance Structured Word16 where structure = nominalStructure instance Structured Word32 where structure = nominalStructure instance Structured Word64 where structure = nominalStructure instance Structured Float where structure = nominalStructure instance Structured Double where structure = nominalStructure instance Structured a => Structured (Maybe a) instance (Structured a, Structured b) => Structured (Either a b) instance Structured a => Structured (Ratio a) where structure = containerStructure instance Structured a => Structured [a] where structure = containerStructure instance Structured a => Structured (NonEmpty a) where structure = containerStructure instance (Structured a1, Structured a2) => Structured (a1, a2) instance (Structured a1, Structured a2, Structured a3) => Structured (a1, a2, a3) instance (Structured a1, Structured a2, Structured a3, Structured a4) => Structured (a1, a2, a3, a4) instance (Structured a1, Structured a2, Structured a3, Structured a4, Structured a5) => Structured (a1, a2, a3, a4, a5) instance (Structured a1, Structured a2, Structured a3, Structured a4, Structured a5, Structured a6) => Structured (a1, a2, a3, a4, a5, a6) instance (Structured a1, Structured a2, Structured a3, Structured a4, Structured a5, Structured a6, Structured a7) => Structured (a1, a2, a3, a4, a5, a6, a7) instance Structured BS.ByteString where structure = nominalStructure instance Structured LBS.ByteString where structure = nominalStructure instance Structured T.Text where structure = nominalStructure instance Structured LT.Text where structure = nominalStructure instance (Structured k, Structured v) => Structured (Map.Map k v) where structure _ = Nominal (typeRep (Proxy :: Proxy (Map.Map k v))) 0 "Map" [ structure (Proxy :: Proxy k), structure (Proxy :: Proxy v) ] instance (Structured k) => Structured (Set.Set k) where structure = containerStructure instance (Structured v) => Structured (IM.IntMap v) where structure = containerStructure instance Structured IS.IntSet where structure = nominalStructure instance (Structured v) => Structured (Seq.Seq v) where structure = containerStructure instance Structured Time.UTCTime where structure = nominalStructure instance Structured Time.DiffTime where structure = nominalStructure instance Structured Time.UniversalTime where structure = nominalStructure instance Structured Time.NominalDiffTime where structure = nominalStructure instance Structured Time.Day where structure = nominalStructure instance Structured Time.TimeZone where structure = nominalStructure instance Structured Time.TimeOfDay where structure = nominalStructure instance Structured Time.LocalTime where structure = nominalStructure -- Proxy isn't Typeable in base-4.8 / base -- #if __GLASGOW_HASKELL__ >= 800 -- instance (Typeable k, Typeable (a :: k)) => Structured (Proxy a) -- #else -- instance (Typeable a) => Structured (Proxy a) where -- structure p = Structure (typeRep p) 0 "Proxy" [("Proxy",[])] -- #endif