src/Data/TypeHash.hs

{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable #-}
-- | Produce a /hash/ for a type that is unique for that type.
-- The hash takes both actual type names and type structure into account.
--
-- The purpose of the hash of a type is to be able to store the type
-- of a persisted value together with the value.
-- By comparing the type hash of a persisted value and the expected value
-- we can know if the persistened value is of the correct type.
--
-- The type code preserves the exact structure of the type and can be used to
-- check if one type is a subtype of another.  If one type is a subtype of
-- another it means that, e.g., @read . show@ will correctly between the types.
-- (Caveat @read . show@ is only guaranteed to work with named fields.)
-- 
-- The type hash uses a cryptographic hash and can only be used to test equality.
module Data.TypeHash(TypeCode, typeCode, convertibleTo, TypeHash, typeHash) where
import Data.Char(isAlpha)
import Control.Monad.State
import Data.Generics
import Data.Digest.Pure.MD5(MD5Digest, md5)
import Data.ByteString.Lazy(pack)

--import Debug.Trace

-- | Type codes.
newtype TypeCode = TypeCode Type
    deriving (Eq, Ord, Typeable, Data, Show)

-- | Turn the type of the value into a type code.
typeCode :: (Data a) => a -> TypeCode
typeCode = TypeCode . gType []

-- | Type hash.
newtype TypeHash = TypeHash String
    deriving (Eq, Ord, Typeable, Data, Show, Read)

-- | Turn the type of the value into a type hash.
typeHash :: (Data a) => a -> TypeHash
typeHash = TypeHash . show . md5 . pack . map (fromIntegral . fromEnum) . show . gType []

data Type
    = Name { typeName :: String }            -- Abstract type, or recursive reference
    | Data { typeName :: String, constrs :: [Constructor] }
    deriving (Eq, Ord, Show, Typeable, Data)
type Constructor = (String, [Field])
type Field = (String, Type)  -- a unique number is used for missing field names

gType :: (Data a) => [String] -> a -> Type
gType tns x =
    let tn = show $ fullTypeOf x
    in  case dataTypeRep $ dataTypeOf x of
        AlgRep cs | tn `notElem` tns ->
            Data { typeName = tn, constrs = map (gConstr (tn:tns) x) cs }
        _ -> Name { typeName = tn } -- Use type name for tryly abstract types.

gConstr :: (Data a) => [String] -> a -> Constr -> (String, [Field])
gConstr tns x c = (showConstr c,
                   zip fs (reverse $ execState (fromConstrM f c `asTypeOf` return x) []))
  where fs = constrFields c ++ [ show i | i <- [0::Int ..] ]
        f :: forall d . (Data d) => State [Type] d
        f = do modify (gType tns (undefined :: d) :); return undefined

-- Replace unqualified type name by qualified type name.
-- XXX Only does it on the top level, because I don't know how to get deeper.
fullTypeOf :: (Data a) => a -> TypeRep
fullTypeOf a | not $ isAlpha $ head $ tyConString $ typeRepTyCon $ ta = ta
	     | otherwise = mkTyConApp (mkTyCon $ dataTypeName $ dataTypeOf a)
                                      (typeRepArgs ta)
  where ta = typeOf a

------------

-- Check if a type is upwards compatible with another type, i.e., if it is a subtype.
-- S is a subtype of T if
--  S has the same or more constructors than T.  Constructor order does not matter,
--    but the constructor arguments must be subtypes again.
--  If S and T both have a single constructor, their names may differ.
--  A constructor with fields is a subtype if it has fewer or the same fields.
--    Field order does not matter, but the field types must be subtypes again.
--  Only the names of (concrete) types have changed.
--
-- These are mostly what you'd expect from sums and products.
--
-- | Is the first type (code) as subtype of the second, i.e.,
-- can the first type be converted to the second.
convertibleTo :: TypeCode -> TypeCode -> Bool
convertibleTo (TypeCode t1) (TypeCode t2) = subType [] t1 t2

type TypeNameMap = [(String, String)]

subType :: TypeNameMap -> Type -> Type -> Bool
--subType r t1 t2 | trace ("subtype " ++ show (r, t1, t2)) False = undefined
subType r (Name n1)            (Name n2)           = maybe (n1 == n2) (== n2) $ lookup n1 r
subType _ (Name {})            (Data {})           = False
subType _ (Data {})            (Name {})           = False
subType r (Data n1 [(_, fs1)]) (Data n2 [(_,fs2)]) = all (subField ((n1, n2):r) fs1) fs2
subType r (Data n1 cs1)        (Data n2 cs2)       = all (subConstructor ((n1, n2):r) cs2) cs1

subConstructor :: TypeNameMap -> [Constructor] -> Constructor -> Bool
--subConstructor r cs2 c1 | trace ("subConstructor " ++ show (c1, cs2)) False = undefined
subConstructor r cs2 (n1, fs1) = maybe False (all (subField r fs1)) $ lookup n1 cs2

subField :: TypeNameMap -> [Field] -> Field -> Bool
--subField r fs1 f | trace ("subField " ++ show (f, fs1)) False = undefined
subField r fs1 (f2, t2) = maybe False (\ t1 -> subType r t1 t2) $ lookup f2 fs1