{-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE Rank2Types, ExistentialQuantification #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} {-# LANGUAGE FunctionalDependencies, UndecidableInstances #-} -- | -- Why all these extensions? -- I could've cheated a bit and gotten by without the last four: -- the function typecheck below is partial anyway. -- If we permit one sort of errors (when the deserialized term is -- ill-typed), we may as well permit another sort of errors -- (a closed-looking term turns out open), even if the -- latter sort of error can't arise if our typecheck function is correct. -- due to the desire to let GHC check some correctness properties -- I wanted to avoid unnecessary errors and let GHC see the -- correctness of my code to a larger extent -- module Language.TypeCheck where -- No Data.Typeable, no gcast. Everything is above the board -- No import Data.Typeable import Language.Typ hiding (main) -- Our version of Typeable import Control.Monad import Control.Monad.Error import Language.TTFdB hiding (main') -- we use de Bruijn indices -- * Data type of trees representing our expressions `on the wire' -- * Serialized expressions are _untyped_ -- All content is in the form of character string -- Imagine the following to be XML, only more compact data Tree = Leaf String -- CDATA | Node String [Tree] -- element with its children deriving (Eq, Read, Show) -- * Expression format (extensible) -- * Node "Int" [Leaf number] -- int literal -- * Node "Add" [e1,e2] -- addition -- * Node "App" [e1,e2] -- application -- * Node "Var" [Leaf name] -- * Node "Lam" [Leaf name,e1,e2] -- e2 is body, e1 is type of var -- * ... -- * -- * Format of types -- * Node "TInt" [] -- Int -- * Node "TArr" [e1,e2] -- e1 -> e2 -- * ... -- * Serialized expressions are in the Church-style -- so that we won't need inference, only type checking -- Inference is not hard here; It is orthogonal and obscures our goal -- here of deserialization into a typed term -- * // -- * Desiderata -- * Type check once, interpret many times -- * detect type-checking error _before_ evaluation -- so that the evaluators can't possibly fail because -- the serialized expression was ill-typed. That error should be -- detected and handled earlier. -- Desired function: -- typecheck :: Tree -> ??? -- We will use de-Bruijn indices (why?) -- typecheck :: Symantics repr => Tree -> repr h ??? -- We can use something like read: -- read :: Read a => String -> a -- but that defeats the purpose since we must know a. -- We may want to deserialize the term and print it out. If our -- typecheck is like read, we get the read-show problem. -- * Avoiding read-show problem -- We have to abstract over a: need Dynamics, or existentials -- * Typing dynamic typing -- * Type-safe cast -- * // -- Old approach (see IncopeTypecheck.hs) -- The Show constraint is for debugging, so to see the result -- DynTerm repr h should be the result of typecheck -- * data DynTerm repr h = forall a. (Show a, Typeable a) => DynTerm (repr h a) data DynTerm repr h = forall a. DynTerm (TQ a) (repr h a) -- A sample `dynamic' object term tdyn1 = DynTerm tint (lam z `app` (int 1)) -- Interpret many times tdyn1_eval = case tdyn1 of DynTerm tr t -> show_as tr $ eval t -- "1" -- This test particularly shows that we don't even need to know -- the type of term; eventually we get the string tdyn1_view = case tdyn1 of DynTerm _ t -> view t -- "((\\x0 -> x0) 1)" -- * // -- * De-serialize type expressions -- The serialized terms contain type annotations on bound variables. -- We need to read the annotations and convert them to Haskell types. -- But Haskell types are not terms! So, we have to produce a term -- with the desired Haskell type: Typ.Typ -- * Error monad, to report deserialization errors -- The function read_t below is necessarily partial: -- the type annotation in the Tree form can be wrong, for example, -- (Node "Tarr" []). -- To report the error meaningfully, we use the Error monad -- We should have written read_t in the open recursion style, -- so that it would be extensible. -- But we omit the type extensibility here for the sake of clarity read_t :: Tree -> Either String Typ read_t (Node "TInt" []) = return $ Typ tint read_t (Node "TArr" [e1,e2]) = do Typ t1 <- read_t e1 Typ t2 <- read_t e2 return . Typ $ tarr t1 t2 read_t tree = fail $ "Bad type expression: " ++ show tree -- * A few explanations are in order -- The TArr clause operates on `some' types t1 and t2 -- * // -- * Type checking environment -- Our deserializer `typecheck' is too necessarily partial. -- Again we will use the error monad to report errors. -- Thus the tentative type for typecheck is -- * typecheck :: Symantics repr => -- * Tree -> Either String (DynTerm h repr) -- -- * Open terms may legitimately occur -- First of all, when processing the body of abstractions. -- Second, we may use `global constants' (standard Prelude, of sorts). -- We therefore type-check in the type environment Gamma, -- which is a map from variable names to their types. Hence -- type-check should have this signature: -- * typecheck :: Symantics repr => -- * Tree -> Gamma -> Either String (DynTerm h repr) -- But Gamma and h are related! We should make it explicit that -- Gamma (type checking environment) determines h (the run-time environment -- of a term). -- * Gamma is a compile-time environment: contains variable names -- * h is a run-time environment: contains values -- Both environments are indexed by types of their elements; -- since h is a nested tuple, so should be Gamma type VarName = String data VarDesc t = -- Component of Gamma VarDesc (TQ t) VarName -- * Relating Gamma and h class Var gamma h | gamma -> h where findvar :: Symantics repr => VarName -> gamma -> Either String (DynTerm repr h) -- Like asTypeOf, used to specialize polymorphic term repr h t -- to a specific type t asTypeRepr :: t -> repr h t -> repr h t asTypeRepr _ = id instance Var () () where findvar name _ = fail $ "Unbound variable: " ++ name instance Var gamma h => Var (VarDesc t,gamma) (t,h) where findvar name (VarDesc tr name',_) | name == name' = return $ DynTerm tr z findvar name (_,gamma) = do DynTerm tr v <- findvar name gamma return $ DynTerm tr (s v) -- * // typecheck :: (Symantics repr, Var gamma h) => Tree -> gamma -> Either String (DynTerm repr h) typecheck (Node "Int" [Leaf str]) gamma = case reads str of [(n,[])] -> return $ DynTerm tint (int n) _ -> fail $ "Bad Int literal: " ++ str typecheck (Node "Add" [e1, e2]) gamma = do DynTerm (TQ t1) d1 <- typecheck e1 gamma DynTerm (TQ t2) d2 <- typecheck e2 gamma case (as_int t1 d1, as_int t2 d2) of (Just t1', Just t2') -> return . DynTerm tint $ add t1' t2' (Nothing,_) -> fail $ "Bad type of a summand: " ++ view_t t1 (_,Nothing) -> fail $ "Bad type of a summand: " ++ view_t t2 typecheck (Node "Var" [Leaf name]) gamma = findvar name gamma typecheck (Node "Lam" [Leaf name,etyp,ebody]) gamma = do Typ ta <- read_t etyp DynTerm tbody body <- typecheck ebody (VarDesc ta name, gamma) return $ DynTerm (tarr ta tbody) (lam body) typecheck (Node "App" [e1, e2]) gamma = do DynTerm (TQ t1) d1 <- typecheck e1 gamma DynTerm (TQ t2) d2 <- typecheck e2 gamma AsArrow _ arr_cast <- return $ as_arrow t1 let errarr = fail $ "operator type is not an arrow: " ++ view_t t1 (ta,tb,equ_t1ab) <- maybe errarr return arr_cast let df = equ_cast equ_t1ab d1 case safe_gcast (TQ t2) d2 ta of Just da -> return . DynTerm tb $ app df da _ -> fail $ unwords ["Bad types of the application:", view_t t1, "and", view_t t2] typecheck e gamma = fail $ "Invalid Tree: " ++ show e -- Why the app case is complex, more complex than Add -- The type of the argument is, well, existential -- * // -- * Simple tests tx1 = typecheck (Node "Var" [Leaf "x"]) () tx2 = typecheck (Node "Lam" [Leaf "x",Node "TInt" [], Node "Var" [Leaf "x"]]) () tx3 = typecheck (Node "Lam" [Leaf "x",Node "TInt" [], Node "Lam" [Leaf "y",Node "TInt" [], Node "Var" [Leaf "x"]]]) () tx4 = typecheck (Node "Lam" [Leaf "x",Node "TInt" [], Node "Lam" [Leaf "y",Node "TInt" [], Node "Add" [Node "Int" [Leaf "10"], Node "Var" [Leaf "x"]]]]) () tx_view t = case t of Right (DynTerm _ t) -> view t Left err -> error err tx1_view = tx_view tx1 tx4_view = tx_view tx4 -- "(\\x0 -> (\\x1 -> (10+x0)))" -- * // -- * Closed interpreter -- to convert a monomorphic binding to polymorphic in tc_evalview -- below. newtype CL h a = CL{unCL :: forall repr. Symantics repr => repr h a} instance Symantics CL where int x = CL $ int x add e1 e2 = CL $ add (unCL e1) (unCL e2) z = CL z s v = CL $ s (unCL v) lam e = CL $ lam (unCL e) app e1 e2 = CL $ app (unCL e1) (unCL e2) -- * // -- * Main tests -- * Type-check once, evaluate many times -- One may worry that each time we evaluate a deserialized term, -- we are type checking it from scratch. -- The following shows that it is not the case: -- if Tree represents an ill-typed term, we determine an error -- before we started evaluating it (many times). tc_evalview exp = do DynTerm tr d <- typecheck exp () let d' = unCL d -- Make d polymorphic again return $ (show_as tr $ eval d', view d') -- * A few combinators to help build trees -- XML, or even its abstractions, are too verbose tr_int x = Node "Int" [Leaf $ show x] tr_add e1 e2 = Node "Add" [e1, e2] tr_app e1 e2 = Node "App" [e1, e2] tr_lam v t e = Node "Lam" [Leaf v, t, e] tr_var v = Node "Var" [Leaf v] tr_tint = Node "TInt" [] tr_tarr t1 t2 = Node "TArr" [t1,t2] -- dt1:: Symantics repr => DynTerm repr () dt1 = tc_evalview (tr_add (tr_int 1) (tr_int 2)) -- Right ("3","(1+2)") dt2 = tc_evalview (tr_app (tr_int 1) (tr_int 2)) -- Left "operator type is not an arrow: Int" dt4 = tc_evalview (tr_lam "x" tr_tint (tr_lam "y" (tr_tint `tr_tarr` tr_tint) (tr_lam "z" tr_tint (tr_var "y")))) -- Right (" ((Int -> Int) -> (Int -> (Int -> Int))))>", -- "(\\x0 -> (\\x1 -> (\\x2 -> x1)))") dt41 = tc_evalview (tr_lam "x" tr_tint (tr_lam "y" (tr_tint `tr_tarr` tr_tint) (tr_lam "z" tr_tint (tr_var "x")))) -- Right (" ((Int -> Int) -> (Int -> Int)))>", -- "(\\x0 -> (\\x1 -> (\\x2 -> x0)))") dt5 = tc_evalview (tr_lam "x" tr_tint (tr_add (tr_var "x") (tr_int 1))) -- Right (" Int)>", -- "(\\x0 -> (x0+1))") -- Making a deliberate mistake... dt6 = tc_evalview (tr_lam "x" (tr_tint `tr_tarr` tr_tint) (tr_add (tr_var "x") (tr_int 1))) -- Left "Bad type of a summand: (Int -> Int)" -- Untyped Church Numeral 2 exp_n2 = (tr_lam "f" (tr_tint `tr_tarr` tr_tint) (tr_lam "x" tr_tint (tr_app (tr_var "f") (tr_app (tr_var "f") (tr_var "x"))))) dt7 = tc_evalview exp_n2 -- Right (" Int) -> (Int -> Int))>", -- "(\\x0 -> (\\x1 -> (x0 (x0 x1))))") dt8 = tc_evalview (tr_app (tr_app exp_n2 (tr_lam "x" tr_tint (tr_add (tr_var "x") (tr_var "x")))) (tr_int 11)) -- Right ("44", -- "(((\\x0 -> (\\x1 -> (x0 (x0 x1)))) (\\x0 -> (x0+x0))) 11)") -- Exercise: add open recursion to typecheck, so to make -- it extensible. Add the Mul alternative, or booleans main' = do print tdyn1_eval print tdyn1_view print tx4_view print dt1 print dt2 print dt4 print dt41 print dt5 print dt6 print dt7 print dt8