{- Database.hs (c) Richard Eisenberg 2012 eir@cis.upenn.edu This file contains the full code for the database interface example presented in /Dependently typed programming with singletons/ -} {-# LANGUAGE PolyKinds, DataKinds, TemplateHaskell, TypeFamilies, GADTs, TypeOperators, RankNTypes, FlexibleContexts, UndecidableInstances, FlexibleInstances, ScopedTypeVariables, MultiParamTypeClasses, OverlappingInstances, ConstraintKinds, CPP #-} {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} -- The OverlappingInstances is needed only to allow the InC and SubsetC classes. -- This is simply a convenience so that GHC can infer the necessary proofs of -- schema inclusion. The library could easily be designed without this flag, -- but it would require a client to explicity build proof terms from -- InProof and Subset. module GradingClient.Database where import Prelude hiding ( tail, id ) import Data.Singletons.Prelude import Data.Singletons.SuppressUnusedWarnings import Data.Singletons.TH import Control.Monad import Data.List hiding ( tail ) import Control.Monad.Error $(singletons [d| -- Basic Nat type data Nat = Zero | Succ Nat deriving (Eq, Ord) |]) -- Conversions to any from Integers fromNat :: Nat -> Integer fromNat Zero = 0 fromNat (Succ n) = (fromNat n) + 1 toNat :: Integer -> Nat toNat 0 = Zero toNat n | n > 0 = Succ (toNat (n - 1)) toNat _ = error "Converting negative to Nat" -- Display and read Nats using decimal digits instance Show Nat where show = show . fromNat instance Read Nat where readsPrec n s = map (\(a,rest) -> (toNat a,rest)) $ readsPrec n s $(singletons [d| -- Our "U"niverse of types. These types can be stored in our database. data U = BOOL | STRING | NAT | VEC U Nat deriving (Read, Eq, Show) -- A re-definition of Char as an algebraic data type. -- This is necessary to allow for promotion and type-level Strings. data AChar = CA | CB | CC | CD | CE | CF | CG | CH | CI | CJ | CK | CL | CM | CN | CO | CP | CQ | CR | CS | CT | CU | CV | CW | CX | CY | CZ deriving (Read, Show, Eq) -- A named attribute in our database data Attribute = Attr [AChar] U -- A schema is an ordered list of named attributes data Schema = Sch [Attribute] -- append two schemas append :: Schema -> Schema -> Schema append (Sch s1) (Sch s2) = Sch (s1 ++ s2) -- predicate to check that a schema is free of a certain attribute attrNotIn :: Attribute -> Schema -> Bool attrNotIn _ (Sch []) = True attrNotIn (Attr name u) (Sch ((Attr name' _) : t)) = (name /= name') && (attrNotIn (Attr name u) (Sch t)) -- predicate to check that two schemas are disjoint disjoint :: Schema -> Schema -> Bool disjoint (Sch []) _ = True disjoint (Sch (h : t)) s = (attrNotIn h s) && (disjoint (Sch t) s) -- predicate to check if a name occurs in a schema occurs :: [AChar] -> Schema -> Bool occurs _ (Sch []) = False occurs name (Sch ((Attr name' _) : attrs)) = name == name' || occurs name (Sch attrs) -- looks up an element type from a schema lookup :: [AChar] -> Schema -> U lookup _ (Sch []) = undefined lookup name (Sch ((Attr name' u) : attrs)) = if name == name' then u else lookup name (Sch attrs) |]) -- The El type family gives us the type associated with a constructor -- of U: type family El (u :: U) :: * type instance El BOOL = Bool type instance El STRING = String type instance El NAT = Nat type instance El (VEC u n) = Vec (El u) n -- Length-indexed vectors data Vec :: * -> Nat -> * where VNil :: Vec a Zero VCons :: a -> Vec a n -> Vec a (Succ n) -- Read instances are keyed by the index of the vector to aid in parsing instance Read (Vec a Zero) where readsPrec _ s = [(VNil, s)] instance (Read a, Read (Vec a n)) => Read (Vec a (Succ n)) where readsPrec n s = do (a, rest) <- readsPrec n s (tail, restrest) <- readsPrec n rest return (VCons a tail, restrest) -- Because the Read instances are keyed by the length of the vector, -- it is not obvious to the compiler that all Vecs have a Read instance. -- We must make a short inductive proof of this fact. -- First, we define a datatype to store the resulting instance, keyed -- by the parameters to Vec: data VecReadInstance a n where VecReadInstance :: Read (Vec a n) => VecReadInstance a n -- Then, we make a function that produces an instance of Read for a -- Vec, given the datatype it is over and its length, both encoded -- using singleton types: vecReadInstance :: Read (El u) => SU u -> SNat n -> VecReadInstance (El u) n vecReadInstance _ SZero = VecReadInstance vecReadInstance u (SSucc n) = case vecReadInstance u n of VecReadInstance -> VecReadInstance -- The Show instance can be straightforwardly defined: instance Show a => Show (Vec a n) where show VNil = "" show (VCons h t) = (show h) ++ " " ++ (show t) -- We need to be able to Read and Show elements of our database, so -- we must know that any type of the form (El u) for some (u :: U) -- has a Read and Show instance. Because we can't declare this instance -- directly (as, in general, declaring an instance of a type family -- would be unsound), we provide inductive proofs that these instances -- exist: data ElUReadInstance u where ElUReadInstance :: Read (El u) => ElUReadInstance u elUReadInstance :: Sing u -> ElUReadInstance u elUReadInstance SBOOL = ElUReadInstance elUReadInstance SSTRING = ElUReadInstance elUReadInstance SNAT = ElUReadInstance elUReadInstance (SVEC u n) = case elUReadInstance u of ElUReadInstance -> case vecReadInstance u n of VecReadInstance -> ElUReadInstance data ElUShowInstance u where ElUShowInstance :: Show (El u) => ElUShowInstance u elUShowInstance :: Sing u -> ElUShowInstance u elUShowInstance SBOOL = ElUShowInstance elUShowInstance SSTRING = ElUShowInstance elUShowInstance SNAT = ElUShowInstance elUShowInstance (SVEC u _) = case elUShowInstance u of ElUShowInstance -> ElUShowInstance showAttrProof :: Sing (Attr nm u) -> ElUShowInstance u showAttrProof (SAttr _ u) = elUShowInstance u -- A Row is one row of our database table, keyed by its schema. data Row :: Schema -> * where EmptyRow :: [Int] -> Row (Sch '[]) -- the Ints are the unique id of the row ConsRow :: El u -> Row (Sch s) -> Row (Sch ((Attr name u) ': s)) -- We build Show instances for a Row element by element: instance Show (Row (Sch '[])) where show (EmptyRow n) = "(id=" ++ (show n) ++ ")" instance (Show (El u), Show (Row (Sch attrs))) => Show (Row (Sch ((Attr name u) ': attrs))) where show (ConsRow h t) = case t of EmptyRow n -> (show h) ++ " (id=" ++ (show n) ++ ")" _ -> (show h) ++ ", " ++ (show t) -- A Handle in our system is an abstract handle to a loaded table. -- The constructor is not exported. In our simplistic case, we -- just store the list of rows. A more sophisticated implementation -- could store some identifier to the connection to an external database. data Handle :: Schema -> * where Handle :: [Row s] -> Handle s -- The following functions parse our very simple flat file database format. -- The file, with a name ending in ".dat", consists of a sequence of lines, -- where each line contains one entry in the table. There is no row separator; -- if a row contains n pieces of data, that row is represented in n lines in -- the file. -- A schema is stored in a file of the same name, except ending in ".schema". -- Each line in the file is a constructor of U indicating the type of the -- corresponding row element. -- Use Either for error handling in parsing functions type ErrorM = Either String -- This function is relatively uninteresting except for its use of -- pattern matching to introduce the instances of Read and Show for -- elements readRow :: Int -> SSchema s -> [String] -> ErrorM (Row s, [String]) readRow id (SSch SNil) strs = return (EmptyRow [id], strs) readRow _ (SSch (SCons _ _)) [] = throwError "Ran out of data while processing row" readRow id (SSch (SCons (SAttr _ u) at)) (sh:st) = do (rowTail, strTail) <- readRow id (SSch at) st case elUReadInstance u of ElUReadInstance -> let results = readsPrec 0 sh in if null results then throwError $ "No parse of " ++ sh ++ " as a " ++ (show (fromSing u)) else let item = fst $ head results in case elUShowInstance u of ElUShowInstance -> return (ConsRow item rowTail, strTail) readRows :: SSchema s -> [String] -> [Row s] -> ErrorM [Row s] readRows _ [] soFar = return soFar readRows sch lst soFar = do (row, rest) <- readRow (length soFar) sch lst readRows sch rest (row : soFar) -- Given the name of a database and its schema, return a handle to the -- database. connect :: String -> SSchema s -> IO (Handle s) connect name schema = do schString <- readFile (name ++ ".schema") let schEntries = lines schString usFound = map read schEntries -- load schema just using "read" (Sch attrs) = fromSing schema usExpected = map (\(Attr _ u) -> u) attrs unless (usFound == usExpected) -- compare found schema with expected (fail "Expected schema does not match found schema") dataString <- readFile (name ++ ".dat") let dataEntries = lines dataString result = readRows schema dataEntries [] -- read actual data case result of Left errorMsg -> fail errorMsg Right rows -> return $ Handle rows -- In order to define strongly-typed projection from a row, we need to have a notion -- that one schema is a subset of another. We permit the schemas to have their columns -- in different orders. We define this subset relation via two inductively defined -- propositions. In Haskell, these inductively defined propositions take the form of -- GADTs. In their original form, they would look like this: {- data InProof :: Attribute -> Schema -> * where InElt :: InProof attr (Sch (attr ': schTail)) InTail :: InProof attr (Sch attrs) -> InProof attr (Sch (a ': attrs)) data SubsetProof :: Schema -> Schema -> * where SubsetEmpty :: SubsetProof (Sch '[]) s' SubsetCons :: InProof attr s' -> SubsetProof (Sch attrs) s' -> SubsetProof (Sch (attr ': attrs)) s' -} -- However, it would be convenient to users of the database library not to require -- building these proofs manually. So, we define type classes so that the compiler -- builds the proofs automatically. To make everything work well together, we also -- make the parameters to the proof GADT constructors implicit -- i.e. in the form -- of type class constraints. data InProof :: Attribute -> Schema -> * where InElt :: InProof attr (Sch (attr ': schTail)) InTail :: InC name u (Sch attrs) => InProof (Attr name u) (Sch (a ': attrs)) class InC (name :: [AChar]) (u :: U) (sch :: Schema) where inProof :: InProof (Attr name u) sch instance InC name u (Sch ((Attr name u) ': schTail)) where inProof = InElt instance InC name u (Sch attrs) => InC name u (Sch (a ': attrs)) where inProof = InTail data SubsetProof :: Schema -> Schema -> * where SubsetEmpty :: SubsetProof (Sch '[]) s' SubsetCons :: (InC name u s', SubsetC (Sch attrs) s') => SubsetProof (Sch ((Attr name u) ': attrs)) s' class SubsetC (s :: Schema) (s' :: Schema) where subset :: SubsetProof s s' instance SubsetC (Sch '[]) s' where subset = SubsetEmpty instance (InC name u s', SubsetC (Sch attrs) s') => SubsetC (Sch ((Attr name u) ': attrs)) s' where subset = SubsetCons -- To access the data in a structured (and well-typed!) way, we use -- an RA (short for Relational Algebra). An RA is indexed by the schema -- of the data it produces. data RA :: Schema -> * where -- The RA includes all data represented by the handle. Read :: Handle s -> RA s -- The RA is a union of the rows represented by the two RAs provided. -- Note that the schemas of the two RAs must be the same for this -- constructor use to type-check. Union :: RA s -> RA s -> RA s -- The RA is the list of rows in the first RA, omitting those in the -- second. Once again, the schemas must match. Diff :: RA s -> RA s -> RA s -- The RA is a Cartesian product of the two RAs provided. Note that -- the schemas of the two provided RAs must be disjoint. Product :: (Disjoint s s' ~ True, SingI s, SingI s') => RA s -> RA s' -> RA (Append s s') -- The RA is a projection conforming to the schema provided. The -- type-checker ensures that this schema is a subset of the data -- included in the provided RA. Project :: (SubsetC s' s, SingI s) => SSchema s' -> RA s -> RA s' -- The RA contains only those rows of the provided RA for which -- the provided expression evaluates to True. Note that the -- schema of the provided RA and the resultant RA are the same -- because the columns of data are the same. Also note that -- the expression must return a Bool for this to type-check. Select :: Expr s BOOL -> RA s -> RA s -- Other constructors would be added in a more robust database -- implementation. -- An Expr is used with the Select constructor to choose some -- subset of rows from a table. Expressions are indexed by the -- schema over which they operate and the return value they -- produce. data Expr :: Schema -> U -> * where -- Equality among two elements Equal :: Eq (El u) => Expr s u -> Expr s u -> Expr s BOOL -- A less-than comparison among two Nats LessThan :: Expr s NAT -> Expr s NAT -> Expr s BOOL -- A literal number LiteralNat :: Integer -> Expr s NAT -- Projection in an expression -- evaluates to the value -- of the named attribute. Element :: (Occurs nm s ~ True) => SSchema s -> Sing nm -> Expr s (Lookup nm s) -- A more robust implementation would include more constructors -- Retrieves the id from a row. Ids are used when computing unions and -- differences. getId :: Row s -> [Int] getId (EmptyRow n) = n getId (ConsRow _ t) = getId t -- Changes the id of a row to a new value changeId :: [Int] -> Row s -> Row s changeId n (EmptyRow _) = EmptyRow n changeId n (ConsRow h t) = ConsRow h (changeId n t) -- Equality for rows based on ids. eqRow :: Row s -> Row s -> Bool eqRow r1 r2 = getId r1 == getId r2 -- Equality for attributes based on names eqAttr :: Attribute -> Attribute -> Bool eqAttr (Attr nm _) (Attr nm' _) = nm == nm' -- Appends two rows. There are three suspicious case statements -- they are -- suspicious in that the different branches are all exactly identical. Here -- is why they are needed: -- The two case statements on r are necessary to deconstruct the index in the -- type of r; GHC does not use the fact that s' must be (Sch a') for some a'. -- By doing a case analysis on r, GHC uses the types given in the different -- constructors for Row, both of which give the form of s' as (Sch a'). This -- deconstruction is necessary for the type family Append to compute, because -- Append is defined only when its second argument is of the form (Sch a'). -- The case statement on rowAppend t r is necessary to avoid potential -- overlapping instances for the SingRep class; the instances are needed for -- the call to ConsRow. The potential for overlapping instances comes from -- ambiguity in the component types of (Append s s'). By doing case analysis -- on rowAppend t r, these variables become fixed, and the potential for -- overlapping instances disappears. -- We use the "cases" Singletons library operation to produce the case -- analysis in the first clause. This "cases" operation produces a case -- statement where each branch is identical and each constructor parameter -- is ignored. The "cases" operation does not work for the second clause -- because the code in the clause depends on definitions generated earlier. -- Template Haskell restricts certain dependencies between auto-generated -- code blocks to prevent the possibility of circular dependencies. -- In this case, if the $(singletons ...) blocks above were in a different -- module, the "cases" operation would be applicable here. $( return [] ) rowAppend :: Row s -> Row s' -> Row (Append s s') rowAppend (EmptyRow n) r = $(cases ''Row [| r |] [| changeId (n ++ (getId r)) r |]) rowAppend (ConsRow h t) r = case r of EmptyRow _ -> case rowAppend t r of EmptyRow _ -> ConsRow h (rowAppend t r) ConsRow _ _ -> ConsRow h (rowAppend t r) ConsRow _ _ -> case rowAppend t r of EmptyRow _ -> ConsRow h (rowAppend t r) ConsRow _ _ -> ConsRow h (rowAppend t r) -- Choose the elements of one list based on truth values in another choose :: [Bool] -> [a] -> [a] choose [] _ = [] choose (False : btail) (_ : t) = choose btail t choose (True : btail) (h : t) = h : (choose btail t) choose _ [] = [] -- The query function is the eliminator for an RA. It returns a list of -- rows containing the data produced by the RA. query :: forall s. SingI s => RA s -> IO [Row s] query (Read (Handle rows)) = return rows query (Union ra rb) = do rowsa <- query ra rowsb <- query rb return $ unionBy eqRow rowsa rowsb query (Diff ra rb) = do rowsa <- query ra rowsb <- query rb return $ deleteFirstsBy eqRow rowsa rowsb query (Product ra rb) = do rowsa <- query ra rowsb <- query rb return $ do -- entering the [] Monad rowa <- rowsa rowb <- rowsb return $ rowAppend rowa rowb query (Project sch ra) = do rows <- query ra return $ map (projectRow sch) rows where -- The projectRow function uses the relationship encoded in the Subset -- relation to project the requested columns of data in a type-safe manner. -- It recurs on the structure of the provided schema, creating the output -- row to be in the same order as the input schema. This is necessary for -- the output to type-check, as it is indexed by the input schema. -- We use explicit quantification to get access to scoped type variables. projectRow :: forall (sch :: Schema) (s' :: Schema). SubsetC sch s' => SSchema sch -> Row s' -> Row sch -- Base case: empty schema projectRow (SSch SNil) r = EmptyRow (getId r) -- In the recursive case, we need to pattern-match on the proof that -- the provided schema is a subset of the provided RA. We extract this -- proof (of type SubsetProof s s') from the SubsetC instance using the -- subset method. projectRow (SSch (SCons attr tail)) r = case subset :: SubsetProof sch s' of -- Because we know that the schema is non-empty, the only possibility -- here is SubsetCons: SubsetCons -> let rtail = projectRow (SSch tail) r in case attr of SAttr _ u -> case elUShowInstance u of ElUShowInstance -> ConsRow (extractElt attr r) rtail -- GHC correctly determines that this case is impossible if it is -- not commented. -- SubsetEmpty -> undefined <== IMPOSSIBLE -- However, the current version of GHC (7.5) does not suppress warnings -- for incomplete pattern matches when the remaining cases are impossible. -- So, we include this case (impossible to reach for any terminated value) -- to suppress the warning. _ -> error "Type checking failed" -- Retrieves the element, looked up by the name of the provided attribute, -- from a row. The explicit quantification is necessary to create the scoped -- type variables to use in the return type of <> extractElt :: forall nm u sch. InC nm u sch => Sing (Attr nm u) -> Row sch -> El u extractElt attr r = case inProof :: InProof (Attr nm u) sch of InElt -> case r of ConsRow h _ -> h -- EmptyRow _ -> undefined <== IMPOSSIBLE _ -> error "Type checking failed" InTail -> case r of ConsRow _ t -> extractElt attr t -- EmptyRow _ -> undefined <== IMPOSSBLE _ -> error "Type checking failed" query (Select expr r) = do rows <- query r let vals = map (eval expr) rows return $ choose vals rows where -- Evaluates an expression eval :: forall s' u. SingI s' => Expr s' u -> Row s' -> El u eval (Element _ (name :: Sing name)) row = case row of -- EmptyRow _ -> undefined <== IMPOSSIBLE ConsRow h t -> case row of (ConsRow _ _ :: Row (Sch ((Attr name' u') ': attrs))) -> case sing :: Sing s' of -- SSch SNil -> undefined <== IMPOSSIBLE SSch (SCons (SAttr name' _) stail) -> case name %:== name' of STrue -> h SFalse -> withSingI stail (eval (Element (SSch stail) name) t) _ -> bugInGHC _ -> bugInGHC eval (Equal (e1 :: Expr s' u') e2) row = let v1 = eval e1 row v2 = eval e2 row in v1 == v2 -- Note that the types really help us here: the LessThan constructor is -- defined only over Expr s NAT, so we know that evaluating e1 and e2 will -- yield Nats, which are a member of the Ord type class. eval (LessThan e1 e2) row = let v1 = eval e1 row v2 = eval e2 row in v1 < v2 eval (LiteralNat x) _ = toNat x