{-
Copyright 2009 Jake Wheat

This file contains the ast nodes, and the api functions to pass an ast
and get back type information.

It uses the Utrecht University Attribute Grammar system:
http://www.cs.uu.nl/wiki/bin/view/HUT/AttributeGrammarSystem
http://www.haskell.org/haskellwiki/The_Monad.Reader/Issue4/Why_Attribute_Grammars_Matter

The attr and sem definitions are in TypeChecking.ag, which is included
into this file.

These ast nodes are both used as the result of successful parsing, and
as the input to the type checker and the pretty printer.

= compiling

use

uuagc -dcfws Ast.ag

to generate a new Ast.hs from this file

(install uuagc with
cabal install uuagc
)

-}

MODULE {Database.HsSqlPpp.Ast}
{
    -- exports
    MySourcePos
    --ast nodes
    ,Statement (..)
    ,SelectExpression (..)
    ,FnBody (..)
    ,SetClause (..)
    ,TableRef (..)
    ,JoinExpression (..)
    ,JoinType (..)
    ,SelectList (..)
    ,SelectItem (..)
    ,CopySource (..)
    ,AttributeDef (..)
    ,RowConstraint (..)
    ,Constraint (..)
    ,TypeAttributeDef (..)
    ,ParamDef (..)
    ,VarDef (..)
    ,RaiseType (..)
    ,CombineType (..)
    ,Volatility (..)
    ,Language (..)
    ,TypeName (..)
    ,DropType (..)
    ,Cascade (..)
    ,Direction (..)
    ,Distinct (..)
    ,Natural (..)
    ,IfExists (..)
    ,RestartIdentity (..)
    ,Expression (..)
    ,OperatorType (..)
    ,getOperatorType
    ,InList (..)
    ,StatementList
    --checking stuff
    ,Message (..)
    ,MessageStuff (..)
    --types
    ,Type (..)
    ,PseudoType (..)
    ,TypeErrorInfo (..)
    ,StatementInfo (..)
    --scope
    ,Scope(..)
    ,defaultScope
    ,emptyScope
    --fns
    ,checkAst
    ,getExpressionType
    ,getStatementsType
    ,getStatementsTypeScope
    ,getStatementsInfo
    ,getStatementsInfoScope
    ,resetSps
    ,resetSp
    ,resetSp'
    ,resetSps'
    ,nsp
    --forward some defs
    ,typeSmallInt
    ,typeBigInt
    ,typeInt
    ,typeNumeric
    ,typeFloat4
    ,typeFloat8
    ,typeVarChar
    ,typeChar
    ,typeBool
}

{
import Data.Maybe
import Data.List
import Debug.Trace
import Control.Monad.Error
import Control.Arrow

import Database.HsSqlPpp.TypeType
import Database.HsSqlPpp.AstUtils
import Database.HsSqlPpp.TypeConversion
import Database.HsSqlPpp.TypeCheckingH
import Database.HsSqlPpp.Scope
import Database.HsSqlPpp.DefaultScope
}

{-
================================================================================

SQL top level statements

everything is chucked in here: dml, ddl, plpgsql statements

-}

DATA Statement

--queries

    | SelectStatement ex:SelectExpression

-- dml

    --table targetcolumns insertdata(values or select statement) returning
    | Insert table : String
             targetCols : StringList
             insData : SelectExpression
             returning : (Maybe SelectList)
    --tablename setitems where returning
    | Update table : String
             assigns : SetClauseList
             whr : Where
             returning : (Maybe SelectList)
    --tablename, where, returning
    | Delete table : String
             whr : Where
             returning : (Maybe SelectList)
    --tablename column names, from
    | Copy table : String
           targetCols : StringList
           source : CopySource
    --represents inline data for copy statement
    | CopyData insData : String
    | Truncate tables: StringList
               restartIdentity : RestartIdentity
               cascade : Cascade

-- ddl

    | CreateTable name : String
                  atts : AttributeDefList
                  cons : ConstraintList
    | CreateTableAs name : String
                    expr : SelectExpression
    | CreateView name : String
                 expr : SelectExpression
    | CreateType name : String
                 atts : TypeAttributeDefList
    -- language name args rettype bodyquoteused body vol
    | CreateFunction lang : Language
                     name : String
                     params : ParamDefList
                     rettype : TypeName
                     bodyQuote : String
                     body : FnBody
                     vol : Volatility
    -- name type checkexpression
    | CreateDomain name : String
                   typ : TypeName
                   check : (Maybe Expression)
    -- ifexists (name,argtypes)* cascadeorrestrict
    | DropFunction ifE : IfExists
                   sigs : StringStringListPairList
                   cascade : Cascade
    -- ifexists names cascadeorrestrict
    | DropSomething dropType : DropType
                    ifE : IfExists
                    names : StringList
                    cascade : Cascade

-- the remaining alternatives are presumably the plpgsql statements
-- mentioned in the header comment above; several of them carry a
-- nested StatementList (ForSelectStatement, ForIntegerStatement,
-- WhileStatement, CaseStatement, If)

    | Assignment target : String
                 value : Expression
    | Return value : (Maybe Expression)
    | ReturnNext expr : Expression
    | ReturnQuery sel : SelectExpression
    | Raise level : RaiseType
            message : String
            args : ExpressionList
    | NullStatement
    | Perform expr : Expression
    | Execute expr : Expression
    | ExecuteInto expr : Expression
                  targets : StringList
    | ForSelectStatement var : String
                         sel : SelectExpression
                         sts : StatementList
    | ForIntegerStatement var : String
                          from : Expression
                          to : Expression
                          sts : StatementList
    | WhileStatement expr : Expression
                     sts : StatementList
    | ContinueStatement
    --variable, list of when parts, else part
    | CaseStatement val : Expression
                    cases : ExpressionListStatementListPairList
                    els : StatementList
    --list is
    --first if (condition, statements):elseifs(condition, statements)
    --last bit is else statements
    | If cases : ExpressionStatementListPairList
         els : StatementList

-- =============================================================================

--Statement components

-- maybe this should be called relation valued expression?
DATA SelectExpression
    | Select selDistinct : Distinct
             selSelectList : SelectList
             selTref : MTableRef
             selWhere : Where
             selGroupBy : ExpressionList
             selHaving : (Maybe Expression)
             selOrderBy : ExpressionList
             selDir : Direction
             selLimit : (Maybe Expression)
             selOffset : (Maybe Expression)
    | CombineSelect ctype : CombineType
                    sel1 : SelectExpression
                    sel2 : SelectExpression
    | Values vll:ExpressionListList

-- optional from clause and optional where clause of a Select
TYPE MTableRef = MAYBE TableRef
TYPE Where = MAYBE Expression

-- function body: a statement list, preceded by variable definitions
-- in the PlpgsqlFnBody alternative
DATA FnBody | SqlFnBody sts : StatementList
            | PlpgsqlFnBody VarDefList sts : StatementList

DATA SetClause | SetClause att:String val:Expression
               | RowSetClause atts:StringList vals:ExpressionList

DATA TableRef | Tref tbl:String
              | TrefAlias tbl : String alias : String
              | JoinedTref tbl : TableRef
                           nat : Natural
                           joinType : JoinType
                           tbl1 : TableRef
                           onExpr : OnExpr
              | SubTref sel : SelectExpression alias : String
              | TrefFun fn:Expression
              | TrefFunAlias fn:Expression alias:String

-- optional join condition of a JoinedTref
TYPE OnExpr = MAYBE JoinExpression

DATA JoinExpression | JoinOn Expression | JoinUsing StringList

DATA JoinType | Inner | LeftOuter| RightOuter | FullOuter | Cross

-- select columns, into columns
DATA SelectList | SelectList items:SelectItemList StringList

DATA SelectItem | SelExp ex:Expression
                | SelectItem ex:Expression name:String

DATA CopySource | CopyFilename String | Stdin

--name type default null constraint
-- NOTE(review): the third field is named check although the comment
-- above calls it default - confirm which is meant
DATA AttributeDef | AttributeDef name : String
                                 typ : TypeName
                                 check : (Maybe Expression)
                                 cons : RowConstraintList

--Constraints which appear attached to an individual field

DATA RowConstraint | NullConstraint
                   | NotNullConstraint
                   | RowCheckConstraint Expression
                   | RowUniqueConstraint
                   | RowPrimaryKeyConstraint
                   | RowReferenceConstraint table : String
                                            att : (Maybe String)
                                            onUpdate : Cascade
                                            onDelete : Cascade

--constraints which appear on a separate row in the create table

DATA Constraint | UniqueConstraint StringList
                | PrimaryKeyConstraint StringList
                | CheckConstraint Expression
                  -- sourcecols targettable targetcols onupdate ondelete
                | ReferenceConstraint atts : StringList
                                      table : String
                                      tableAtts : StringList
                                      onUpdate : Cascade
                                      onDelete : Cascade

DATA TypeAttributeDef | TypeAttDef name : String
                                   typ : TypeName

-- ParamDefTp is a parameter given by type only, with no name field
DATA ParamDef | ParamDef name:String typ:TypeName
              | ParamDefTp typ:TypeName

DATA VarDef | VarDef name : String
                     typ : TypeName
                     value : (Maybe Expression)

DATA RaiseType | RNotice | RException | RError

DATA CombineType | Except | Union | Intersect | UnionAll

DATA Volatility | Volatile | Stable | Immutable

DATA Language | Sql | Plpgsql

DATA TypeName | SimpleTypeName tn:String
              | PrecTypeName tn:String prec:Integer
              | ArrayTypeName typ:TypeName
              | SetOfTypeName typ:TypeName

DATA DropType | Table
              | Domain
              | View
              | Type

DATA Cascade | Cascade | Restrict

DATA Direction | Asc | Desc

DATA Distinct | Distinct | Dupes

DATA Natural | Natural | Unnatural

DATA IfExists | Require | IfExists

DATA RestartIdentity | RestartIdentity | ContinueIdentity

{-
================================================================================

Expressions

Similarly to the statement type, all expressions are chucked into one
even though there are many restrictions on which expressions can
appear in different places.  Maybe this should be called scalar
expression?

-}
DATA Expression | IntegerLit Integer
                | FloatLit Double
                | StringLit quote : String
                            value : String
                | NullLit
                | BooleanLit Bool
                | PositionalArg Integer
                | Cast expr:Expression tn:TypeName
                | Identifier i:String
                | Case cases : CaseExpressionListExpressionPairList
                       els : MaybeExpression
                | CaseSimple value : Expression
                             cases : CaseExpressionListExpressionPairList
                             els : MaybeExpression
                | Exists sel : SelectExpression
                | FunCall funName:String args:ExpressionList
                | InPredicate expr:Expression i:Bool list:InList
                  -- windowfn selectitem partitionby orderby orderbyasc?
                | WindowFn fn : Expression
                           partitionBy : ExpressionList
                           orderBy : ExpressionList
                           dir : Direction
                | ScalarSubQuery sel : SelectExpression

TYPE MaybeExpression = MAYBE Expression

{-

list of expression flavours from postgresql with the equivalents in this ast

pg                                here
--                                ----
constant/literal                  integerlit, floatlit, unknownstringlit, nulllit, boollit
column reference                  identifier
positional parameter reference    positionalarg
subscripted expression            funcall
field selection expression        identifier
operator invocation               funcall
function call                     funcall
aggregate expression              funcall
window function call              windowfn
type cast                         cast
scalar subquery                   scalarsubquery
array constructor                 funcall
row constructor                   funcall

Anything that is represented in the ast as some sort of name plus a
list of expressions as arguments is treated as the same type of node:
FunCall.

This includes
symbol operators
regular function calls
keyword operators e.g. and, like (ones which can be parsed as normal
  syntactic operators)
unusual syntax operators, e.g. between
unusual syntax function calls e.g. substring(x from 5 for 3)
arrayctors e.g. array[3,5,6]
rowctors e.g. ROW (2,4,6)
array subscripting

list of keyword operators (regular prefix, infix and postfix):
and, or, not
is null, is not null, isnull, notnull
is distinct from, is not distinct from
is true, is not true,is false, is not false, is unknown, is not unknown
like, not like, ilike, not ilike
similar to, not similar to
in, not in (don't include these here since the argument isn't always an expr)

unusual syntax operators and fn calls
between, not between, between symmetric
overlay, substring, trim
any, some, all

Most of the unusual syntax forms and keyword operators are not yet
supported, so this is mainly a todo list.

Keyword operators are encoded with the function name as a ! followed
by a string
e.g.
operator 'and' -> FunCall "!and" ...
see keywordOperatorTypes value in AstUtils.lhs for the list of
currently supported keyword operators.

-}

-- right hand side of an InPredicate: an expression list or a subselect
DATA InList | InList exprs : ExpressionList
            | InSelect sel : SelectExpression

-- some list nodes, not sure if all of these are needed as separately
-- named node types

TYPE ExpressionList = [Expression]
TYPE ExpressionListList = [ExpressionList]
TYPE StringList = [String]
TYPE SetClauseList = [SetClause]
TYPE AttributeDefList = [AttributeDef]
TYPE ConstraintList = [Constraint]
TYPE TypeAttributeDefList = [TypeAttributeDef]
TYPE ParamDefList = [ParamDef]
TYPE StringStringListPair = (String,StringList)
TYPE StringStringListPairList = [StringStringListPair]
TYPE ExpressionListStatementListPair = (ExpressionList,StatementList)
TYPE ExpressionListStatementListPairList = [ExpressionListStatementListPair]
TYPE ExpressionStatementListPair = (Expression, StatementList)
TYPE ExpressionStatementListPairList = [ExpressionStatementListPair]
TYPE VarDefList = [VarDef]
TYPE SelectItemList = [SelectItem]
TYPE RowConstraintList = [RowConstraint]
TYPE CaseExpressionListExpressionPair = (CaseExpressionList,Expression)
TYPE CaseExpressionList = [Expression]
TYPE CaseExpressionListExpressionPairList = [CaseExpressionListExpressionPair]

{-

slightly hacky, add the source positions only in statement lists this
includes top level statements, and statements inside createfunction,
and nested inside if, while, case statements, and the like, but
unfortunately not select expressions inside other
expressions/statements, so we get particularly crap source positions
for big select statements. Was done like this for expediency. This
will be changed back to StatementList = [Statement] when all the nodes
have sourcepositioning information added to them, doing which has been
put off for no good reason.

-}

TYPE SourcePosStatement = (MySourcePos, Statement)
TYPE StatementList = [SourcePosStatement]

-- Add a root data type so we can put initial values for inherited
-- attributes in the section which defines and uses those attributes
-- rather than in the sem_ calls

DATA Root | Root statements:StatementList
DERIVING Root: Show

-- use an expression root also to support type checking,
-- etc., individual expressions

DATA ExpressionRoot | ExpressionRoot expr:Expression
DERIVING ExpressionRoot: Show

{-
================================================================================

=some basic bookkeeping attributes which every node has

-}

-- NOTE(review): TableRef appears twice in AllNodes (once near the
-- start and once between MTableRef and OnExpr) - looks like a
-- harmless duplicate, confirm before removing
SET AllNodes = Statement SelectExpression FnBody SetClause TableRef
               JoinExpression JoinType SelectList SelectItem CopySource
               AttributeDef RowConstraint Constraint TypeAttributeDef
               ParamDef VarDef RaiseType CombineType Volatility Language
               TypeName DropType Cascade Direction Distinct Natural IfExists
               RestartIdentity Expression InList MaybeExpression
               ExpressionList ExpressionListList StringList SetClauseList
               AttributeDefList ConstraintList TypeAttributeDefList
               ParamDefList StringStringListPair StringStringListPairList
               StatementList ExpressionListStatementListPair
               ExpressionListStatementListPairList
               ExpressionStatementListPair
               ExpressionStatementListPairList VarDefList SelectItemList
               RowConstraintList CaseExpressionListExpressionPair
               CaseExpressionListExpressionPairList CaseExpressionList
               SourcePosStatement MTableRef TableRef OnExpr Where

SET NonListNodes = Statement SelectExpression FnBody SetClause TableRef
                   JoinExpression JoinType SelectItem CopySource
                   AttributeDef RowConstraint Constraint TypeAttributeDef
                   ParamDef VarDef RaiseType CombineType Volatility Language
                   TypeName DropType Cascade Direction Distinct Natural
                   IfExists RestartIdentity Expression InList
                   MaybeExpression StringStringListPair
                   ExpressionListStatementListPair
                   ExpressionStatementListPair SourcePosStatement
                   MTableRef OnExpr Where

-- NOTE(review): SelectList (a DATA node) and
-- CaseExpressionListExpressionPair (a pair type) are listed here
-- rather than in NonListNodes - confirm this is intended
SET ListNodes = SelectList ExpressionList ExpressionListList StringList
                SetClauseList AttributeDefList ConstraintList
                TypeAttributeDefList ParamDefList StringStringListPairList
                StatementList ExpressionListStatementListPairList
                ExpressionStatementListPairList VarDefList SelectItemList
                RowConstraintList CaseExpressionListExpressionPairList
                CaseExpressionList CaseExpressionListExpressionPair

DERIVING AllNodes: Show,Eq

INCLUDE "TypeChecking.ag"

{-
================================================================================

used to use record syntax to try to insulate code from field changes,
and not have to write out loads of nothings and [] for simple selects,
but don't know how to create haskell named records from uuagc DATA
things

makeSelect :: Statement
makeSelect = Select Dupes (SelectList [SelExp (Identifier "*")] [])
                    Nothing Nothing [] Nothing [] Asc Nothing Nothing

================================================================================

= checkAst

test function to run on asts, returns a list of errors, warnings,
etc. bit stale

-}
{

-- wraps the statements in a Root node, evaluates it with defaultScope
-- as the inherited scope and returns the synthesized messages
-- attribute; unlike the get* functions further down, no caller
-- supplied scope is combined in
checkAst :: StatementList -> [Message]
checkAst sts = let t = sem_Root (Root sts)
               in (messages_Syn_Root
                   (wrap_Root t Inh_Root {scope_Inh_Root = defaultScope}))
{-
================================================================================

= Types

These are the utility functions which clients use to typecheck sql.

-}

-- | Type of a single expression, evaluated with the supplied scope
-- combined with 'defaultScope'.
getExpressionType :: Scope -> Expression -> Type
getExpressionType scope ex = nodeType_Syn_ExpressionRoot synthesized
    where
      synthesized = wrap_ExpressionRoot semantics inherited
      semantics = sem_ExpressionRoot (ExpressionRoot ex)
      inherited = Inh_ExpressionRoot
                    {scope_Inh_ExpressionRoot = combineScopes defaultScope scope}

-- | Types of a list of statements, with 'emptyScope' as the extra scope.
getStatementsType :: StatementList -> [Type]
getStatementsType st = getStatementsTypeScope emptyScope st

-- | Types of a list of statements: the root's nodeType attribute,
-- passed through 'unwrapTypeList', evaluated with the supplied scope
-- combined with 'defaultScope'.
getStatementsTypeScope :: Scope -> StatementList -> [Type]
getStatementsTypeScope scope st = unwrapTypeList (nodeType_Syn_Root synthesized)
    where
      synthesized = wrap_Root (sem_Root (Root st)) inherited
      inherited = Inh_Root {scope_Inh_Root = combineScopes defaultScope scope}

-- | Statement infos of a list of statements, with 'emptyScope' as the
-- extra scope.
getStatementsInfo :: StatementList -> [StatementInfo]
getStatementsInfo st = getStatementsInfoScope emptyScope st

-- | Statement infos of a list of statements: the root's statementInfo
-- attribute, evaluated with the supplied scope combined with
-- 'defaultScope'.
getStatementsInfoScope :: Scope -> StatementList -> [StatementInfo]
getStatementsInfoScope scope st = statementInfo_Syn_Root synthesized
    where
      synthesized = wrap_Root (sem_Root (Root st)) inherited
      inherited = Inh_Root {scope_Inh_Root = combineScopes defaultScope scope}

--hack job, often not interested in the source positions when testing
--the asts produced, so this function will reset all the source
--positions to empty ("", 0, 0) so we can compare them for equality, etc.
--without having to get the positions correct.

-- | Apply 'resetSp' to each statement in a plain (position-less) list.
resetSps :: [Statement] -> [Statement]
resetSps = map resetSp

-- | Reset the source positions of all statements nested inside the
-- given statement. Every 'Statement' alternative that carries a
-- 'StatementList' is handled: function bodies, both for loops, while
-- loops, case statements and if statements. Any other statement is
-- returned unchanged.
resetSp :: Statement -> Statement
resetSp (CreateFunction l n p r bq b v) =
    CreateFunction l n p r bq body' v
    where
      body' = case b of
                SqlFnBody stss -> SqlFnBody (resetSps' stss)
                PlpgsqlFnBody vd stss -> PlpgsqlFnBody vd (resetSps' stss)
resetSp (ForSelectStatement v s stss) =
    ForSelectStatement v s (resetSps' stss)
resetSp (ForIntegerStatement v f t stss) =
    ForIntegerStatement v f t (resetSps' stss)
-- while loops also hold a nested statement list; without this case
-- the positions inside the loop body were left untouched
resetSp (WhileStatement e stss) =
    WhileStatement e (resetSps' stss)
resetSp (CaseStatement v cs els) =
    CaseStatement v (map (second resetSps') cs) (resetSps' els)
resetSp (If cs els) =
    If (map (second resetSps') cs) (resetSps' els)
resetSp a = a

-- | Replace the position of one positioned statement with 'nsp' and
-- reset the positions nested inside it.
resetSp' :: SourcePosStatement -> SourcePosStatement
resetSp' (_,st) = (nsp,resetSp st)

-- | Apply 'resetSp'' to every element of a statement list.
resetSps' :: StatementList -> StatementList
resetSps' = map resetSp'

-- | The empty source position used by the reset functions.
nsp :: MySourcePos
nsp = ("", 0,0)

}

{-

Future plans:

Investigate how much mileage can get out of making these nodes the
parse tree nodes, and using a separate ast. Hinges on how much extra
value can get from making the types more restrictive for the ast nodes
compared to the parse tree.

Would like to turn this back into regular Haskell file, maybe could
use AspectAG instead of uuagc to make this happen?

-}