Implement constant-expression simplification per Bernard Frankpitt, plus some improvements from yours truly.

The simplifier depends on the proiscachable field of pg_proc to tell it whether a function is safe to pre-evaluate --- things like nextval() are not, for example. Update pg_proc.h to contain reasonable cacheability information; as of 6.5.* hardly any functions were marked cacheable. I may have erred too far in the other direction; see recent mail to pghackers for more info. This update does not force an initdb, exactly, but you won't see much benefit from the simplifier until you do one.

Implement constant-expression simplification per Bernard Frankpitt, plus some improvements from yours truly.
The simplifier depends on the proiscachable field of pg_proc to tell it whether a function is safe to pre-evaluate --- things like nextval() are not, for example. Update pg_proc.h to contain reasonable cacheability information; as of 6.5.* hardly any functions were marked cacheable. I may have erred too far in the other direction; see recent mail to pghackers for more info. This update does not force an initdb, exactly, but you won't see much benefit from the simplifier until you do one.
40f65241 · Tom Lane · 95d3d468 · 40f65241 · 40f65241 · 40f65241
Commit 40f65241 authored Sep 26, 1999 by Tom Lane
7 changed files
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.60 1999/09/24 00:24:23 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.61 1999/09/26 02:28:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -956,8 +956,8 @@ ExecEvalFunc(Expr *funcClause,
 static Datum
 ExecEvalNot(Expr *notclause, ExprContext *econtext, bool *isNull)
 {
-	Datum		expr_value;
 	Node	   *clause;
+	Datum		expr_value;
 	bool		isDone;
 	clause = lfirst(notclause->args);
@@ -995,67 +995,47 @@ ExecEvalOr(Expr *orExpr, ExprContext *econtext, bool *isNull)
 	List	   *clauses;
 	List	   *clause;
 	bool		isDone;
-	bool		IsNull;
+	bool		AnyNull;
-	Datum		const_value = 0;
+	Datum		clause_value;
-	IsNull = false;
 	clauses = orExpr->args;
+	AnyNull = false;
 	/*
-	 * we use three valued logic functions here... we evaluate each of the
+	 * If any of the clauses is TRUE, the OR result is TRUE regardless
-	 * clauses in turn, as soon as one is true we return that value.  If
+	 * of the states of the rest of the clauses, so we can stop evaluating
-	 * none is true and  none of the clauses evaluate to NULL we return
+	 * and return TRUE immediately.  If none are TRUE and one or more is
-	 * the value of the last clause evaluated (which should be false) with
+	 * NULL, we return NULL; otherwise we return FALSE.  This makes sense
-	 * *isNull set to false else if none is true and at least one clause
+	 * when you interpret NULL as "don't know": if we have a TRUE then the
-	 * evaluated to NULL we set *isNull flag to true -
+	 * OR is TRUE even if we aren't sure about some of the other inputs.
+	 * If all the known inputs are FALSE, but we have one or more "don't
+	 * knows", then we have to report that we "don't know" what the OR's
+	 * result should be --- perhaps one of the "don't knows" would have been
+	 * TRUE if we'd known its value.  Only when all the inputs are known
+	 * to be FALSE can we state confidently that the OR's result is FALSE.
 	 */
 	foreach(clause, clauses)
 	{
 		/*
 		 * We don't iterate over sets in the quals, so pass in an isDone
 		 * flag, but ignore it.
 		 */
-		const_value = ExecEvalExpr((Node *) lfirst(clause),
+		clause_value = ExecEvalExpr((Node *) lfirst(clause),
-								   econtext,
+									econtext,
-								   isNull,
+									isNull,
-								   &isDone);
+									&isDone);
 		/*
-		 * if the expression evaluates to null, then we remember it in the
+		 * if we have a non-null true result, then return it.
-		 * local IsNull flag, if none of the clauses are true then we need
-		 * to set *isNull to true again.
 		 */
 		if (*isNull)
-		{
+			AnyNull = true;		/* remember we got a null */
-			IsNull = *isNull;
+		else if (DatumGetInt32(clause_value) != 0)
+			return clause_value;
-			/*
-			 * Many functions don't (or can't!) check if an argument is
-			 * NULL or NOT_NULL and may return TRUE (1) with *isNull TRUE
-			 * (an_int4_column <> 1: int4ne returns TRUE for NULLs). Not
-			 * having time to fix the function manager I want to fix OR:
-			 * if we had 'x <> 1 OR x isnull' then when x is NULL TRUE was
-			 * returned by the 'x <> 1' clause ... but ExecQualClause says
-			 * that the qualification should *fail* if isnull is TRUE for
-			 * any value returned by ExecEvalExpr. So, force this rule
-			 * here: if isnull is TRUE then the clause failed. Note:
-			 * nullvalue() & nonnullvalue() always sets isnull to FALSE
-			 * for NULLs. - vadim 09/22/97
-			 */
-			const_value = 0;
-		}
-		/*
-		 * if we have a true result, then we return it.
-		 */
-		if (DatumGetInt32(const_value) != 0)
-			return const_value;
 	}
-	/* IsNull is true if at least one clause evaluated to NULL */
+	/* AnyNull is true if at least one clause evaluated to NULL */
-	*isNull = IsNull;
+	*isNull = AnyNull;
-	return const_value;
+	return (Datum) false;
 }
 /* ----------------------------------------------------------------
@@ -1067,49 +1047,43 @@ ExecEvalAnd(Expr *andExpr, ExprContext *econtext, bool *isNull)
 {
 	List	   *clauses;
 	List	   *clause;
-	Datum		const_value = 0;
 	bool		isDone;
-	bool		IsNull;
+	bool		AnyNull;
+	Datum		clause_value;
-	IsNull = false;
 	clauses = andExpr->args;
+	AnyNull = false;
 	/*
-	 * we evaluate each of the clauses in turn, as soon as one is false we
+	 * If any of the clauses is FALSE, the AND result is FALSE regardless
-	 * return that value.  If none are false or NULL then we return the
+	 * of the states of the rest of the clauses, so we can stop evaluating
-	 * value of the last clause evaluated, which should be true.
+	 * and return FALSE immediately.  If none are FALSE and one or more is
+	 * NULL, we return NULL; otherwise we return TRUE.  This makes sense
+	 * when you interpret NULL as "don't know", using the same sort of
+	 * reasoning as for OR, above.
 	 */
 	foreach(clause, clauses)
 	{
 		/*
 		 * We don't iterate over sets in the quals, so pass in an isDone
 		 * flag, but ignore it.
 		 */
-		const_value = ExecEvalExpr((Node *) lfirst(clause),
+		clause_value = ExecEvalExpr((Node *) lfirst(clause),
-								   econtext,
+									econtext,
-								   isNull,
+									isNull,
-								   &isDone);
+									&isDone);
 		/*
-		 * if the expression evaluates to null, then we remember it in
+		 * if we have a non-null false result, then return it.
-		 * IsNull, if none of the clauses after this evaluates to false we
-		 * will have to set *isNull to true again.
 		 */
 		if (*isNull)
-			IsNull = *isNull;
+			AnyNull = true;		/* remember we got a null */
+		else if (DatumGetInt32(clause_value) == 0)
-		/*
+			return clause_value;
-		 * if we have a false result, then we return it, since the
-		 * conjunction must be false.
-		 */
-		if (DatumGetInt32(const_value) == 0)
-			return const_value;
 	}
-	*isNull = IsNull;
+	/* AnyNull is true if at least one clause evaluated to NULL */
-	return const_value;
+	*isNull = AnyNull;
+	return (Datum) (! AnyNull);
 }
 /* ----------------------------------------------------------------
@@ -1126,7 +1100,7 @@ ExecEvalCase(CaseExpr *caseExpr, ExprContext *econtext, bool *isNull)
 {
 	List	   *clauses;
 	List	   *clause;
-	Datum		const_value = 0;
+	Datum		clause_value;
 	bool		isDone;
 	clauses = caseExpr->args;
@@ -1144,37 +1118,35 @@ ExecEvalCase(CaseExpr *caseExpr, ExprContext *econtext, bool *isNull)
 		 * We don't iterate over sets in the quals, so pass in an isDone
 		 * flag, but ignore it.
 		 */
-		const_value = ExecEvalExpr((Node *) wclause->expr,
+		clause_value = ExecEvalExpr(wclause->expr,
-								   econtext,
+									econtext,
-								   isNull,
+									isNull,
-								   &isDone);
+									&isDone);
 		/*
 		 * if we have a true test, then we return the result, since the
 		 * case statement is satisfied.  A NULL result from the test is
 		 * not considered true.
 		 */
-		if (DatumGetInt32(const_value) != 0 && ! *isNull)
+		if (DatumGetInt32(clause_value) != 0 && ! *isNull)
 		{
-			const_value = ExecEvalExpr((Node *) wclause->result,
+			return ExecEvalExpr(wclause->result,
-									   econtext,
+								econtext,
-									   isNull,
+								isNull,
-									   &isDone);
+								&isDone);
-			return (Datum) const_value;
 		}
 	}
 	if (caseExpr->defresult)
 	{
-		const_value = ExecEvalExpr((Node *) caseExpr->defresult,
+		return ExecEvalExpr(caseExpr->defresult,
-								   econtext,
+							econtext,
-								   isNull,
+							isNull,
-								   &isDone);
+							&isDone);
 	}
-	else
-		*isNull = true;
-	return const_value;
+	*isNull = true;
+	return (Datum) 0;
 }
 /* ----------------------------------------------------------------
@@ -1357,7 +1329,6 @@ bool
 ExecQual(List *qual, ExprContext *econtext)
 {
 	List	   *clause;
-	bool		result;
 	/*
 	 * debugging stuff
@@ -1378,27 +1349,17 @@ ExecQual(List *qual, ExprContext *econtext)
 	 * a "qual" is a list of clauses.  To evaluate the qual, we evaluate
 	 * each of the clauses in the list.
 	 *
-	 * ExecQualClause returns true when we know the qualification *failed* so
+	 * ExecQualClause returns true when we know the qualification *failed*
-	 * we just pass each clause in qual to it until we know the qual
+	 * so we just pass each clause in qual to it until we know the qual
 	 * failed or there are no more clauses.
 	 */
-	result = false;
 	foreach(clause, qual)
 	{
-		result = ExecQualClause((Node *) lfirst(clause), econtext);
+		if (ExecQualClause((Node *) lfirst(clause), econtext))
-		if (result == true)
+			return false;		/* qual failed, so return false */
-			break;
 	}
-	/*
-	 * if result is true, then it means a clause failed so we return
-	 * false.  if result is false then it means no clause failed so we
-	 * return true.
-	 */
-	if (result == true)
-		return false;
 	return true;
 }

--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.48 1999/08/21 03:48:57 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.49 1999/09/26 02:28:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -223,6 +223,22 @@ _equalAggref(Aggref *a, Aggref *b)
 	return true;
 }
+static bool
+_equalSubLink(SubLink *a, SubLink *b)
+{
+	if (a->subLinkType != b->subLinkType)
+		return false;
+	if (a->useor != b->useor)
+		return false;
+	if (!equal(a->lefthand, b->lefthand))
+		return false;
+	if (!equal(a->oper, b->oper))
+		return false;
+	if (!equal(a->subselect, b->subselect))
+		return false;
+	return true;
+}
 static bool
 _equalArray(Array *a, Array *b)
 {
@@ -393,7 +409,7 @@ _equalSubPlan(SubPlan *a, SubPlan *b)
 	if (a->plan_id != b->plan_id)
 		return false;
-	if (!equal(a->sublink->oper, b->sublink->oper))
+	if (!equal(a->sublink, b->sublink))
 		return false;
 	return true;
@@ -713,6 +729,9 @@ equal(void *a, void *b)
 		case T_Aggref:
 			retval = _equalAggref(a, b);
 			break;
+		case T_SubLink:
+			retval = _equalSubLink(a, b);
+			break;
 		case T_Func:
 			retval = _equalFunc(a, b);
 			break;

--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.44 1999/09/13 00:17:25 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.45 1999/09/26 02:28:27 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -66,24 +66,41 @@ query_planner(Query *root,
 	List	   *level_tlist;
 	Plan	   *subplan;
+	/*
+	 * Simplify constant expressions in both targetlist and qual.
+	 *
+	 * Note that at this point the qual has not yet been converted to
+	 * implicit-AND form, so we can apply eval_const_expressions directly.
+	 * Also note that we need to do this before SS_process_sublinks,
+	 * because that routine inserts bogus "Const" nodes.
+	 */
+	tlist = (List *) eval_const_expressions((Node *) tlist);
+	qual = (List *) eval_const_expressions((Node *) qual);
+	/*
+	 * Canonicalize the qual, and convert it to implicit-AND format.
+	 */
+	qual = canonicalize_qual((Expr *) qual, true);
+#ifdef OPTIMIZER_DEBUG
+	printf("After canonicalize_qual()\n");
+	pprint(qual);
+#endif
+	/* Replace uplevel vars with Param nodes */
 	if (PlannerQueryLevel > 1)
 	{
-		/* should copy be made ? */
 		tlist = (List *) SS_replace_correlation_vars((Node *) tlist);
 		qual = (List *) SS_replace_correlation_vars((Node *) qual);
 	}
+	/* Expand SubLinks to SubPlans */
 	if (root->hasSubLinks)
 		qual = (List *) SS_process_sublinks((Node *) qual);
-	qual = canonicalize_qual((Expr *) qual, true);
-#ifdef OPTIMIZER_DEBUG
-	printf("After canonicalize_qual()\n");
-	pprint(qual);
-#endif
 	/*
 	 * Pull out any non-variable qualifications so these can be put in the
-	 * topmost result node.
+	 * topmost result node.  (Any *really* non-variable quals will probably
+	 * have been optimized away by eval_const_expressions().  What we're
+	 * looking for here is quals that depend only on outer-level vars...)
 	 */
 	qual = pull_constant_clauses(qual, &constant_qual);

--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.68 1999/09/18 19:07:00 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.69 1999/09/26 02:28:27 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -301,7 +301,28 @@ union_planner(Query *parse)
 	 */
 	if (parse->havingQual)
 	{
-		List	   *ql;
+		/*--------------------
+		 * Require the havingQual to contain at least one aggregate function
+		 * (else it could have been done as a WHERE constraint).  This check
+		 * used to be much stricter, requiring an aggregate in each clause of
+		 * the CNF-ified qual.  However, that's probably overly anal-retentive.
+		 * We now do it first so that we will not complain if there is an
+		 * aggregate but it gets optimized away by eval_const_expressions().
+		 * The agg itself is never const, of course, but consider
+		 *		SELECT ... HAVING xyz OR (COUNT(*) > 1)
+		 * where xyz reduces to constant true in a particular query.
+		 * We probably should not refuse this query.
+		 *--------------------
+		 */
+		if (pull_agg_clause(parse->havingQual) == NIL)
+			elog(ERROR, "SELECT/HAVING requires aggregates to be valid");
+		/* Simplify constant expressions in havingQual */
+		parse->havingQual = eval_const_expressions(parse->havingQual);
+		/* Convert the havingQual to implicit-AND normal form */
+		parse->havingQual = (Node *)
+			canonicalize_qual((Expr *) parse->havingQual, true);
 		/* Replace uplevel Vars with Params */
 		if (PlannerQueryLevel > 1)
@@ -323,20 +344,6 @@ union_planner(Query *parse)
 												  parse->targetList))
 				elog(ERROR, "Sub-SELECT in HAVING clause must use only GROUPed attributes from outer SELECT");
 		}
-		/* convert the havingQual to implicit-AND normal form */
-		parse->havingQual = (Node *)
-			canonicalize_qual((Expr *) parse->havingQual, true);
-		/*
-		 * Require an aggregate function to appear in each clause of the
-		 * havingQual (else it could have been done as a WHERE constraint).
-		 */
-		foreach(ql, (List *) parse->havingQual)
-		{
-			if (pull_agg_clause(lfirst(ql)) == NIL)
-				elog(ERROR, "SELECT/HAVING requires aggregates to be valid");
-		}
 	}
 	/*

--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -6,7 +6,7 @@
 *
 * Copyright (c) 1994, Regents of the University of California
 *
- * $Id: clauses.h,v 1.29 1999/08/22 20:14:56 tgl Exp $
+ * $Id: clauses.h,v 1.30 1999/09/26 02:28:44 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -52,6 +52,8 @@ extern void get_rels_atts(Node *clause, int *relid1,
 			  AttrNumber *attno1, int *relid2, AttrNumber *attno2);
 extern void CommuteClause(Expr *clause);
+extern Node *eval_const_expressions(Node *node);
 extern bool expression_tree_walker(Node *node, bool (*walker) (),
 								   void *context);
 extern Node *expression_tree_mutator(Node *node, Node * (*mutator) (),