Commit 05f916e6 authored by Tom Lane's avatar Tom Lane

Adjust subquery qual pushdown rules to be more forgiving: if a qual

refers to a non-DISTINCT output column of a DISTINCT ON subquery, or
if it refers to a function-returning-set, we cannot push it down.
But the old implementation refused to push down *any* quals if the
subquery had any such 'dangerous' outputs.  Now we just look at the
output columns actually referenced by each qual expression.  More code
than before, but probably no slower since we don't make unnecessary checks.
parent e43094b1
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.99 2003/03/10 03:53:49 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.100 2003/03/22 01:49:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,7 +26,9 @@ ...@@ -26,7 +26,9 @@
#include "optimizer/plancat.h" #include "optimizer/plancat.h"
#include "optimizer/planner.h" #include "optimizer/planner.h"
#include "optimizer/prep.h" #include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/parsetree.h" #include "parser/parsetree.h"
#include "parser/parse_clause.h"
#include "rewrite/rewriteManip.h" #include "rewrite/rewriteManip.h"
...@@ -49,6 +51,7 @@ static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed, ...@@ -49,6 +51,7 @@ static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
List *initial_rels); List *initial_rels);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery); static bool recurse_pushdown_safe(Node *setOp, Query *topquery);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual);
static void subquery_push_qual(Query *subquery, Index rti, Node *qual); static void subquery_push_qual(Query *subquery, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery, static void recurse_push_qual(Node *setOp, Query *topquery,
Index rti, Node *qual); Index rti, Node *qual);
...@@ -305,16 +308,14 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel, ...@@ -305,16 +308,14 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
* *
* There are several cases where we cannot push down clauses. * There are several cases where we cannot push down clauses.
* Restrictions involving the subquery are checked by * Restrictions involving the subquery are checked by
* subquery_is_pushdown_safe(). Also, we do not push down clauses * subquery_is_pushdown_safe(). Restrictions on individual clauses are
* that contain subselects, mainly because I'm not sure it will work * checked by qual_is_pushdown_safe().
* correctly (the subplan hasn't yet transformed sublinks to
* subselects).
* *
* Non-pushed-down clauses will get evaluated as qpquals of the * Non-pushed-down clauses will get evaluated as qpquals of the
* SubqueryScan node. * SubqueryScan node.
* *
* XXX Are there any cases where we want to make a policy decision not to * XXX Are there any cases where we want to make a policy decision not to
* push down, because it'd result in a worse plan? * push down a pushable qual, because it'd result in a worse plan?
*/ */
if (rel->baserestrictinfo != NIL && if (rel->baserestrictinfo != NIL &&
subquery_is_pushdown_safe(subquery, subquery)) subquery_is_pushdown_safe(subquery, subquery))
...@@ -328,15 +329,15 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel, ...@@ -328,15 +329,15 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lst); RestrictInfo *rinfo = (RestrictInfo *) lfirst(lst);
Node *clause = (Node *) rinfo->clause; Node *clause = (Node *) rinfo->clause;
if (contain_subplans(clause)) if (qual_is_pushdown_safe(subquery, rti, clause))
{ {
/* Keep it in the upper query */ /* Push it down */
upperrestrictlist = lappend(upperrestrictlist, rinfo); subquery_push_qual(subquery, rti, clause);
} }
else else
{ {
/* Push it down */ /* Keep it in the upper query */
subquery_push_qual(subquery, rti, clause); upperrestrictlist = lappend(upperrestrictlist, rinfo);
} }
} }
rel->baserestrictinfo = upperrestrictlist; rel->baserestrictinfo = upperrestrictlist;
...@@ -527,21 +528,10 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels) ...@@ -527,21 +528,10 @@ make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
* *
* Conditions checked here: * Conditions checked here:
* *
* 1. If the subquery has a LIMIT clause or a DISTINCT ON clause, we must * 1. If the subquery has a LIMIT clause, we must not push down any quals,
* not push down any quals, since that could change the set of rows * since that could change the set of rows returned.
* returned. (Actually, we could push down quals into a DISTINCT ON
* subquery if they refer only to DISTINCT-ed output columns, but
* checking that seems more work than it's worth. In any case, a
* plain DISTINCT is safe to push down past.)
*
* 2. If the subquery has any functions returning sets in its target list,
* we do not push down any quals, since the quals
* might refer to those tlist items, which would mean we'd introduce
* functions-returning-sets into the subquery's WHERE/HAVING quals.
* (It'd be sufficient to not push down quals that refer to those
* particular tlist items, but that's much clumsier to check.)
* *
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that would change the results. For subqueries * quals into it, because that would change the results. For subqueries
* using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can push the quals * using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can push the quals
* into each component query, so long as all the component queries share * into each component query, so long as all the component queries share
...@@ -554,11 +544,8 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery) ...@@ -554,11 +544,8 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery)
{ {
SetOperationStmt *topop; SetOperationStmt *topop;
/* Check points 1 and 2 */ /* Check point 1 */
if (subquery->limitOffset != NULL || if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
subquery->limitCount != NULL ||
has_distinct_on_clause(subquery) ||
expression_returns_set((Node *) subquery->targetList))
return false; return false;
/* Are we at top level, or looking at a setop component? */ /* Are we at top level, or looking at a setop component? */
...@@ -622,6 +609,89 @@ recurse_pushdown_safe(Node *setOp, Query *topquery) ...@@ -622,6 +609,89 @@ recurse_pushdown_safe(Node *setOp, Query *topquery)
return true; return true;
} }
/*
* qual_is_pushdown_safe - is a particular qual safe to push down?
*
* qual is a restriction clause applying to the given subquery (whose RTE
* has index rti in the parent query).
*
* Conditions checked here:
*
* 1. The qual must not contain any subselects (mainly because I'm not sure
* it will work correctly: sublinks will already have been transformed into
* subplans in the qual, but not in the subquery).
*
* 2. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. This condition is vacuous for DISTINCT, because then
* there are no non-DISTINCT output columns, but unfortunately it's fairly
* expensive to tell the difference between DISTINCT and DISTINCT ON in the
* parsetree representation. It's cheaper to just make sure all the Vars
* in the qual refer to DISTINCT columns.
*
* 3. We must not push down any quals that refer to subselect outputs that
* return sets, else we'd introduce functions-returning-sets into the
* subquery's WHERE/HAVING quals.
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual)
{
bool safe = true;
List *vars;
List *l;
Bitmapset *tested = NULL;
/* Refuse subselects (point 1) */
if (contain_subplans(qual))
return false;
/*
* Examine all Vars used in clause; since it's a restriction clause,
* all such Vars must refer to subselect output columns.
*/
vars = pull_var_clause(qual, false);
foreach(l, vars)
{
Var *var = (Var *) lfirst(l);
TargetEntry *tle;
Assert(var->varno == rti);
/*
* We use a bitmapset to avoid testing the same attno more than
* once. (NB: this only works because subquery outputs can't
* have negative attnos.)
*/
if (bms_is_member(var->varattno, tested))
continue;
tested = bms_add_member(tested, var->varattno);
tle = (TargetEntry *) nth(var->varattno-1, subquery->targetList);
Assert(tle->resdom->resno == var->varattno);
Assert(!tle->resdom->resjunk);
/* If subquery uses DISTINCT or DISTINCT ON, check point 2 */
if (subquery->distinctClause != NIL &&
!targetIsInSortList(tle, subquery->distinctClause))
{
/* non-DISTINCT column, so fail */
safe = false;
break;
}
/* Refuse functions returning sets (point 3) */
if (expression_returns_set((Node *) tle->expr))
{
safe = false;
break;
}
}
freeList(vars);
bms_free(tested);
return safe;
}
/* /*
* subquery_push_qual - push down a qual that we have determined is safe * subquery_push_qual - push down a qual that we have determined is safe
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.132 2003/03/14 00:55:17 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.133 2003/03/22 01:49:38 tgl Exp $
* *
* HISTORY * HISTORY
* AUTHOR DATE MAJOR EVENT * AUTHOR DATE MAJOR EVENT
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
#include "optimizer/var.h" #include "optimizer/var.h"
#include "parser/analyze.h" #include "parser/analyze.h"
#include "parser/parse_clause.h"
#include "tcop/tcopprot.h" #include "tcop/tcopprot.h"
#include "utils/acl.h" #include "utils/acl.h"
#include "utils/builtins.h" #include "utils/builtins.h"
...@@ -813,28 +814,6 @@ pull_constant_clauses(List *quals, List **constantQual) ...@@ -813,28 +814,6 @@ pull_constant_clauses(List *quals, List **constantQual)
* think of a better place for it... * think of a better place for it...
*****************************************************************************/ *****************************************************************************/
/*
* Test whether a sort/group reference value appears in the given list of
* SortClause (or GroupClause) nodes.
*
* Because GroupClause is typedef'd as SortClause, either kind of
* node list can be passed without casting.
*/
static bool
sortgroupref_is_present(Index sortgroupref, List *clauselist)
{
List *clause;
foreach(clause, clauselist)
{
SortClause *scl = (SortClause *) lfirst(clause);
if (scl->tleSortGroupRef == sortgroupref)
return true;
}
return false;
}
/* /*
* Test whether a query uses DISTINCT ON, ie, has a distinct-list that is * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
* not the same as the set of output columns. * not the same as the set of output columns.
...@@ -864,15 +843,14 @@ has_distinct_on_clause(Query *query) ...@@ -864,15 +843,14 @@ has_distinct_on_clause(Query *query)
foreach(targetList, query->targetList) foreach(targetList, query->targetList)
{ {
TargetEntry *tle = (TargetEntry *) lfirst(targetList); TargetEntry *tle = (TargetEntry *) lfirst(targetList);
Index ressortgroupref = tle->resdom->ressortgroupref;
if (ressortgroupref == 0) if (tle->resdom->ressortgroupref == 0)
{ {
if (tle->resdom->resjunk) if (tle->resdom->resjunk)
continue; /* we can ignore unsorted junk cols */ continue; /* we can ignore unsorted junk cols */
return true; /* definitely not in DISTINCT list */ return true; /* definitely not in DISTINCT list */
} }
if (sortgroupref_is_present(ressortgroupref, query->distinctClause)) if (targetIsInSortList(tle, query->distinctClause))
{ {
if (tle->resdom->resjunk) if (tle->resdom->resjunk)
return true; /* junk TLE in DISTINCT means DISTINCT ON */ return true; /* junk TLE in DISTINCT means DISTINCT ON */
...@@ -883,7 +861,7 @@ has_distinct_on_clause(Query *query) ...@@ -883,7 +861,7 @@ has_distinct_on_clause(Query *query)
/* This TLE is not in DISTINCT list */ /* This TLE is not in DISTINCT list */
if (!tle->resdom->resjunk) if (!tle->resdom->resjunk)
return true; /* non-junk, non-DISTINCT, so DISTINCT ON */ return true; /* non-junk, non-DISTINCT, so DISTINCT ON */
if (sortgroupref_is_present(ressortgroupref, query->sortClause)) if (targetIsInSortList(tle, query->sortClause))
return true; /* sorted, non-distinct junk */ return true; /* sorted, non-distinct junk */
/* unsorted junk is okay, keep looking */ /* unsorted junk is okay, keep looking */
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.111 2003/03/10 03:53:51 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.112 2003/03/22 01:49:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -60,7 +60,6 @@ static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node, ...@@ -60,7 +60,6 @@ static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
List *tlist, int clause); List *tlist, int clause);
static List *addTargetToSortList(TargetEntry *tle, List *sortlist, static List *addTargetToSortList(TargetEntry *tle, List *sortlist,
List *targetlist, List *opname); List *targetlist, List *opname);
static bool targetIsInSortList(TargetEntry *tle, List *sortList);
/* /*
...@@ -1386,7 +1385,7 @@ assignSortGroupRef(TargetEntry *tle, List *tlist) ...@@ -1386,7 +1385,7 @@ assignSortGroupRef(TargetEntry *tle, List *tlist)
* reason we need this routine (and not just a quick test for nonzeroness * reason we need this routine (and not just a quick test for nonzeroness
* of ressortgroupref) is that a TLE might be in only one of the lists. * of ressortgroupref) is that a TLE might be in only one of the lists.
*/ */
static bool bool
targetIsInSortList(TargetEntry *tle, List *sortList) targetIsInSortList(TargetEntry *tle, List *sortList)
{ {
Index ref = tle->resdom->ressortgroupref; Index ref = tle->resdom->ressortgroupref;
......
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* parse_clause.h * parse_clause.h
* * handle clauses in parser
* *
* *
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: parse_clause.h,v 1.29 2002/06/20 20:29:51 momjian Exp $ * $Id: parse_clause.h,v 1.30 2003/03/22 01:49:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -30,5 +30,6 @@ extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, ...@@ -30,5 +30,6 @@ extern List *transformDistinctClause(ParseState *pstate, List *distinctlist,
extern List *addAllTargetsToSortList(List *sortlist, List *targetlist); extern List *addAllTargetsToSortList(List *sortlist, List *targetlist);
extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
extern bool targetIsInSortList(TargetEntry *tle, List *sortList);
#endif /* PARSE_CLAUSE_H */ #endif /* PARSE_CLAUSE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment