Commit 5c74ce23 authored by Tom Lane

Improve UniquePath logic to detect the case where the input is already
known unique (e.g., it is a SELECT DISTINCT ... subquery), and not do a
redundant unique-ification step.
parent cce442da
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
...@@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node) ...@@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
_outPathInfo(str, (Path *) node); _outPathInfo(str, (Path *) node);
WRITE_NODE_FIELD(subpath); WRITE_NODE_FIELD(subpath);
WRITE_BOOL_FIELD(use_hash); WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_FLOAT_FIELD(rows, "%.0f"); WRITE_FLOAT_FIELD(rows, "%.0f");
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path) ...@@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
subplan->targetlist = newtlist; subplan->targetlist = newtlist;
} }
/* Done if we don't need to do any actual unique-ifying */
if (best_path->umethod == UNIQUE_PATH_NOOP)
return subplan;
/* Copy tlist again to make one we can put sorting labels on */ /* Copy tlist again to make one we can put sorting labels on */
my_tlist = copyObject(subplan->targetlist); my_tlist = copyObject(subplan->targetlist);
if (best_path->use_hash) if (best_path->umethod == UNIQUE_PATH_HASH)
{ {
long numGroups; long numGroups;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
* *
* HISTORY * HISTORY
* AUTHOR DATE MAJOR EVENT * AUTHOR DATE MAJOR EVENT
...@@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query) ...@@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
return false; return false;
} }
/*
 * has_distinct_clause
 *	  Report whether a query uses plain DISTINCT, that is, whether its
 *	  distinct-list is the same as its set of output columns.
 *
 * A query with no distinct-list at all is not DISTINCT.  One that does have
 * a distinct-list counts as plain DISTINCT exactly when
 * has_distinct_on_clause() does not classify it as DISTINCT ON.
 */
bool
has_distinct_clause(Query *query)
{
	bool		has_distinct_list = (query->distinctClause != NIL);

	/* The DISTINCT ON test is consulted only when a distinct-list exists */
	return has_distinct_list && !has_distinct_on_clause(query);
}
/***************************************************************************** /*****************************************************************************
* * * *
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,12 +20,14 @@ ...@@ -20,12 +20,14 @@
#include "executor/executor.h" #include "executor/executor.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "nodes/plannodes.h" #include "nodes/plannodes.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h" #include "optimizer/cost.h"
#include "optimizer/pathnode.h" #include "optimizer/pathnode.h"
#include "optimizer/paths.h" #include "optimizer/paths.h"
#include "optimizer/restrictinfo.h" #include "optimizer/restrictinfo.h"
#include "parser/parse_expr.h" #include "parser/parse_expr.h"
#include "parser/parse_oper.h" #include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/selfuncs.h" #include "utils/selfuncs.h"
#include "utils/syscache.h" #include "utils/syscache.h"
...@@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) ...@@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
pathnode->subpath = subpath; pathnode->subpath = subpath;
/*
* If the input is a subquery that uses DISTINCT, we don't need to do
* anything; its output is already unique. (Are there any other cases
* in which we can easily prove the input must be distinct?)
*/
if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
Query *subquery = rte->subquery;
if (has_distinct_clause(subquery))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
return pathnode;
}
}
/* /*
* Try to identify the targetlist that will actually be unique-ified. * Try to identify the targetlist that will actually be unique-ified.
* In current usage, this routine is only used for sub-selects of IN * In current usage, this routine is only used for sub-selects of IN
...@@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) ...@@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
* compare costs. We only try this if we know the targetlist for sure * compare costs. We only try this if we know the targetlist for sure
* (else we can't be sure about the datatypes involved). * (else we can't be sure about the datatypes involved).
*/ */
pathnode->use_hash = false; pathnode->umethod = UNIQUE_PATH_SORT;
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist)) if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
{ {
/* /*
...@@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath) ...@@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
subpath->total_cost, subpath->total_cost,
rel->rows); rel->rows);
if (agg_path.total_cost < sort_path.total_cost) if (agg_path.total_cost < sort_path.total_cost)
pathnode->use_hash = true; pathnode->umethod = UNIQUE_PATH_HASH;
} }
} }
if (pathnode->use_hash) if (pathnode->umethod == UNIQUE_PATH_HASH)
{ {
pathnode->path.startup_cost = agg_path.startup_cost; pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost; pathnode->path.total_cost = agg_path.total_cost;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -442,15 +442,26 @@ typedef struct MaterialPath ...@@ -442,15 +442,26 @@ typedef struct MaterialPath
* its subpath. * its subpath.
* *
* This is unlike the other Path nodes in that it can actually generate * This is unlike the other Path nodes in that it can actually generate
* two different plans: either hash-based or sort-based implementation. * different plans: either hash-based or sort-based implementation, or a
* The decision is sufficiently localized that it's not worth having two * no-op if the input path can be proven distinct already. The decision
* separate Path node types. * is sufficiently localized that it's not worth having separate Path node
* types. (Note: in the no-op case, we could eliminate the UniquePath node
* entirely and just return the subpath; but it's convenient to have a
* UniquePath in the path tree to signal upper-level routines that the input
* is known distinct.)
*/ */
/*
 * UniquePathMethod
 *	  Identifies which implementation a UniquePath will use: nothing at all
 *	  when the input can be proven distinct already, or a hash-based or
 *	  sort-based unique-ification step otherwise.
 */
typedef enum
{
UNIQUE_PATH_NOOP, /* input is known unique already; no unique-ifying step is needed */
UNIQUE_PATH_HASH, /* unique-ify using the hash-based implementation */
UNIQUE_PATH_SORT /* unique-ify using the sort-based implementation */
} UniquePathMethod;
typedef struct UniquePath typedef struct UniquePath
{ {
Path path; Path path;
Path *subpath; Path *subpath;
bool use_hash; UniquePathMethod umethod;
double rows; /* estimated number of result tuples */ double rows; /* estimated number of result tuples */
} UniquePath; } UniquePath;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause); ...@@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
extern List *pull_constant_clauses(List *quals, List **constantQual); extern List *pull_constant_clauses(List *quals, List **constantQual);
extern bool has_distinct_clause(Query *query);
extern bool has_distinct_on_clause(Query *query); extern bool has_distinct_on_clause(Query *query);
extern int NumRelids(Node *clause); extern int NumRelids(Node *clause);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment