Commit be3b265c authored by Tom Lane

Improve SELECT DISTINCT to consider hash aggregation, as well as sort/uniq,
as methods for implementing the DISTINCT step.  This eliminates the former
performance gap between DISTINCT and GROUP BY, and also makes it possible
to do SELECT DISTINCT on datatypes that only support hashing not sorting.

SELECT DISTINCT ON is still always implemented by sorting; it would take
executor changes to support hashing that, and it's not clear it's worth
the trouble.

This is a release-note-worthy incompatibility from previous PG versions,
since SELECT DISTINCT can no longer be counted on to deliver sorted output
without explicitly saying ORDER BY.  (Anyone who can't cope with that
can consider turning off enable_hashagg.)

Several regression test queries needed to have ORDER BY added to preserve
stable output order.  I fixed the ones that manifested here, but there
might be some other cases that show up on other platforms.
parent 4abd7b49
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
...@@ -1334,6 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node) ...@@ -1334,6 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(append_rel_list); WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(query_pathkeys); WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys); WRITE_NODE_FIELD(group_pathkeys);
WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys); WRITE_NODE_FIELD(sort_pathkeys);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f"); WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f"); WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -66,9 +66,9 @@ ...@@ -66,9 +66,9 @@
* PlannerInfo field and not a passed parameter is that the low-level routines * PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.) * in indxpath.c need to see it.)
* *
* Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys, * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
* which like query_pathkeys need to be canonicalized once the info is * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
* available. * the info is available.
* *
* tuple_fraction is interpreted as follows: * tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case) * 0: expect all tuples to be retrieved (normal case)
...@@ -120,6 +120,8 @@ query_planner(PlannerInfo *root, List *tlist, ...@@ -120,6 +120,8 @@ query_planner(PlannerInfo *root, List *tlist,
root->query_pathkeys); root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys = canonicalize_pathkeys(root,
root->group_pathkeys); root->group_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root,
root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys = canonicalize_pathkeys(root,
root->sort_pathkeys); root->sort_pathkeys);
return; return;
...@@ -237,10 +239,12 @@ query_planner(PlannerInfo *root, List *tlist, ...@@ -237,10 +239,12 @@ query_planner(PlannerInfo *root, List *tlist,
/* /*
* We have completed merging equivalence sets, so it's now possible to * We have completed merging equivalence sets, so it's now possible to
* convert the requested query_pathkeys to canonical form. Also * convert the requested query_pathkeys to canonical form. Also
* canonicalize the groupClause and sortClause pathkeys for use later. * canonicalize the groupClause, distinctClause and sortClause pathkeys
* for use later.
*/ */
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
/* /*
...@@ -286,9 +290,11 @@ query_planner(PlannerInfo *root, List *tlist, ...@@ -286,9 +290,11 @@ query_planner(PlannerInfo *root, List *tlist,
/* /*
* If both GROUP BY and ORDER BY are specified, we will need two * If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all the * levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY. * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if
* we have both DISTINCT and GROUP BY.
*/ */
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys)) if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
!pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0; tuple_fraction = 0.0;
} }
else if (parse->hasAggs || root->hasHavingQual) else if (parse->hasAggs || root->hasHavingQual)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.238 2008/08/05 02:43:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -69,12 +69,17 @@ static double preprocess_limit(PlannerInfo *root, ...@@ -69,12 +69,17 @@ static double preprocess_limit(PlannerInfo *root,
int64 *offset_est, int64 *count_est); int64 *offset_est, int64 *count_est);
static void preprocess_groupclause(PlannerInfo *root); static void preprocess_groupclause(PlannerInfo *root);
static Oid *extract_grouping_ops(List *groupClause); static Oid *extract_grouping_ops(List *groupClause);
static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
static bool grouping_is_sortable(List *groupClause); static bool grouping_is_sortable(List *groupClause);
static bool grouping_is_hashable(List *groupClause); static bool grouping_is_hashable(List *groupClause);
static bool choose_hashed_grouping(PlannerInfo *root, static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples, double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path, Path *cheapest_path, Path *sorted_path,
double dNumGroups, AggClauseCounts *agg_counts); double dNumGroups, AggClauseCounts *agg_counts);
static bool choose_hashed_distinct(PlannerInfo *root,
Plan *input_plan, List *input_pathkeys,
double tuple_fraction, double limit_tuples,
double dNumDistinctRows);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist, static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
AttrNumber **groupColIdx, bool *need_tlist_eval); AttrNumber **groupColIdx, bool *need_tlist_eval);
static void locate_grouping_columns(PlannerInfo *root, static void locate_grouping_columns(PlannerInfo *root,
...@@ -757,7 +762,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -757,7 +762,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
double limit_tuples = -1.0; double limit_tuples = -1.0;
Plan *result_plan; Plan *result_plan;
List *current_pathkeys; List *current_pathkeys;
List *sort_pathkeys;
double dNumGroups = 0; double dNumGroups = 0;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
...@@ -829,7 +833,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -829,7 +833,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* Calculate pathkeys that represent result ordering requirements * Calculate pathkeys that represent result ordering requirements
*/ */
Assert(parse->distinctClause == NIL); Assert(parse->distinctClause == NIL);
sort_pathkeys = make_pathkeys_for_sortclauses(root, root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
parse->sortClause, parse->sortClause,
tlist, tlist,
true); true);
...@@ -838,7 +842,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -838,7 +842,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{ {
/* No set operations, do regular planning */ /* No set operations, do regular planning */
List *sub_tlist; List *sub_tlist;
List *group_pathkeys;
AttrNumber *groupColIdx = NULL; AttrNumber *groupColIdx = NULL;
bool need_tlist_eval = true; bool need_tlist_eval = true;
QualCost tlist_cost; QualCost tlist_cost;
...@@ -870,14 +873,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -870,14 +873,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* /*
* Calculate pathkeys that represent grouping/ordering requirements. * Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize * Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed. * them after EquivalenceClasses have been formed. The sortClause
* * is certainly sort-able, but GROUP BY and DISTINCT might not be,
* Note: for the moment, DISTINCT is always implemented via sort/uniq, * in which case we just leave their pathkeys empty.
* and we set the sort_pathkeys to be the more rigorous of the
* DISTINCT and ORDER BY requirements. This should be changed
* someday, but DISTINCT ON is a bit of a problem ...
*/ */
if (parse->groupClause && grouping_is_sortable(parse->groupClause)) if (parse->groupClause &&
grouping_is_sortable(parse->groupClause))
root->group_pathkeys = root->group_pathkeys =
make_pathkeys_for_sortclauses(root, make_pathkeys_for_sortclauses(root,
parse->groupClause, parse->groupClause,
...@@ -886,13 +887,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -886,13 +887,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
else else
root->group_pathkeys = NIL; root->group_pathkeys = NIL;
if (list_length(parse->distinctClause) > list_length(parse->sortClause)) if (parse->distinctClause &&
root->sort_pathkeys = grouping_is_sortable(parse->distinctClause))
root->distinct_pathkeys =
make_pathkeys_for_sortclauses(root, make_pathkeys_for_sortclauses(root,
parse->distinctClause, parse->distinctClause,
tlist, tlist,
false); false);
else else
root->distinct_pathkeys = NIL;
root->sort_pathkeys = root->sort_pathkeys =
make_pathkeys_for_sortclauses(root, make_pathkeys_for_sortclauses(root,
parse->sortClause, parse->sortClause,
...@@ -917,17 +921,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -917,17 +921,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
} }
/* /*
* Figure out whether we need a sorted result from query_planner. * Figure out whether we want a sorted result from query_planner.
* *
* If we have a sortable GROUP BY clause, then we want a result sorted * If we have a sortable GROUP BY clause, then we want a result sorted
* properly for grouping. Otherwise, if there is an ORDER BY clause, * properly for grouping. Otherwise, if there's a sortable DISTINCT
* we want to sort by the ORDER BY clause. (Note: if we have both, and * clause that's more rigorous than the ORDER BY clause, we try to
* ORDER BY is a superset of GROUP BY, it would be tempting to request * produce output that's sufficiently well sorted for the DISTINCT.
* sort by ORDER BY --- but that might just leave us failing to * Otherwise, if there is an ORDER BY clause, we want to sort by the
* exploit an available sort order at all. Needs more thought...) * ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
* superset of GROUP BY, it would be tempting to request sort by ORDER
* BY --- but that might just leave us failing to exploit an available
* sort order at all. Needs more thought. The choice for DISTINCT
* versus ORDER BY is much easier, since we know that the parser
* ensured that one is a superset of the other.
*/ */
if (root->group_pathkeys) if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys; root->query_pathkeys = root->group_pathkeys;
else if (list_length(root->distinct_pathkeys) >
list_length(root->sort_pathkeys))
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys) else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys; root->query_pathkeys = root->sort_pathkeys;
else else
...@@ -942,9 +956,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -942,9 +956,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
query_planner(root, sub_tlist, tuple_fraction, limit_tuples, query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups); &cheapest_path, &sorted_path, &dNumGroups);
group_pathkeys = root->group_pathkeys;
sort_pathkeys = root->sort_pathkeys;
/* /*
* If grouping, decide whether to use sorted or hashed grouping. * If grouping, decide whether to use sorted or hashed grouping.
*/ */
...@@ -1024,7 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -1024,7 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Detect if we'll need an explicit sort for grouping */ /* Detect if we'll need an explicit sort for grouping */
if (parse->groupClause && !use_hashed_grouping && if (parse->groupClause && !use_hashed_grouping &&
!pathkeys_contained_in(group_pathkeys, current_pathkeys)) !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{ {
need_sort_for_grouping = true; need_sort_for_grouping = true;
/* /*
...@@ -1135,7 +1146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -1135,7 +1146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->groupClause, parse->groupClause,
groupColIdx, groupColIdx,
result_plan); result_plan);
current_pathkeys = group_pathkeys; current_pathkeys = root->group_pathkeys;
} }
aggstrategy = AGG_SORTED; aggstrategy = AGG_SORTED;
...@@ -1178,7 +1189,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -1178,7 +1189,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->groupClause, parse->groupClause,
groupColIdx, groupColIdx,
result_plan); result_plan);
current_pathkeys = group_pathkeys; current_pathkeys = root->group_pathkeys;
} }
result_plan = (Plan *) make_group(root, result_plan = (Plan *) make_group(root,
...@@ -1214,35 +1225,129 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) ...@@ -1214,35 +1225,129 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
} /* end of if (setOperations) */ } /* end of if (setOperations) */
/* /*
* If we were not able to make the plan come out in the right order, add * If there is a DISTINCT clause, add the necessary node(s).
* an explicit sort step. */
if (parse->distinctClause)
{
double dNumDistinctRows;
long numDistinctRows;
bool use_hashed_distinct;
bool can_sort;
bool can_hash;
/*
* If there was grouping or aggregation, use the current number of
* rows as the estimated number of DISTINCT rows (ie, assume the
* result was already mostly unique). If not, use the number of
* distinct-groups calculated by query_planner.
*/
if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
dNumDistinctRows = result_plan->plan_rows;
else
dNumDistinctRows = dNumGroups;
/* Also convert to long int --- but 'ware overflow! */
numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
/*
* If we have a sortable DISTINCT ON clause, we always use sorting.
* This enforces the expected behavior of DISTINCT ON.
*/ */
if (sort_pathkeys) can_sort = grouping_is_sortable(parse->distinctClause);
if (can_sort && parse->hasDistinctOn)
use_hashed_distinct = false;
else
{ {
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) can_hash = grouping_is_hashable(parse->distinctClause);
if (can_hash && can_sort)
{ {
result_plan = (Plan *) make_sort_from_pathkeys(root, /* we have a meaningful choice to make ... */
result_plan, use_hashed_distinct =
sort_pathkeys, choose_hashed_distinct(root,
limit_tuples); result_plan, current_pathkeys,
current_pathkeys = sort_pathkeys; tuple_fraction, limit_tuples,
dNumDistinctRows);
}
else if (can_hash)
use_hashed_distinct = true;
else if (can_sort)
use_hashed_distinct = false;
else
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("could not implement DISTINCT"),
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
use_hashed_distinct = false; /* keep compiler quiet */
} }
} }
if (use_hashed_distinct)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(root,
result_plan->targetlist,
NIL,
AGG_HASHED,
list_length(parse->distinctClause),
extract_grouping_cols(parse->distinctClause,
result_plan->targetlist),
extract_grouping_ops(parse->distinctClause),
numDistinctRows,
0,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else
{
/* /*
* If there is a DISTINCT clause, add the UNIQUE node. * Use a Unique node to implement DISTINCT. Add an explicit sort
* if we couldn't make the path come out the way the Unique node
* needs it. If we do have to sort, sort by the more rigorous
* of DISTINCT and ORDER BY, to avoid a second sort below.
*/ */
if (parse->distinctClause) if (!pathkeys_contained_in(root->distinct_pathkeys,
current_pathkeys))
{ {
result_plan = (Plan *) make_unique(result_plan, parse->distinctClause); if (list_length(root->distinct_pathkeys) >=
list_length(root->sort_pathkeys))
current_pathkeys = root->distinct_pathkeys;
else
{
current_pathkeys = root->sort_pathkeys;
/* Assert checks that parser didn't mess up... */
Assert(pathkeys_contained_in(root->distinct_pathkeys,
current_pathkeys));
}
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
current_pathkeys,
-1.0);
}
result_plan = (Plan *) make_unique(result_plan,
parse->distinctClause);
result_plan->plan_rows = dNumDistinctRows;
/* The Unique node won't change sort ordering */
}
}
/* /*
* If there was grouping or aggregation, leave plan_rows as-is (ie, * If ORDER BY was given and we were not able to make the plan come out in
* assume the result was already mostly unique). If not, use the * the right order, add an explicit sort step.
* number of distinct-groups calculated by query_planner.
*/ */
if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs) if (parse->sortClause)
result_plan->plan_rows = dNumGroups; {
if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
root->sort_pathkeys,
limit_tuples);
current_pathkeys = root->sort_pathkeys;
}
} }
/* /*
...@@ -1622,6 +1727,31 @@ extract_grouping_ops(List *groupClause) ...@@ -1622,6 +1727,31 @@ extract_grouping_ops(List *groupClause)
return groupOperators; return groupOperators;
} }
/*
 * extract_grouping_cols
 *	  Build an array of target-list column numbers for a SortGroupClause list.
 *
 * groupClause: list of SortGroupClause nodes to translate.
 * tlist: target list in which each clause's entry is looked up.
 *
 * Returns a palloc'd array with one AttrNumber per clause, in list order;
 * each element is the resno of the TargetEntry that
 * get_sortgroupclause_tle() finds for the corresponding clause.
 */
static AttrNumber *
extract_grouping_cols(List *groupClause, List *tlist)
{
	int			ncols = list_length(groupClause);
	AttrNumber *resnos = (AttrNumber *) palloc(sizeof(AttrNumber) * ncols);
	AttrNumber *next = resnos;
	ListCell   *lc;

	foreach(lc, groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);

		/* record the output column this clause refers to */
		*next++ = get_sortgroupclause_tle(sgc, tlist)->resno;
	}

	return resnos;
}
/* /*
* grouping_is_sortable - is it possible to implement grouping list by sorting? * grouping_is_sortable - is it possible to implement grouping list by sorting?
* *
...@@ -1680,6 +1810,7 @@ choose_hashed_grouping(PlannerInfo *root, ...@@ -1680,6 +1810,7 @@ choose_hashed_grouping(PlannerInfo *root,
double cheapest_path_rows; double cheapest_path_rows;
int cheapest_path_width; int cheapest_path_width;
Size hashentrysize; Size hashentrysize;
List *target_pathkeys;
List *current_pathkeys; List *current_pathkeys;
Path hashed_p; Path hashed_p;
Path sorted_p; Path sorted_p;
...@@ -1716,6 +1847,20 @@ choose_hashed_grouping(PlannerInfo *root, ...@@ -1716,6 +1847,20 @@ choose_hashed_grouping(PlannerInfo *root,
if (hashentrysize * dNumGroups > work_mem * 1024L) if (hashentrysize * dNumGroups > work_mem * 1024L)
return false; return false;
/*
* When we have both GROUP BY and DISTINCT, use the more-rigorous of
* DISTINCT and ORDER BY as the assumed required output sort order.
* This is an oversimplification because the DISTINCT might get
* implemented via hashing, but it's not clear that the case is common
* enough (or that our estimates are good enough) to justify trying to
* solve it exactly.
*/
if (list_length(root->distinct_pathkeys) >
list_length(root->sort_pathkeys))
target_pathkeys = root->distinct_pathkeys;
else
target_pathkeys = root->sort_pathkeys;
/* /*
* See if the estimated cost is no more than doing it the other way. While * See if the estimated cost is no more than doing it the other way. While
* avoiding the need for sorted input is usually a win, the fact that the * avoiding the need for sorted input is usually a win, the fact that the
...@@ -1737,8 +1882,8 @@ choose_hashed_grouping(PlannerInfo *root, ...@@ -1737,8 +1882,8 @@ choose_hashed_grouping(PlannerInfo *root,
cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path->startup_cost, cheapest_path->total_cost,
cheapest_path_rows); cheapest_path_rows);
/* Result of hashed agg is always unsorted */ /* Result of hashed agg is always unsorted */
if (root->sort_pathkeys) if (target_pathkeys)
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
dNumGroups, cheapest_path_width, limit_tuples); dNumGroups, cheapest_path_width, limit_tuples);
if (sorted_path) if (sorted_path)
...@@ -1770,9 +1915,9 @@ choose_hashed_grouping(PlannerInfo *root, ...@@ -1770,9 +1915,9 @@ choose_hashed_grouping(PlannerInfo *root,
sorted_p.startup_cost, sorted_p.total_cost, sorted_p.startup_cost, sorted_p.total_cost,
cheapest_path_rows); cheapest_path_rows);
/* The Agg or Group node will preserve ordering */ /* The Agg or Group node will preserve ordering */
if (root->sort_pathkeys && if (target_pathkeys &&
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) !pathkeys_contained_in(target_pathkeys, current_pathkeys))
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
dNumGroups, cheapest_path_width, limit_tuples); dNumGroups, cheapest_path_width, limit_tuples);
/* /*
...@@ -1791,6 +1936,111 @@ choose_hashed_grouping(PlannerInfo *root, ...@@ -1791,6 +1936,111 @@ choose_hashed_grouping(PlannerInfo *root,
return false; return false;
} }
/*
 * choose_hashed_distinct
 *	  Decide whether DISTINCT should be done by hashing rather than sorting.
 *
 * The logic parallels choose_hashed_grouping, but differs enough that the
 * two are kept as separate functions.  Deciding the GROUP BY and DISTINCT
 * strategies independently is admittedly imprecise; merging the decisions
 * was judged too unwieldy given how rarely the two clauses appear together.
 * The practical effect is a mild bias toward sorting when one sort key can
 * serve both steps, which is not necessarily a bad outcome.
 *
 * root: planner state; the DISTINCT and ORDER BY clauses and their pathkeys
 *		are read from it.
 * input_plan: plan producing the rows to be de-duplicated.
 * input_pathkeys: sort ordering that input_plan's output already has.
 * tuple_fraction: fraction of output to be fetched; a value >= 1.0 is taken
 *		as an absolute row count and converted to a fraction here.
 * limit_tuples: row limit passed to the final-sort cost estimates.
 * dNumDistinctRows: estimated number of rows after DISTINCT.
 *
 * Returns true if hashing is estimated to be strictly cheaper, else false.
 *
 * Note: callers should use this only when both methods are feasible.
 */
static bool
choose_hashed_distinct(PlannerInfo *root,
					   Plan *input_plan, List *input_pathkeys,
					   double tuple_fraction, double limit_tuples,
					   double dNumDistinctRows)
{
	int			ncols = list_length(root->parse->distinctClause);
	List	   *order_by = root->parse->sortClause;
	List	   *sorted_keys = input_pathkeys;
	Size		entry_bytes;
	Path		hash_cost;
	Path		sort_cost;

	/*
	 * Hashing is ruled out when enable_hashagg is off, or when the estimated
	 * hash table (one entry per distinct row) would not fit in work_mem.
	 */
	entry_bytes = MAXALIGN(input_plan->plan_width) +
		MAXALIGN(sizeof(MinimalTupleData));
	if (!enable_hashagg ||
		entry_bytes * dNumDistinctRows > work_mem * 1024L)
		return false;

	/*
	 * Both strategies are possible, so compare estimated costs.  Skipping
	 * the input sort usually helps, but unsorted output may cost us a sort
	 * later.  The candidates are
	 *		input_plan + hashagg [+ final sort]
	 * and
	 *		input_plan [+ sort] + group [+ final sort]
	 * with bracketed steps included only when required.
	 *
	 * hash_cost and sort_cost are scratch Paths used purely to carry cost
	 * numbers; no real Path nodes are built.
	 */

	/* Hashed alternative: its output is unordered, so ORDER BY needs a sort */
	cost_agg(&hash_cost, root, AGG_HASHED, 0,
			 ncols, dNumDistinctRows,
			 input_plan->startup_cost, input_plan->total_cost,
			 input_plan->plan_rows);
	if (order_by)
		cost_sort(&hash_cost, root, root->sort_pathkeys, hash_cost.total_cost,
				  dNumDistinctRows, input_plan->plan_width, limit_tuples);

	/* Sorted alternative: start from the input plan's costs */
	sort_cost.startup_cost = input_plan->startup_cost;
	sort_cost.total_cost = input_plan->total_cost;
	if (!pathkeys_contained_in(root->distinct_pathkeys, sorted_keys))
	{
		/*
		 * An explicit sort is needed; sort by whichever of DISTINCT and
		 * ORDER BY has more keys so a second sort can be avoided.
		 */
		sorted_keys = (list_length(root->distinct_pathkeys) >=
					   list_length(root->sort_pathkeys))
			? root->distinct_pathkeys
			: root->sort_pathkeys;
		cost_sort(&sort_cost, root, sorted_keys, sort_cost.total_cost,
				  input_plan->plan_rows, input_plan->plan_width, -1.0);
	}
	cost_group(&sort_cost, root, ncols, dNumDistinctRows,
			   sort_cost.startup_cost, sort_cost.total_cost,
			   input_plan->plan_rows);
	if (order_by &&
		!pathkeys_contained_in(root->sort_pathkeys, sorted_keys))
		cost_sort(&sort_cost, root, root->sort_pathkeys, sort_cost.total_cost,
				  dNumDistinctRows, input_plan->plan_width, limit_tuples);

	/*
	 * Compare at the top-level tuple fraction, first turning an absolute
	 * row count (LIMIT) into a fraction of the distinct rows.
	 */
	if (tuple_fraction >= 1.0)
		tuple_fraction /= dNumDistinctRows;

	/* Use hashing only when it is strictly cheaper */
	return compare_fractional_path_costs(&hash_cost, &sort_cost,
										 tuple_fraction) < 0;
}
/*--------------- /*---------------
* make_subplanTargetList * make_subplanTargetList
* Generate appropriate target list when grouping is required. * Generate appropriate target list when grouping is required.
...@@ -1857,7 +2107,7 @@ make_subplanTargetList(PlannerInfo *root, ...@@ -1857,7 +2107,7 @@ make_subplanTargetList(PlannerInfo *root,
/* /*
* Otherwise, start with a "flattened" tlist (having just the vars * Otherwise, start with a "flattened" tlist (having just the vars
* mentioned in the targetlist and HAVING qual --- but not upper- level * mentioned in the targetlist and HAVING qual --- but not upper-level
* Vars; they will be replaced by Params later on). * Vars; they will be replaced by Params later on).
*/ */
sub_tlist = flatten_tlist(tlist); sub_tlist = flatten_tlist(tlist);
...@@ -1886,16 +2136,28 @@ make_subplanTargetList(PlannerInfo *root, ...@@ -1886,16 +2136,28 @@ make_subplanTargetList(PlannerInfo *root,
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL; TargetEntry *te = NULL;
/*
* Find or make a matching sub_tlist entry. If the groupexpr
* isn't a Var, no point in searching. (Note that the parser
* won't make multiple groupClause entries for the same TLE.)
*/
if (groupexpr && IsA(groupexpr, Var))
{
ListCell *sl; ListCell *sl;
/* Find or make a matching sub_tlist entry */
foreach(sl, sub_tlist) foreach(sl, sub_tlist)
{ {
te = (TargetEntry *) lfirst(sl); TargetEntry *lte = (TargetEntry *) lfirst(sl);
if (equal(groupexpr, te->expr))
if (equal(groupexpr, lte->expr))
{
te = lte;
break; break;
} }
if (!sl) }
}
if (!te)
{ {
te = makeTargetEntry((Expr *) groupexpr, te = makeTargetEntry((Expr *) groupexpr,
list_length(sub_tlist) + 1, list_length(sub_tlist) + 1,
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1447,9 +1447,6 @@ transformDistinctClause(ParseState *pstate, ...@@ -1447,9 +1447,6 @@ transformDistinctClause(ParseState *pstate,
/* /*
* Now add any remaining non-resjunk tlist items, using default * Now add any remaining non-resjunk tlist items, using default
* sort/group semantics for their data types. * sort/group semantics for their data types.
*
* XXX for now, the planner requires distinctClause to be sortable,
* so we have to insist on that here.
*/ */
foreach(tlitem, *targetlist) foreach(tlitem, *targetlist)
{ {
...@@ -1459,8 +1456,7 @@ transformDistinctClause(ParseState *pstate, ...@@ -1459,8 +1456,7 @@ transformDistinctClause(ParseState *pstate,
continue; /* ignore junk */ continue; /* ignore junk */
result = addTargetToGroupList(pstate, tle, result = addTargetToGroupList(pstate, tle,
result, *targetlist, result, *targetlist,
true, /* XXX for now */ false, true);
true);
} }
return result; return result;
...@@ -1555,8 +1551,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist, ...@@ -1555,8 +1551,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"))); errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
result = addTargetToGroupList(pstate, tle, result = addTargetToGroupList(pstate, tle,
result, *targetlist, result, *targetlist,
true, /* someday allow hash-only? */ false, true);
true);
} }
return result; return result;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -163,6 +163,7 @@ typedef struct PlannerInfo ...@@ -163,6 +163,7 @@ typedef struct PlannerInfo
* actual pathkeys afterwards */ * actual pathkeys afterwards */
List *group_pathkeys; /* groupClause pathkeys, if any */ List *group_pathkeys; /* groupClause pathkeys, if any */
List *distinct_pathkeys; /* distinctClause pathkeys, if any */
List *sort_pathkeys; /* sortClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */
List *initial_rels; /* RelOptInfos we are now trying to join */ List *initial_rels; /* RelOptInfos we are now trying to join */
......
...@@ -79,7 +79,7 @@ INSERT INTO TEMP_GROUP ...@@ -79,7 +79,7 @@ INSERT INTO TEMP_GROUP
INSERT INTO TEMP_GROUP INSERT INTO TEMP_GROUP
SELECT 2, i.f1, f.f1 SELECT 2, i.f1, f.f1
FROM INT4_TBL i, FLOAT8_TBL f; FROM INT4_TBL i, FLOAT8_TBL f;
SELECT DISTINCT f1 AS two FROM TEMP_GROUP; SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
two two
----- -----
1 1
......
...@@ -129,7 +129,8 @@ WHERE p1.oid != p2.oid AND ...@@ -129,7 +129,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.prorettype < p2.prorettype); (p1.prorettype < p2.prorettype)
ORDER BY 1, 2;
prorettype | prorettype prorettype | prorettype
------------+------------ ------------+------------
25 | 1043 25 | 1043
...@@ -142,7 +143,8 @@ WHERE p1.oid != p2.oid AND ...@@ -142,7 +143,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[0] < p2.proargtypes[0]); (p1.proargtypes[0] < p2.proargtypes[0])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
25 | 1042 25 | 1042
...@@ -158,7 +160,8 @@ WHERE p1.oid != p2.oid AND ...@@ -158,7 +160,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[1] < p2.proargtypes[1]); (p1.proargtypes[1] < p2.proargtypes[1])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
23 | 28 23 | 28
...@@ -173,7 +176,8 @@ WHERE p1.oid != p2.oid AND ...@@ -173,7 +176,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[2] < p2.proargtypes[2]); (p1.proargtypes[2] < p2.proargtypes[2])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
1114 | 1184 1114 | 1184
...@@ -185,7 +189,8 @@ WHERE p1.oid != p2.oid AND ...@@ -185,7 +189,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[3] < p2.proargtypes[3]); (p1.proargtypes[3] < p2.proargtypes[3])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
1114 | 1184 1114 | 1184
...@@ -197,7 +202,8 @@ WHERE p1.oid != p2.oid AND ...@@ -197,7 +202,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[4] < p2.proargtypes[4]); (p1.proargtypes[4] < p2.proargtypes[4])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
(0 rows) (0 rows)
...@@ -208,7 +214,8 @@ WHERE p1.oid != p2.oid AND ...@@ -208,7 +214,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[5] < p2.proargtypes[5]); (p1.proargtypes[5] < p2.proargtypes[5])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
(0 rows) (0 rows)
...@@ -219,7 +226,8 @@ WHERE p1.oid != p2.oid AND ...@@ -219,7 +226,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[6] < p2.proargtypes[6]); (p1.proargtypes[6] < p2.proargtypes[6])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
(0 rows) (0 rows)
...@@ -230,7 +238,8 @@ WHERE p1.oid != p2.oid AND ...@@ -230,7 +238,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[7] < p2.proargtypes[7]); (p1.proargtypes[7] < p2.proargtypes[7])
ORDER BY 1, 2;
proargtypes | proargtypes proargtypes | proargtypes
-------------+------------- -------------+-------------
(0 rows) (0 rows)
......
...@@ -14,7 +14,7 @@ SELECT DISTINCT two FROM tmp; ...@@ -14,7 +14,7 @@ SELECT DISTINCT two FROM tmp;
-- --
-- awk '{print $5;}' onek.data | sort -n | uniq -- awk '{print $5;}' onek.data | sort -n | uniq
-- --
SELECT DISTINCT ten FROM tmp; SELECT DISTINCT ten FROM tmp ORDER BY 1;
ten ten
----- -----
0 0
...@@ -32,7 +32,7 @@ SELECT DISTINCT ten FROM tmp; ...@@ -32,7 +32,7 @@ SELECT DISTINCT ten FROM tmp;
-- --
-- awk '{print $16;}' onek.data | sort -d | uniq -- awk '{print $16;}' onek.data | sort -d | uniq
-- --
SELECT DISTINCT string4 FROM tmp; SELECT DISTINCT string4 FROM tmp ORDER BY 1;
string4 string4
--------- ---------
AAAAxx AAAAxx
......
...@@ -183,7 +183,8 @@ SELECT p.name, name(p.hobbies) FROM person* p; ...@@ -183,7 +183,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
-- the next two queries demonstrate how functions generate bogus duplicates. -- the next two queries demonstrate how functions generate bogus duplicates.
-- this is a "feature" .. -- this is a "feature" ..
-- --
SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r; SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
ORDER BY 1,2;
SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r; SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r;
......
...@@ -469,7 +469,8 @@ SELECT p.name, name(p.hobbies) FROM person* p; ...@@ -469,7 +469,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
-- the next two queries demonstrate how functions generate bogus duplicates. -- the next two queries demonstrate how functions generate bogus duplicates.
-- this is a "feature" .. -- this is a "feature" ..
-- --
SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r; SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
ORDER BY 1,2;
name | name name | name
-------------+--------------- -------------+---------------
basketball | hightops basketball | hightops
......
...@@ -63,7 +63,7 @@ INSERT INTO TEMP_GROUP ...@@ -63,7 +63,7 @@ INSERT INTO TEMP_GROUP
SELECT 2, i.f1, f.f1 SELECT 2, i.f1, f.f1
FROM INT4_TBL i, FLOAT8_TBL f; FROM INT4_TBL i, FLOAT8_TBL f;
SELECT DISTINCT f1 AS two FROM TEMP_GROUP; SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
FROM TEMP_GROUP FROM TEMP_GROUP
......
...@@ -121,7 +121,8 @@ WHERE p1.oid != p2.oid AND ...@@ -121,7 +121,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.prorettype < p2.prorettype); (p1.prorettype < p2.prorettype)
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0] SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -129,7 +130,8 @@ WHERE p1.oid != p2.oid AND ...@@ -129,7 +130,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[0] < p2.proargtypes[0]); (p1.proargtypes[0] < p2.proargtypes[0])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1] SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -137,7 +139,8 @@ WHERE p1.oid != p2.oid AND ...@@ -137,7 +139,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[1] < p2.proargtypes[1]); (p1.proargtypes[1] < p2.proargtypes[1])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2] SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -145,7 +148,8 @@ WHERE p1.oid != p2.oid AND ...@@ -145,7 +148,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[2] < p2.proargtypes[2]); (p1.proargtypes[2] < p2.proargtypes[2])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[3], p2.proargtypes[3] SELECT DISTINCT p1.proargtypes[3], p2.proargtypes[3]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -153,7 +157,8 @@ WHERE p1.oid != p2.oid AND ...@@ -153,7 +157,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[3] < p2.proargtypes[3]); (p1.proargtypes[3] < p2.proargtypes[3])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[4], p2.proargtypes[4] SELECT DISTINCT p1.proargtypes[4], p2.proargtypes[4]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -161,7 +166,8 @@ WHERE p1.oid != p2.oid AND ...@@ -161,7 +166,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[4] < p2.proargtypes[4]); (p1.proargtypes[4] < p2.proargtypes[4])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[5], p2.proargtypes[5] SELECT DISTINCT p1.proargtypes[5], p2.proargtypes[5]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -169,7 +175,8 @@ WHERE p1.oid != p2.oid AND ...@@ -169,7 +175,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[5] < p2.proargtypes[5]); (p1.proargtypes[5] < p2.proargtypes[5])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[6], p2.proargtypes[6] SELECT DISTINCT p1.proargtypes[6], p2.proargtypes[6]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -177,7 +184,8 @@ WHERE p1.oid != p2.oid AND ...@@ -177,7 +184,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[6] < p2.proargtypes[6]); (p1.proargtypes[6] < p2.proargtypes[6])
ORDER BY 1, 2;
SELECT DISTINCT p1.proargtypes[7], p2.proargtypes[7] SELECT DISTINCT p1.proargtypes[7], p2.proargtypes[7]
FROM pg_proc AS p1, pg_proc AS p2 FROM pg_proc AS p1, pg_proc AS p2
...@@ -185,7 +193,8 @@ WHERE p1.oid != p2.oid AND ...@@ -185,7 +193,8 @@ WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND p1.prolang = 12 AND p2.prolang = 12 AND
NOT p1.proisagg AND NOT p2.proisagg AND NOT p1.proisagg AND NOT p2.proisagg AND
(p1.proargtypes[7] < p2.proargtypes[7]); (p1.proargtypes[7] < p2.proargtypes[7])
ORDER BY 1, 2;
-- Look for functions that return type "internal" and do not have any -- Look for functions that return type "internal" and do not have any
-- "internal" argument. Such a function would be a security hole since -- "internal" argument. Such a function would be a security hole since
......
...@@ -10,12 +10,12 @@ SELECT DISTINCT two FROM tmp; ...@@ -10,12 +10,12 @@ SELECT DISTINCT two FROM tmp;
-- --
-- awk '{print $5;}' onek.data | sort -n | uniq -- awk '{print $5;}' onek.data | sort -n | uniq
-- --
SELECT DISTINCT ten FROM tmp; SELECT DISTINCT ten FROM tmp ORDER BY 1;
-- --
-- awk '{print $16;}' onek.data | sort -d | uniq -- awk '{print $16;}' onek.data | sort -d | uniq
-- --
SELECT DISTINCT string4 FROM tmp; SELECT DISTINCT string4 FROM tmp ORDER BY 1;
-- --
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq | -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment