Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items

as per my recent proposal: 1. Fold SortClause and GroupClause into a single node type SortGroupClause. We were already relying on them to be struct-equivalent, so using two node tags wasn't accomplishing much except to get in the way of comparing items with equal(). 2. Add an "eqop" field to SortGroupClause to carry the associated equality operator. This is cheap for the parser to get at the same time it's looking up the sort operator, and storing it eliminates the need for repeated not-so-cheap lookups during planning. In future this will also let us represent GROUP/DISTINCT operations on datatypes that have hash opclasses but no btree opclasses (ie, they have equality but no natural sort order). The previous representation simply didn't work for that, since its only indicator of comparison semantics was a sort operator. 3. Add a hasDistinctOn boolean to struct Query to explicitly record whether the distinctClause came from DISTINCT or DISTINCT ON. This allows removing some complicated and not 100% bulletproof code that attempted to figure that out from the distinctClause alone. This patch doesn't in itself create any new capability, but it's necessary infrastructure for future attempts to use hash-based grouping for DISTINCT and UNION/INTERSECT/EXCEPT.

Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items
as per my recent proposal: 1. Fold SortClause and GroupClause into a single node type SortGroupClause. We were already relying on them to be struct-equivalent, so using two node tags wasn't accomplishing much except to get in the way of comparing items with equal(). 2. Add an "eqop" field to SortGroupClause to carry the associated equality operator. This is cheap for the parser to get at the same time it's looking up the sort operator, and storing it eliminates the need for repeated not-so-cheap lookups during planning. In future this will also let us represent GROUP/DISTINCT operations on datatypes that have hash opclasses but no btree opclasses (ie, they have equality but no natural sort order). The previous representation simply didn't work for that, since its only indicator of comparison semantics was a sort operator. 3. Add a hasDistinctOn boolean to struct Query to explicitly record whether the distinctClause came from DISTINCT or DISTINCT ON. This allows removing some complicated and not 100% bulletproof code that attempted to figure that out from the distinctClause alone. This patch doesn't in itself create any new capability, but it's necessary infrastructure for future attempts to use hash-based grouping for DISTINCT and UNION/INTERSECT/EXCEPT.
95113047 · Tom Lane · 49f001d8 · 95113047 · 95113047 · 95113047
Commit 95113047 authored Aug 02, 2008 by Tom Lane
33 changed files
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.76 2008/06/14 18:04:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1335,7 +1335,7 @@ find_expr_references_walker(Node *node,
 		}
 		return false;
 	}
-	if (IsA(node, Const))
+	else if (IsA(node, Const))
 	{
 		Const	   *con = (Const *) node;
 		Oid			objoid;
@@ -1408,7 +1408,7 @@ find_expr_references_walker(Node *node,
 		}
 		return false;
 	}
-	if (IsA(node, Param))
+	else if (IsA(node, Param))
 	{
 		Param	   *param = (Param *) node;
@@ -1416,7 +1416,7 @@ find_expr_references_walker(Node *node,
 		add_object_address(OCLASS_TYPE, param->paramtype, 0,
 						   context->addrs);
 	}
-	if (IsA(node, FuncExpr))
+	else if (IsA(node, FuncExpr))
 	{
 		FuncExpr   *funcexpr = (FuncExpr *) node;
@@ -1424,7 +1424,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, OpExpr))
+	else if (IsA(node, OpExpr))
 	{
 		OpExpr	   *opexpr = (OpExpr *) node;
@@ -1432,7 +1432,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, DistinctExpr))
+	else if (IsA(node, DistinctExpr))
 	{
 		DistinctExpr *distinctexpr = (DistinctExpr *) node;
@@ -1440,7 +1440,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, ScalarArrayOpExpr))
+	else if (IsA(node, ScalarArrayOpExpr))
 	{
 		ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
@@ -1448,7 +1448,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, NullIfExpr))
+	else if (IsA(node, NullIfExpr))
 	{
 		NullIfExpr *nullifexpr = (NullIfExpr *) node;
@@ -1456,7 +1456,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, Aggref))
+	else if (IsA(node, Aggref))
 	{
 		Aggref	   *aggref = (Aggref *) node;
@@ -1464,12 +1464,12 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (is_subplan(node))
+	else if (is_subplan(node))
 	{
 		/* Extra work needed here if we ever need this case */
 		elog(ERROR, "already-planned subqueries not supported");
 	}
-	if (IsA(node, RelabelType))
+	else if (IsA(node, RelabelType))
 	{
 		RelabelType *relab = (RelabelType *) node;
@@ -1477,7 +1477,7 @@ find_expr_references_walker(Node *node,
 		add_object_address(OCLASS_TYPE, relab->resulttype, 0,
 						   context->addrs);
 	}
-	if (IsA(node, CoerceViaIO))
+	else if (IsA(node, CoerceViaIO))
 	{
 		CoerceViaIO *iocoerce = (CoerceViaIO *) node;
@@ -1485,7 +1485,7 @@ find_expr_references_walker(Node *node,
 		add_object_address(OCLASS_TYPE, iocoerce->resulttype, 0,
 						   context->addrs);
 	}
-	if (IsA(node, ArrayCoerceExpr))
+	else if (IsA(node, ArrayCoerceExpr))
 	{
 		ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
@@ -1496,7 +1496,7 @@ find_expr_references_walker(Node *node,
 						   context->addrs);
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, ConvertRowtypeExpr))
+	else if (IsA(node, ConvertRowtypeExpr))
 	{
 		ConvertRowtypeExpr *cvt = (ConvertRowtypeExpr *) node;
@@ -1504,14 +1504,14 @@ find_expr_references_walker(Node *node,
 		add_object_address(OCLASS_TYPE, cvt->resulttype, 0,
 						   context->addrs);
 	}
-	if (IsA(node, RowExpr))
+	else if (IsA(node, RowExpr))
 	{
 		RowExpr    *rowexpr = (RowExpr *) node;
 		add_object_address(OCLASS_TYPE, rowexpr->row_typeid, 0,
 						   context->addrs);
 	}
-	if (IsA(node, RowCompareExpr))
+	else if (IsA(node, RowCompareExpr))
 	{
 		RowCompareExpr *rcexpr = (RowCompareExpr *) node;
 		ListCell   *l;
@@ -1528,14 +1528,25 @@ find_expr_references_walker(Node *node,
 		}
 		/* fall through to examine arguments */
 	}
-	if (IsA(node, CoerceToDomain))
+	else if (IsA(node, CoerceToDomain))
 	{
 		CoerceToDomain *cd = (CoerceToDomain *) node;
 		add_object_address(OCLASS_TYPE, cd->resulttype, 0,
 						   context->addrs);
 	}
-	if (IsA(node, Query))
+	else if (IsA(node, SortGroupClause))
+	{
+		SortGroupClause *sgc = (SortGroupClause *) node;
+		add_object_address(OCLASS_OPERATOR, sgc->eqop, 0,
+						   context->addrs);
+		if (OidIsValid(sgc->sortop))
+			add_object_address(OCLASS_OPERATOR, sgc->sortop, 0,
+							   context->addrs);
+		return false;
+	}
+	else if (IsA(node, Query))
 	{
 		/* Recurse into RTE subquery or not-yet-planned sublink subquery */
 		Query	   *query = (Query *) node;
@@ -1572,6 +1583,11 @@ find_expr_references_walker(Node *node,
 			}
 		}
+		/* query_tree_walker ignores ORDER BY etc, but we need those opers */
+		find_expr_references_walker((Node *) query->sortClause, context);
+		find_expr_references_walker((Node *) query->groupClause, context);
+		find_expr_references_walker((Node *) query->distinctClause, context);
 		/* Examine substructure of query */
 		context->rtables = lcons(query->rtable, context->rtables);
 		result = query_tree_walker(query,

--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.123 2008/07/01 10:33:09 heikki Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.124 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1490,10 +1490,8 @@ static bool
 std_typanalyze(VacAttrStats *stats)
 {
 	Form_pg_attribute attr = stats->attr;
-	Operator	func_operator;
+	Oid			ltopr;
-	Oid			eqopr = InvalidOid;
+	Oid			eqopr;
-	Oid			eqfunc = InvalidOid;
-	Oid			ltopr = InvalidOid;
 	StdAnalyzeData *mystats;
 	/* If the attstattarget column is negative, use the default value */
@@ -1501,29 +1499,19 @@ std_typanalyze(VacAttrStats *stats)
 	if (attr->attstattarget < 0)
 		attr->attstattarget = default_statistics_target;
+	/* Look for default "<" and "=" operators for column's type */
+	get_sort_group_operators(attr->atttypid,
+							 false, false, false,
+							 &ltopr, &eqopr, NULL);
 	/* If column has no "=" operator, we can't do much of anything */
-	func_operator = equality_oper(attr->atttypid, true);
+	if (!OidIsValid(eqopr))
-	if (func_operator != NULL)
-	{
-		eqopr = oprid(func_operator);
-		eqfunc = oprfuncid(func_operator);
-		ReleaseSysCache(func_operator);
-	}
-	if (!OidIsValid(eqfunc))
 		return false;
-	/* Is there a "<" operator with suitable semantics? */
-	func_operator = ordering_oper(attr->atttypid, true);
-	if (func_operator != NULL)
-	{
-		ltopr = oprid(func_operator);
-		ReleaseSysCache(func_operator);
-	}
 	/* Save the operator info for compute_stats routines */
 	mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
 	mystats->eqopr = eqopr;
-	mystats->eqfunc = eqfunc;
+	mystats->eqfunc = get_opcode(eqopr);
 	mystats->ltopr = ltopr;
 	stats->extra_data = mystats;

--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -61,7 +61,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.158 2008/05/12 00:00:49 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.159 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1496,7 +1496,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		if (aggref->aggdistinct)
 		{
-			Oid			eq_function;
+			Oid			lt_opr;
+			Oid			eq_opr;
 			/* We don't implement DISTINCT aggs in the HASHED case */
 			Assert(node->aggstrategy != AGG_HASHED);
@@ -1524,9 +1525,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			 * record it in the Aggref node ... or at latest, do it in the
 			 * planner.
 			 */
-			eq_function = equality_oper_funcid(inputTypes[0]);
+			get_sort_group_operators(inputTypes[0],
-			fmgr_info(eq_function, &(peraggstate->equalfn));
+									 true, true, false,
-			peraggstate->sortOperator = ordering_oper_opid(inputTypes[0]);
+									 &lt_opr, &eq_opr, NULL);
+			fmgr_info(get_opcode(eq_opr), &(peraggstate->equalfn));
+			peraggstate->sortOperator = lt_opr;
 			peraggstate->sortstate = NULL;
 		}

--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.395 2008/07/16 01:30:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1542,24 +1542,13 @@ _copyFkConstraint(FkConstraint *from)
 	return newnode;
 }
-static SortClause *
+static SortGroupClause *
-_copySortClause(SortClause *from)
+_copySortGroupClause(SortGroupClause *from)
 {
-	SortClause *newnode = makeNode(SortClause);
+	SortGroupClause *newnode = makeNode(SortGroupClause);
-	COPY_SCALAR_FIELD(tleSortGroupRef);
-	COPY_SCALAR_FIELD(sortop);
-	COPY_SCALAR_FIELD(nulls_first);
-	return newnode;
-}
-static GroupClause *
-_copyGroupClause(GroupClause *from)
-{
-	GroupClause *newnode = makeNode(GroupClause);
 	COPY_SCALAR_FIELD(tleSortGroupRef);
+	COPY_SCALAR_FIELD(eqop);
 	COPY_SCALAR_FIELD(sortop);
 	COPY_SCALAR_FIELD(nulls_first);
@@ -1861,6 +1850,7 @@ _copyQuery(Query *from)
 	COPY_NODE_FIELD(intoClause);
 	COPY_SCALAR_FIELD(hasAggs);
 	COPY_SCALAR_FIELD(hasSubLinks);
+	COPY_SCALAR_FIELD(hasDistinctOn);
 	COPY_NODE_FIELD(rtable);
 	COPY_NODE_FIELD(jointree);
 	COPY_NODE_FIELD(targetList);
@@ -3586,11 +3576,8 @@ copyObject(void *from)
 		case T_RangeTblEntry:
 			retval = _copyRangeTblEntry(from);
 			break;
-		case T_SortClause:
+		case T_SortGroupClause:
-			retval = _copySortClause(from);
+			retval = _copySortGroupClause(from);
-			break;
-		case T_GroupClause:
-			retval = _copyGroupClause(from);
 			break;
 		case T_RowMarkClause:
 			retval = _copyRowMarkClause(from);

--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.324 2008/07/16 01:30:22 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -756,6 +756,7 @@ _equalQuery(Query *a, Query *b)
 	COMPARE_NODE_FIELD(intoClause);
 	COMPARE_SCALAR_FIELD(hasAggs);
 	COMPARE_SCALAR_FIELD(hasSubLinks);
+	COMPARE_SCALAR_FIELD(hasDistinctOn);
 	COMPARE_NODE_FIELD(rtable);
 	COMPARE_NODE_FIELD(jointree);
 	COMPARE_NODE_FIELD(targetList);
@@ -1885,9 +1886,10 @@ _equalRangeTblEntry(RangeTblEntry *a, RangeTblEntry *b)
 }
 static bool
-_equalSortClause(SortClause *a, SortClause *b)
+_equalSortGroupClause(SortGroupClause *a, SortGroupClause *b)
 {
 	COMPARE_SCALAR_FIELD(tleSortGroupRef);
+	COMPARE_SCALAR_FIELD(eqop);
 	COMPARE_SCALAR_FIELD(sortop);
 	COMPARE_SCALAR_FIELD(nulls_first);
@@ -2515,12 +2517,8 @@ equal(void *a, void *b)
 		case T_RangeTblEntry:
 			retval = _equalRangeTblEntry(a, b);
 			break;
-		case T_SortClause:
+		case T_SortGroupClause:
-			retval = _equalSortClause(a, b);
+			retval = _equalSortGroupClause(a, b);
-			break;
-		case T_GroupClause:
-			/* GroupClause is equivalent to SortClause */
-			retval = _equalSortClause(a, b);
 			break;
 		case T_RowMarkClause:
 			retval = _equalRowMarkClause(a, b);

--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.328 2008/07/17 16:02:12 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@@ -1732,6 +1732,7 @@ _outQuery(StringInfo str, Query *node)
 	WRITE_NODE_FIELD(intoClause);
 	WRITE_BOOL_FIELD(hasAggs);
 	WRITE_BOOL_FIELD(hasSubLinks);
+	WRITE_BOOL_FIELD(hasDistinctOn);
 	WRITE_NODE_FIELD(rtable);
 	WRITE_NODE_FIELD(jointree);
 	WRITE_NODE_FIELD(targetList);
@@ -1747,21 +1748,12 @@ _outQuery(StringInfo str, Query *node)
 }
 static void
-_outSortClause(StringInfo str, SortClause *node)
+_outSortGroupClause(StringInfo str, SortGroupClause *node)
 {
-	WRITE_NODE_TYPE("SORTCLAUSE");
+	WRITE_NODE_TYPE("SORTGROUPCLAUSE");
-	WRITE_UINT_FIELD(tleSortGroupRef);
-	WRITE_OID_FIELD(sortop);
-	WRITE_BOOL_FIELD(nulls_first);
-}
-static void
-_outGroupClause(StringInfo str, GroupClause *node)
-{
-	WRITE_NODE_TYPE("GROUPCLAUSE");
 	WRITE_UINT_FIELD(tleSortGroupRef);
+	WRITE_OID_FIELD(eqop);
 	WRITE_OID_FIELD(sortop);
 	WRITE_BOOL_FIELD(nulls_first);
 }
@@ -2398,11 +2390,8 @@ _outNode(StringInfo str, void *obj)
 			case T_Query:
 				_outQuery(str, obj);
 				break;
-			case T_SortClause:
+			case T_SortGroupClause:
-				_outSortClause(str, obj);
+				_outSortGroupClause(str, obj);
-				break;
-			case T_GroupClause:
-				_outGroupClause(str, obj);
 				break;
 			case T_RowMarkClause:
 				_outRowMarkClause(str, obj);

--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.210 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $
 *
 * NOTES
 *	  Path and Plan nodes do not have any readfuncs support, because we
@@ -142,6 +142,7 @@ _readQuery(void)
 	READ_NODE_FIELD(intoClause);
 	READ_BOOL_FIELD(hasAggs);
 	READ_BOOL_FIELD(hasSubLinks);
+	READ_BOOL_FIELD(hasDistinctOn);
 	READ_NODE_FIELD(rtable);
 	READ_NODE_FIELD(jointree);
 	READ_NODE_FIELD(targetList);
@@ -187,29 +188,15 @@ _readDeclareCursorStmt(void)
 }
 /*
- * _readSortClause
+ * _readSortGroupClause
 */
-static SortClause *
+static SortGroupClause *
-_readSortClause(void)
+_readSortGroupClause(void)
 {
-	READ_LOCALS(SortClause);
+	READ_LOCALS(SortGroupClause);
-	READ_UINT_FIELD(tleSortGroupRef);
-	READ_OID_FIELD(sortop);
-	READ_BOOL_FIELD(nulls_first);
-	READ_DONE();
-}
-/*
- * _readGroupClause
- */
-static GroupClause *
-_readGroupClause(void)
-{
-	READ_LOCALS(GroupClause);
 	READ_UINT_FIELD(tleSortGroupRef);
+	READ_OID_FIELD(eqop);
 	READ_OID_FIELD(sortop);
 	READ_BOOL_FIELD(nulls_first);
@@ -1030,10 +1017,8 @@ parseNodeString(void)
 	if (MATCH("QUERY", 5))
 		return_value = _readQuery();
-	else if (MATCH("SORTCLAUSE", 10))
+	else if (MATCH("SORTGROUPCLAUSE", 15))
-		return_value = _readSortClause();
+		return_value = _readSortGroupClause();
-	else if (MATCH("GROUPCLAUSE", 11))
-		return_value = _readGroupClause();
 	else if (MATCH("ROWMARKCLAUSE", 13))
 		return_value = _readRowMarkClause();
 	else if (MATCH("SETOPERATIONSTMT", 16))

--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.46 2008/04/09 01:00:46 momjian Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $
 Optimizer
 =========
@@ -563,8 +563,8 @@ competing Paths are equivalently sorted.
 Pathkeys are also useful to represent an ordering that we wish to achieve,
 since they are easily compared to the pathkeys of a potential candidate
-path.  So, SortClause lists are turned into pathkeys lists for use inside
+path.  So, SortGroupClause lists are turned into pathkeys lists for use
-the optimizer.
+inside the optimizer.
 Because we have to generate pathkeys lists from the sort clauses before
 we've finished EquivalenceClass merging, we cannot use the pointer-equality

--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.172 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -963,11 +963,12 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
 *
 * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
 * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  This condition is vacuous for DISTINCT, because then
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, but unfortunately it's fairly
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
- * expensive to tell the difference between DISTINCT and DISTINCT ON in the
+ * we are assuming that the qual can't distinguish values that the DISTINCT
- * parsetree representation.  It's cheaper to just make sure all the Vars
+ * operator sees as equal.  This is a bit shaky but we have no way to test
- * in the qual refer to DISTINCT columns.
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
 *
 * 5. We must not push down any quals that refer to subselect outputs that
 * return sets, else we'd introduce functions-returning-sets into the
@@ -1030,8 +1031,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 		Assert(tle != NULL);
 		Assert(!tle->resjunk);
-		/* If subquery uses DISTINCT or DISTINCT ON, check point 4 */
+		/* If subquery uses DISTINCT ON, check point 4 */
-		if (subquery->distinctClause != NIL &&
+		if (subquery->hasDistinctOn &&
 			!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
 		{
 			/* non-DISTINCT column, so fail */

--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -10,7 +10,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.10 2008/03/31 16:59:26 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.11 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -355,7 +355,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 *	  class it is a member of; if none, build a new single-member
 *	  EquivalenceClass for it.
 *
- * sortref is the SortGroupRef of the originating SortClause, if any,
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
 * or zero if not.
 *
 * This can be used safely both before and after EquivalenceClass merging;

--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.94 2008/08/02 21:31:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -227,8 +227,8 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
 *	  a PathKey.  If canonicalize = true, the result is a "canonical"
 *	  PathKey, otherwise not.  (But note it might be redundant anyway.)
 *
- * If the PathKey is being generated from a SortClause, sortref should be
+ * If the PathKey is being generated from a SortGroupClause, sortref should be
- * the SortClause's SortGroupRef; otherwise zero.
+ * the SortGroupClause's SortGroupRef; otherwise zero.
 *
 * canonicalize should always be TRUE after EquivalenceClass merging has
 * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
@@ -823,7 +823,7 @@ build_join_pathkeys(PlannerInfo *root,
 /*
 * make_pathkeys_for_sortclauses
 *		Generate a pathkeys list that represents the sort order specified
- *		by a list of SortClauses (GroupClauses will work too!)
+ *		by a list of SortGroupClauses
 *
 * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
 * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
@@ -832,7 +832,7 @@ build_join_pathkeys(PlannerInfo *root,
 * be able to represent requested pathkeys before the equivalence classes have
 * been created for the query.)
 *
- * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'sortclauses' is a list of SortGroupClause nodes
 * 'tlist' is the targetlist to find the referenced tlist entries in
 */
 List *
@@ -846,11 +846,12 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 	foreach(l, sortclauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Expr	   *sortkey;
 		PathKey    *pathkey;
 		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+		Assert(OidIsValid(sortcl->sortop));
 		pathkey = make_pathkey_from_sortinfo(root,
 											 sortkey,
 											 sortcl->sortop,

--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -760,7 +760,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			Oid			in_oper = lfirst_oid(l);
 			Oid			sortop;
 			TargetEntry *tle;
-			SortClause *sortcl;
+			SortGroupClause *sortcl;
 			sortop = get_ordering_op_for_equality_op(in_oper, false);
 			if (!OidIsValid(sortop))	/* shouldn't happen */
@@ -769,9 +769,10 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 			tle = get_tle_by_resno(subplan->targetlist,
 								   groupColIdx[groupColPos]);
 			Assert(tle != NULL);
-			sortcl = makeNode(SortClause);
+			sortcl = makeNode(SortGroupClause);
 			sortcl->tleSortGroupRef = assignSortGroupRef(tle,
 														 subplan->targetlist);
+			sortcl->eqop = in_oper;
 			sortcl->sortop = sortop;
 			sortcl->nulls_first = false;
 			sortList = lappend(sortList, sortcl);
@@ -2531,6 +2532,8 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first,
 {
 	int			i;
+	Assert(OidIsValid(sortOp));
 	for (i = 0; i < numCols; i++)
 	{
 		/*
@@ -2753,7 +2756,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys,
 * make_sort_from_sortclauses
 *	  Create sort plan to sort according to given sortclauses
 *
- *	  'sortcls' is a list of SortClauses
+ *	  'sortcls' is a list of SortGroupClauses
 *	  'lefttree' is the node which yields input tuples
 */
 Sort *
@@ -2778,7 +2781,7 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
 	foreach(l, sortcls)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, sub_tlist);
 		/*
@@ -2802,14 +2805,14 @@ make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree)
 * make_sort_from_groupcols
 *	  Create sort plan to sort based on grouping columns
 *
- * 'groupcls' is the list of GroupClauses
+ * 'groupcls' is the list of SortGroupClauses
 * 'grpColIdx' gives the column numbers to use
 *
 * This might look like it could be merged with make_sort_from_sortclauses,
 * but presently we *must* use the grpColIdx[] array to locate sort columns,
 * because the child plan's tlist is not marked with ressortgroupref info
 * appropriate to the grouping node.  So, only the sort ordering info
- * is used from the GroupClause entries.
+ * is used from the SortGroupClause entries.
 */
 Sort *
 make_sort_from_groupcols(PlannerInfo *root,
@@ -2837,7 +2840,7 @@ make_sort_from_groupcols(PlannerInfo *root,
 	foreach(l, groupcls)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(l);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
 		TargetEntry *tle = get_tle_by_resno(sub_tlist, grpColIdx[grpno]);
 		/*
@@ -3038,7 +3041,7 @@ make_group(PlannerInfo *root,
 }
 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
+ * distinctList is a list of SortGroupClauses, identifying the targetlist items
 * that should be considered by the Unique filter.	The input path must
 * already be sorted accordingly.
 */
@@ -3074,7 +3077,7 @@ make_unique(Plan *lefttree, List *distinctList)
 	plan->righttree = NULL;
 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@@ -3083,14 +3086,12 @@ make_unique(Plan *lefttree, List *distinctList)
 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 		uniqColIdx[keyno] = tle->resno;
-		uniqOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
+		uniqOperators[keyno] = sortcl->eqop;
-		if (!OidIsValid(uniqOperators[keyno]))	/* shouldn't happen */
+		Assert(OidIsValid(uniqOperators[keyno]));
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
 		keyno++;
 	}
@@ -3102,8 +3103,8 @@ make_unique(Plan *lefttree, List *distinctList)
 }
 /*
- * distinctList is a list of SortClauses, identifying the targetlist items
+ * distinctList is a list of SortGroupClauses, identifying the targetlist
- * that should be considered by the SetOp filter.  The input path must
+ * items that should be considered by the SetOp filter.  The input path must
 * already be sorted accordingly.
 */
 SetOp *
@@ -3140,7 +3141,7 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	plan->righttree = NULL;
 	/*
-	 * convert SortClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and equality
 	 * operators, as wanted by executor
 	 */
 	Assert(numCols > 0);
@@ -3149,14 +3150,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
 	foreach(slitem, distinctList)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(slitem);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
 		dupColIdx[keyno] = tle->resno;
-		dupOperators[keyno] = get_equality_op_for_ordering_op(sortcl->sortop);
+		dupOperators[keyno] = sortcl->eqop;
-		if (!OidIsValid(dupOperators[keyno]))	/* shouldn't happen */
+		Assert(OidIsValid(dupOperators[keyno]));
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 sortcl->sortop);
 		keyno++;
 	}

--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.41 2008/07/10 02:14:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.42 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -477,7 +477,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	Plan	   *plan;
 	Plan	   *iplan;
 	TargetEntry *tle;
-	SortClause *sortcl;
+	SortGroupClause *sortcl;
 	/*
 	 * Generate a suitably modified query.	Much of the work here is probably
@@ -492,6 +492,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->utilityStmt = NULL;
 	subparse->intoClause = NULL;
 	subparse->hasAggs = false;
+	subparse->hasDistinctOn = false;
 	subparse->groupClause = NIL;
 	subparse->havingQual = NULL;
 	subparse->distinctClause = NIL;
@@ -505,8 +506,12 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 	subparse->targetList = list_make1(tle);
 	/* set up the appropriate ORDER BY entry */
-	sortcl = makeNode(SortClause);
+	sortcl = makeNode(SortGroupClause);
 	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
+	sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
+	if (!OidIsValid(sortcl->eqop))		/* shouldn't happen */
+		elog(ERROR, "could not find equality operator for ordering operator %u",
+			 info->aggsortop);
 	sortcl->sortop = info->aggsortop;
 	sortcl->nulls_first = info->nulls_first;
 	subparse->sortClause = list_make1(sortcl);

--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -67,6 +67,7 @@ static bool is_dummy_plan(Plan *plan);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
 static Oid *extract_grouping_ops(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
@@ -846,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		Path	   *best_path;
 		long		numGroups = 0;
 		AggClauseCounts agg_counts;
-		int			numGroupCols = list_length(parse->groupClause);
+		int			numGroupCols;
 		bool		use_hashed_grouping = false;
 		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+		/* Preprocess GROUP BY clause, if any */
+		if (parse->groupClause)
+			preprocess_groupclause(root);
+		numGroupCols = list_length(parse->groupClause);
 		/* Preprocess targetlist */
 		tlist = preprocess_targetlist(root, tlist);
@@ -1476,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
 	return tuple_fraction;
 }
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	List	   *new_groupclause;
+	bool		partial_match;
+	ListCell   *sl;
+	ListCell   *gl;
+	/* If no ORDER BY, nothing useful to do here anyway */
+	if (parse->sortClause == NIL)
+		return;
+	/*
+	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
+	 * items, but only as far as we can make a matching prefix.
+	 *
+	 * This code assumes that the sortClause contains no duplicate items.
+	 */
+	new_groupclause = NIL;
+	foreach(sl, parse->sortClause)
+	{
+		SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+		foreach(gl, parse->groupClause)
+		{
+			SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+			if (equal(gc, sc))
+			{
+				new_groupclause = lappend(new_groupclause, gc);
+				break;
+			}
+		}
+		if (gl == NULL)
+			break;				/* no match, so stop scanning */
+	}
+	/* Did we match all of the ORDER BY list, or just some of it? */
+	partial_match = (sl != NULL);
+	/* If no match at all, no point in reordering GROUP BY */
+	if (new_groupclause == NIL)
+		return;
+	/*
+	 * Add any remaining GROUP BY items to the new list, but only if we
+	 * were able to make a complete match.  In other words, we only
+	 * rearrange the GROUP BY list if the result is that one list is a
+	 * prefix of the other --- otherwise there's no possibility of a
+	 * common sort.
+	 */
+	foreach(gl, parse->groupClause)
+	{
+		SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+		if (list_member_ptr(new_groupclause, gc))
+			continue;			/* it matched an ORDER BY item */
+		if (partial_match)
+			return;				/* give up, no common sort possible */
+		new_groupclause = lappend(new_groupclause, gc);
+	}
+	/* Success --- install the rearranged GROUP BY list */
+	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+	parse->groupClause = new_groupclause;
+}
 /*
 * extract_grouping_ops - make an array of the equality operator OIDs
 *		for the GROUP BY clause
@@ -1492,12 +1580,10 @@ extract_grouping_ops(List *groupClause)
 	foreach(glitem, groupClause)
 	{
-		GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
+		groupOperators[colno] = groupcl->eqop;
-		if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
+		Assert(OidIsValid(groupOperators[colno]));
-			elog(ERROR, "could not find equality operator for ordering operator %u",
-				 groupcl->sortop);
 		colno++;
 	}
@@ -1738,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root,
 		foreach(gl, parse->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(gl);
+			SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 			TargetEntry *te = NULL;
 			ListCell   *sl;
@@ -1797,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root,
 	foreach(gl, root->parse->groupClause)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(gl);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
 		TargetEntry *te = NULL;
 		ListCell   *sl;

--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.148 2008/07/31 22:47:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -98,7 +98,7 @@ static List *adjust_inherited_tlist(List *tlist,
 * zero means "all the tuples will be fetched".  Any LIMIT present at the
 * top level has already been factored into tuple_fraction.
 *
- * *sortClauses is an output argument: it is set to a list of SortClauses
+ * *sortClauses is an output argument: it is set to a list of SortGroupClauses
 * representing the result ordering of the topmost set operation.
 */
 Plan *
@@ -152,7 +152,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
 * junkOK: if true, child resjunk columns may be left in the result
 * flag: if >= 0, add a resjunk output column indicating value of flag
 * refnames_tlist: targetlist to take column names from
- * *sortClauses: receives list of SortClauses for result plan, if any
+ * *sortClauses: receives list of SortGroupClauses for result plan, if any
 *
 * We don't have to care about typmods here: the only allowed difference
 * between set-op input and output typmods is input is a specific typmod
@@ -678,8 +678,11 @@ generate_append_tlist(List *colTypes, bool flag,
 /*
 * generate_setop_sortlist
- *		Build a SortClause list enumerating all the non-resjunk tlist entries,
+ *		Build a SortGroupClause list enumerating all the non-resjunk
- *		using default ordering properties.
+ *		tlist entries, using default ordering properties.
+ *
+ * For now, we require all the items to be sortable.  Eventually we
+ * should implement hashing setops and allow hash-only datatypes.
 */
 static List *
 generate_setop_sortlist(List *targetlist)
@@ -692,11 +695,10 @@ generate_setop_sortlist(List *targetlist)
 		TargetEntry *tle = (TargetEntry *) lfirst(l);
 		if (!tle->resjunk)
-			sortlist = addTargetToSortList(NULL, tle,
+			sortlist = addTargetToGroupList(NULL, tle,
 											sortlist, targetlist,
-										   SORTBY_DEFAULT,
+											true, /* XXX fixme someday */
-										   SORTBY_NULLS_DEFAULT,
+											false);
-										   NIL, false);
 	}
 	return sortlist;
 }

--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.259 2008/05/15 17:37:49 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@@ -1334,85 +1334,6 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids)
 }
-/*****************************************************************************
- *		Tests on clauses of queries
- *
- * Possibly this code should go someplace else, since this isn't quite the
- * same meaning of "clause" as is used elsewhere in this module.  But I can't
- * think of a better place for it...
- *****************************************************************************/
-/*
- * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
- * not the same as the set of output columns.
- */
-bool
-has_distinct_on_clause(Query *query)
-{
-	ListCell   *l;
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-	/*
-	 * If the DISTINCT list contains all the nonjunk targetlist items, and
-	 * nothing else (ie, no junk tlist items), then it's a simple DISTINCT,
-	 * else it's DISTINCT ON.  We do not require the lists to be in the same
-	 * order (since the parser may have adjusted the DISTINCT clause ordering
-	 * to agree with ORDER BY).  Furthermore, a non-DISTINCT junk tlist item
-	 * that is in the sortClause is also evidence of DISTINCT ON, since we
-	 * don't allow ORDER BY on junk tlist items when plain DISTINCT is used.
-	 *
-	 * This code assumes that the DISTINCT list is valid, ie, all its entries
-	 * match some entry of the tlist.
-	 */
-	foreach(l, query->targetList)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(l);
-		if (tle->ressortgroupref == 0)
-		{
-			if (tle->resjunk)
-				continue;		/* we can ignore unsorted junk cols */
-			return true;		/* definitely not in DISTINCT list */
-		}
-		if (targetIsInSortList(tle, InvalidOid, query->distinctClause))
-		{
-			if (tle->resjunk)
-				return true;	/* junk TLE in DISTINCT means DISTINCT ON */
-			/* else this TLE is okay, keep looking */
-		}
-		else
-		{
-			/* This TLE is not in DISTINCT list */
-			if (!tle->resjunk)
-				return true;	/* non-junk, non-DISTINCT, so DISTINCT ON */
-			if (targetIsInSortList(tle, InvalidOid, query->sortClause))
-				return true;	/* sorted, non-distinct junk */
-			/* unsorted junk is okay, keep looking */
-		}
-	}
-	/* It's a simple DISTINCT */
-	return false;
-}
-/*
- * Test whether a query uses simple DISTINCT, ie, has a distinct-list that
- * is the same as the set of output columns.
- */
-bool
-has_distinct_clause(Query *query)
-{
-	/* Is there a DISTINCT clause at all? */
-	if (query->distinctClause == NIL)
-		return false;
-	/* It's DISTINCT if it's not DISTINCT ON */
-	return !has_distinct_on_clause(query);
-}
 /*****************************************************************************
 *																			 *
 *		General clause-manipulating routines								 *

--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -935,8 +935,10 @@ translate_sub_tlist(List *tlist, int relid)
 * corresponding upper-level equality operators listed in opids would think
 * the values are distinct.  (Note: the opids entries could be cross-type
 * operators, and thus not exactly the equality operators that the subquery
- * would use itself.  We assume that the subquery is compatible if these
+ * would use itself.  We use equality_ops_are_compatible() to check
- * operators appear in the same btree opfamily as the ones the subquery uses.)
+ * compatibility.  That looks at btree or hash opfamily membership, and so
+ * should give trustworthy answers for all operators that we might need
+ * to deal with here.)
 */
 static bool
 query_is_distinct_for(Query *query, List *colnos, List *opids)
@@ -955,13 +957,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->distinctClause)
 		{
-			SortClause *scl = (SortClause *) lfirst(l);
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(scl,
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, scl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -976,13 +978,13 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	{
 		foreach(l, query->groupClause)
 		{
-			GroupClause *grpcl = (GroupClause *) lfirst(l);
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
-			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
+			TargetEntry *tle = get_sortgroupclause_tle(sgc,
 													   query->targetList);
 			opid = distinct_col_search(tle->resno, colnos, opids);
 			if (!OidIsValid(opid) ||
-				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
+				!equality_ops_are_compatible(opid, sgc->eqop))
 				break;			/* exit early if no match */
 		}
 		if (l == NULL)			/* had matches for all? */
@@ -1002,10 +1004,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
 	 * except with ALL.
 	 *
-	 * XXX this code knows that prepunion.c will adopt the default ordering
+	 * XXX this code knows that prepunion.c will adopt the default sort/group
-	 * operator for each column datatype as the sortop.  It'd probably be
+	 * operators for each column datatype to determine uniqueness.  It'd
-	 * better if these operators were chosen at parse time and stored into the
+	 * probably be better if these operators were chosen at parse time and
-	 * parsetree, instead of leaving bits of the planner to decide semantics.
+	 * stored into the parsetree, instead of leaving bits of the planner to
+	 * decide semantics.
 	 */
 	if (query->setOperations)
 	{
@@ -1020,14 +1023,20 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
 			foreach(l, query->targetList)
 			{
 				TargetEntry *tle = (TargetEntry *) lfirst(l);
+				Oid		tle_eq_opr;
 				if (tle->resjunk)
 					continue;	/* ignore resjunk columns */
 				opid = distinct_col_search(tle->resno, colnos, opids);
-				if (!OidIsValid(opid) ||
+				if (!OidIsValid(opid))
-					!ops_in_same_btree_opfamily(opid,
+					break;		/* exit early if no match */
-						   ordering_oper_opid(exprType((Node *) tle->expr))))
+				/* check for compatible semantics */
+				get_sort_group_operators(exprType((Node *) tle->expr),
+										 false, false, false,
+										 NULL, &tle_eq_opr, NULL);
+				if (!OidIsValid(tle_eq_opr) ||
+					!equality_ops_are_compatible(opid, tle_eq_opr))
 					break;		/* exit early if no match */
 			}
 			if (l == NULL)		/* had matches for all? */

--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.78 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -156,49 +156,43 @@ get_sortgroupref_tle(Index sortref, List *targetList)
 /*
 * get_sortgroupclause_tle
- *		Find the targetlist entry matching the given SortClause
+ *		Find the targetlist entry matching the given SortGroupClause
- *		(or GroupClause) by ressortgroupref, and return it.
+ *		by ressortgroupref, and return it.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
 */
 TargetEntry *
-get_sortgroupclause_tle(SortClause *sortClause,
+get_sortgroupclause_tle(SortGroupClause *sgClause,
 						List *targetList)
 {
-	return get_sortgroupref_tle(sortClause->tleSortGroupRef, targetList);
+	return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList);
 }
 /*
 * get_sortgroupclause_expr
- *		Find the targetlist entry matching the given SortClause
+ *		Find the targetlist entry matching the given SortGroupClause
- *		(or GroupClause) by ressortgroupref, and return its expression.
+ *		by ressortgroupref, and return its expression.
- *
- * Because GroupClause is typedef'd as SortClause, either kind of
- * node can be passed without casting.
 */
 Node *
-get_sortgroupclause_expr(SortClause *sortClause, List *targetList)
+get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
 {
-	TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList);
+	TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList);
 	return (Node *) tle->expr;
 }
 /*
 * get_sortgrouplist_exprs
- *		Given a list of SortClauses (or GroupClauses), build a list
+ *		Given a list of SortGroupClauses, build a list
 *		of the referenced targetlist expressions.
 */
 List *
-get_sortgrouplist_exprs(List *sortClauses, List *targetList)
+get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 {
 	List	   *result = NIL;
 	ListCell   *l;
-	foreach(l, sortClauses)
+	foreach(l, sgClauses)
 	{
-		SortClause *sortcl = (SortClause *) lfirst(l);
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Node	   *sortexpr;
 		sortexpr = get_sortgroupclause_expr(sortcl, targetList);

--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.374 2008/07/31 22:47:56 tgl Exp $
+ *	$PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -724,10 +724,28 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 											&qry->targetList,
 											qry->sortClause);
+	if (stmt->distinctClause == NIL)
+	{
+		qry->distinctClause = NIL;
+		qry->hasDistinctOn = false;
+	}
+	else if (linitial(stmt->distinctClause) == NULL)
+	{
+		/* We had SELECT DISTINCT */
 		qry->distinctClause = transformDistinctClause(pstate,
+													  &qry->targetList,
+													  qry->sortClause);
+		qry->hasDistinctOn = false;
+	}
+	else
+	{
+		/* We had SELECT DISTINCT ON */
+		qry->distinctClause = transformDistinctOnClause(pstate,
 														stmt->distinctClause,
 														&qry->targetList,
 														qry->sortClause);
+		qry->hasDistinctOn = true;
+	}
 	/* transform LIMIT */
 	qry->limitOffset = transformLimitClause(pstate, stmt->limitOffset,

--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.79 2008/01/01 19:45:50 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.80 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -131,7 +131,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 	 */
 	foreach(l, qry->groupClause)
 	{
-		GroupClause *grpcl = (GroupClause *) lfirst(l);
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
 		Node	   *expr;
 		expr = get_sortgroupclause_expr(grpcl, qry->targetList);

--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
--- a/src/backend/parser/parse_oper.c
+++ b/src/backend/parser/parse_oper.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.102 2008/04/22 01:34:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parse_oper.c,v 1.103 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -160,239 +160,93 @@ LookupOperNameTypeNames(ParseState *pstate, List *opername,
 }
 /*
- * equality_oper - identify a suitable equality operator for a datatype
+ * get_sort_group_operators - get default sorting/grouping operators for type
 *
- * On failure, return NULL if noError, else report a standard error
+ * We fetch the "<", "=", and ">" operators all at once to reduce lookup
- */
+ * overhead (knowing that most callers will be interested in at least two).
-Operator
+ * However, a given datatype might have only an "=" operator, if it is
-equality_oper(Oid argtype, bool noError)
+ * hashable but not sortable.  (Other combinations of present and missing
-{
+ * operators shouldn't happen, unless the system catalogs are messed up.)
-	TypeCacheEntry *typentry;
-	Oid			oproid;
-	Operator	optup;
-	/*
-	 * Look for an "=" operator for the datatype.  We require it to be an
-	 * exact or binary-compatible match, since most callers are not prepared
-	 * to cope with adding any run-time type coercion steps.
-	 */
-	typentry = lookup_type_cache(argtype, TYPECACHE_EQ_OPR);
-	oproid = typentry->eq_opr;
-	/*
-	 * If the datatype is an array, then we can use array_eq ... but only if
-	 * there is a suitable equality operator for the element type. (This check
-	 * is not in the raw typcache.c code ... should it be?)
-	 */
-	if (oproid == ARRAY_EQ_OP)
-	{
-		Oid			elem_type = get_element_type(argtype);
-		if (OidIsValid(elem_type))
-		{
-			optup = equality_oper(elem_type, true);
-			if (optup != NULL)
-				ReleaseSysCache(optup);
-			else
-				oproid = InvalidOid;	/* element type has no "=" */
-		}
-		else
-			oproid = InvalidOid;	/* bogus array type? */
-	}
-	if (OidIsValid(oproid))
-	{
-		optup = SearchSysCache(OPEROID,
-							   ObjectIdGetDatum(oproid),
-							   0, 0, 0);
-		if (optup == NULL)		/* should not fail */
-			elog(ERROR, "cache lookup failed for operator %u", oproid);
-		return optup;
-	}
-	if (!noError)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_FUNCTION),
-				 errmsg("could not identify an equality operator for type %s",
-						format_type_be(argtype))));
-	return NULL;
-}
-/*
- * ordering_oper - identify a suitable sorting operator ("<") for a datatype
 *
- * On failure, return NULL if noError, else report a standard error
+ * If an operator is missing and the corresponding needXX flag is true,
- */
+ * throw a standard error message, else return InvalidOid.
-Operator
-ordering_oper(Oid argtype, bool noError)
-{
-	TypeCacheEntry *typentry;
-	Oid			oproid;
-	Operator	optup;
-	/*
-	 * Look for a "<" operator for the datatype.  We require it to be an exact
-	 * or binary-compatible match, since most callers are not prepared to cope
-	 * with adding any run-time type coercion steps.
 *
-	 * Note: the search algorithm used by typcache.c ensures that if a "<"
+ * Callers can pass NULL pointers for any results they don't care to get.
-	 * operator is returned, it will be consistent with the "=" operator
-	 * returned by equality_oper.  This is critical for sorting and grouping
-	 * purposes.
-	 */
-	typentry = lookup_type_cache(argtype, TYPECACHE_LT_OPR);
-	oproid = typentry->lt_opr;
-	/*
-	 * If the datatype is an array, then we can use array_lt ... but only if
-	 * there is a suitable less-than operator for the element type. (This
-	 * check is not in the raw typcache.c code ... should it be?)
-	 */
-	if (oproid == ARRAY_LT_OP)
-	{
-		Oid			elem_type = get_element_type(argtype);
-		if (OidIsValid(elem_type))
-		{
-			optup = ordering_oper(elem_type, true);
-			if (optup != NULL)
-				ReleaseSysCache(optup);
-			else
-				oproid = InvalidOid;	/* element type has no "<" */
-		}
-		else
-			oproid = InvalidOid;	/* bogus array type? */
-	}
-	if (OidIsValid(oproid))
-	{
-		optup = SearchSysCache(OPEROID,
-							   ObjectIdGetDatum(oproid),
-							   0, 0, 0);
-		if (optup == NULL)		/* should not fail */
-			elog(ERROR, "cache lookup failed for operator %u", oproid);
-		return optup;
-	}
-	if (!noError)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_FUNCTION),
-				 errmsg("could not identify an ordering operator for type %s",
-						format_type_be(argtype)),
-		 errhint("Use an explicit ordering operator or modify the query.")));
-	return NULL;
-}
-/*
- * reverse_ordering_oper - identify DESC sort operator (">") for a datatype
 *
- * On failure, return NULL if noError, else report a standard error
+ * Note: the results are guaranteed to be exact or binary-compatible matches,
+ * since most callers are not prepared to cope with adding any run-time type
+ * coercion steps.
 */
-Operator
+void
-reverse_ordering_oper(Oid argtype, bool noError)
+get_sort_group_operators(Oid argtype,
+						 bool needLT, bool needEQ, bool needGT,
+						 Oid *ltOpr, Oid *eqOpr, Oid *gtOpr)
 {
 	TypeCacheEntry *typentry;
-	Oid			oproid;
+	Oid			lt_opr;
-	Operator	optup;
+	Oid			eq_opr;
+	Oid			gt_opr;
 	/*
-	 * Look for a ">" operator for the datatype.  We require it to be an exact
+	 * Look up the operators using the type cache.
-	 * or binary-compatible match, since most callers are not prepared to cope
-	 * with adding any run-time type coercion steps.
 	 *
-	 * Note: the search algorithm used by typcache.c ensures that if a ">"
+	 * Note: the search algorithm used by typcache.c ensures that the results
-	 * operator is returned, it will be consistent with the "=" operator
+	 * are consistent, ie all from the same opclass.
-	 * returned by equality_oper.  This is critical for sorting and grouping
-	 * purposes.
 	 */
-	typentry = lookup_type_cache(argtype, TYPECACHE_GT_OPR);
+	typentry = lookup_type_cache(argtype,
-	oproid = typentry->gt_opr;
+					TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR);
+	lt_opr = typentry->lt_opr;
+	eq_opr = typentry->eq_opr;
+	gt_opr = typentry->gt_opr;
 	/*
-	 * If the datatype is an array, then we can use array_gt ... but only if
+	 * If the datatype is an array, then we can use array_lt and friends ...
-	 * there is a suitable greater-than operator for the element type. (This
+	 * but only if there are suitable operators for the element type.  (This
-	 * check is not in the raw typcache.c code ... should it be?)
+	 * check is not in the raw typcache.c code ... should it be?)  Testing
+	 * all three operator IDs here should be redundant.
 	 */
-	if (oproid == ARRAY_GT_OP)
+	if (lt_opr == ARRAY_LT_OP ||
+		eq_opr == ARRAY_EQ_OP ||
+		gt_opr == ARRAY_GT_OP)
 	{
 		Oid			elem_type = get_element_type(argtype);
 		if (OidIsValid(elem_type))
 		{
-			optup = reverse_ordering_oper(elem_type, true);
+			typentry = lookup_type_cache(elem_type,
-			if (optup != NULL)
+					TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR | TYPECACHE_GT_OPR);
-				ReleaseSysCache(optup);
+			if (!OidIsValid(typentry->lt_opr))
-			else
+				lt_opr = InvalidOid;	/* element type has no "<" */
-				oproid = InvalidOid;	/* element type has no ">" */
+			if (!OidIsValid(typentry->eq_opr))
+				eq_opr = InvalidOid;	/* element type has no "=" */
+			if (!OidIsValid(typentry->gt_opr))
+				gt_opr = InvalidOid;	/* element type has no ">" */
 		}
 		else
-			oproid = InvalidOid;	/* bogus array type? */
+			lt_opr = eq_opr = gt_opr = InvalidOid;	/* bogus array type? */
 	}
-	if (OidIsValid(oproid))
+	/* Report errors if needed */
-	{
+	if ((needLT && !OidIsValid(lt_opr)) ||
-		optup = SearchSysCache(OPEROID,
+		(needGT && !OidIsValid(gt_opr)))
-							   ObjectIdGetDatum(oproid),
-							   0, 0, 0);
-		if (optup == NULL)		/* should not fail */
-			elog(ERROR, "cache lookup failed for operator %u", oproid);
-		return optup;
-	}
-	if (!noError)
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_FUNCTION),
 				 errmsg("could not identify an ordering operator for type %s",
 						format_type_be(argtype)),
 		 errhint("Use an explicit ordering operator or modify the query.")));
-	return NULL;
+	if (needEQ && !OidIsValid(eq_opr))
-}
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_FUNCTION),
-/*
+				 errmsg("could not identify an equality operator for type %s",
- * equality_oper_funcid - convenience routine for oprfuncid(equality_oper())
+						format_type_be(argtype))));
- */
-Oid
-equality_oper_funcid(Oid argtype)
-{
-	Operator	optup;
-	Oid			result;
-	optup = equality_oper(argtype, false);
-	result = oprfuncid(optup);
-	ReleaseSysCache(optup);
-	return result;
-}
-/*
- * ordering_oper_opid - convenience routine for oprid(ordering_oper())
- *
- * This was formerly called any_ordering_op()
- */
-Oid
-ordering_oper_opid(Oid argtype)
-{
-	Operator	optup;
-	Oid			result;
-	optup = ordering_oper(argtype, false);
-	result = oprid(optup);
-	ReleaseSysCache(optup);
-	return result;
-}
-/*
- * reverse_ordering_oper_opid - convenience routine for oprid(reverse_ordering_oper())
- */
-Oid
-reverse_ordering_oper_opid(Oid argtype)
-{
-	Operator	optup;
-	Oid			result;
-	optup = reverse_ordering_oper(argtype, false);
+	/* Return results as needed */
-	result = oprid(optup);
+	if (ltOpr)
-	ReleaseSysCache(optup);
+		*ltOpr = lt_opr;
-	return result;
+	if (eqOpr)
+		*eqOpr = eq_opr;
+	if (gtOpr)
+		*gtOpr = gt_opr;
 }

--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.278 2008/07/18 03:32:52 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -157,7 +157,7 @@ static void get_target_list(List *targetList, deparse_context *context,
 static void get_setop_query(Node *setOp, Query *query,
 				deparse_context *context,
 				TupleDesc resultDesc);
-static Node *get_rule_sortgroupclause(SortClause *srt, List *tlist,
+static Node *get_rule_sortgroupclause(SortGroupClause *srt, List *tlist,
 						 bool force_colno,
 						 deparse_context *context);
 static char *get_variable(Var *var, int levelsup, bool showstar,
@@ -2056,7 +2056,7 @@ get_select_query_def(Query *query, deparse_context *context,
 		sep = "";
 		foreach(l, query->sortClause)
 		{
-			SortClause *srt = (SortClause *) lfirst(l);
+			SortGroupClause *srt = (SortGroupClause *) lfirst(l);
 			Node	   *sortexpr;
 			Oid			sortcoltype;
 			TypeCacheEntry *typentry;
@@ -2178,13 +2178,13 @@ get_basic_select_query(Query *query, deparse_context *context,
 	/* Add the DISTINCT clause if given */
 	if (query->distinctClause != NIL)
 	{
-		if (has_distinct_on_clause(query))
+		if (query->hasDistinctOn)
 		{
 			appendStringInfo(buf, " DISTINCT ON (");
 			sep = "";
 			foreach(l, query->distinctClause)
 			{
-				SortClause *srt = (SortClause *) lfirst(l);
+				SortGroupClause *srt = (SortGroupClause *) lfirst(l);
 				appendStringInfoString(buf, sep);
 				get_rule_sortgroupclause(srt, query->targetList,
@@ -2219,7 +2219,7 @@ get_basic_select_query(Query *query, deparse_context *context,
 		sep = "";
 		foreach(l, query->groupClause)
 		{
-			GroupClause *grp = (GroupClause *) lfirst(l);
+			SortGroupClause *grp = (SortGroupClause *) lfirst(l);
 			appendStringInfoString(buf, sep);
 			get_rule_sortgroupclause(grp, query->targetList,
@@ -2398,7 +2398,7 @@ get_setop_query(Node *setOp, Query *query, deparse_context *context,
 * Also returns the expression tree, so caller need not find it again.
 */
 static Node *
-get_rule_sortgroupclause(SortClause *srt, List *tlist, bool force_colno,
+get_rule_sortgroupclause(SortGroupClause *srt, List *tlist, bool force_colno,
 						 deparse_context *context)
 {
 	StringInfo	buf = context->buf;

--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.158 2008/07/30 17:05:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.159 2008/08/02 21:32:00 tgl Exp $
 *
 * NOTES
 *	  Eventually, the index information should go through here, too.
@@ -254,11 +254,14 @@ get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse)
 *		Get the OID of the datatype-specific btree equality operator
 *		associated with an ordering operator (a "<" or ">" operator).
 *
+ * If "reverse" isn't NULL, also set *reverse to FALSE if the operator is "<",
+ * TRUE if it's ">"
+ *
 * Returns InvalidOid if no matching equality operator can be found.
 * (This indicates that the operator is not a valid ordering operator.)
 */
 Oid
-get_equality_op_for_ordering_op(Oid opno)
+get_equality_op_for_ordering_op(Oid opno, bool *reverse)
 {
 	Oid			result = InvalidOid;
 	Oid			opfamily;
@@ -274,6 +277,8 @@ get_equality_op_for_ordering_op(Oid opno)
 									 opcintype,
 									 opcintype,
 									 BTEqualStrategyNumber);
+		if (reverse)
+			*reverse = (strategy == BTGreaterStrategyNumber);
 	}
 	return result;
@@ -666,38 +671,51 @@ get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats)
 }
 /*
- * ops_in_same_btree_opfamily
+ * equality_ops_are_compatible
- *		Return TRUE if there exists a btree opfamily containing both operators.
+ *		Return TRUE if the two given equality operators have compatible
- *		(This implies that they have compatible notions of equality.)
+ *		semantics.
+ *
+ * This is trivially true if they are the same operator.  Otherwise,
+ * we look to see if they can be found in the same btree or hash opfamily.
+ * Either finding allows us to assume that they have compatible notions
+ * of equality.  (The reason we need to do these pushups is that one might
+ * be a cross-type operator; for instance int24eq vs int4eq.)
 */
 bool
-ops_in_same_btree_opfamily(Oid opno1, Oid opno2)
+equality_ops_are_compatible(Oid opno1, Oid opno2)
 {
-	bool		result = false;
+	bool		result;
 	CatCList   *catlist;
 	int			i;
+	/* Easy if they're the same operator */
+	if (opno1 == opno2)
+		return true;
 	/*
 	 * We search through all the pg_amop entries for opno1.
 	 */
 	catlist = SearchSysCacheList(AMOPOPID, 1,
 								 ObjectIdGetDatum(opno1),
 								 0, 0, 0);
+	result = false;
 	for (i = 0; i < catlist->n_members; i++)
 	{
 		HeapTuple	op_tuple = &catlist->members[i]->tuple;
 		Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple);
-		/* must be btree */
+		/* must be btree or hash */
-		if (op_form->amopmethod != BTREE_AM_OID)
+		if (op_form->amopmethod == BTREE_AM_OID ||
-			continue;
+			op_form->amopmethod == HASH_AM_OID)
+		{
 			if (op_in_opfamily(opno2, op_form->amopfamily))
 			{
 				result = true;
 				break;
 			}
 		}
+	}
 	ReleaseSysCacheList(catlist);

--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.473 2008/07/30 19:35:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.474 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -53,6 +53,6 @@
 */
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200807301
+#define CATALOG_VERSION_NO	200808011
 #endif
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.206 2008/03/20 21:42:48 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -336,8 +336,7 @@ typedef enum NodeTag
 	T_Constraint,
 	T_DefElem,
 	T_RangeTblEntry,
-	T_SortClause,
+	T_SortGroupClause,
-	T_GroupClause,
 	T_FkConstraint,
 	T_PrivGrantee,
 	T_FuncWithArgs,

--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -108,6 +108,7 @@ typedef struct Query
 	bool		hasAggs;		/* has aggregates in tlist or havingQual */
 	bool		hasSubLinks;	/* has subquery SubLink */
+	bool		hasDistinctOn;	/* distinctClause is from DISTINCT ON */
 	List	   *rtable;			/* list of range table entries */
 	FromExpr   *jointree;		/* table join tree (FROM and WHERE clauses) */
@@ -116,13 +117,13 @@ typedef struct Query
 	List	   *returningList;	/* return-values list (of TargetEntry) */
-	List	   *groupClause;	/* a list of GroupClause's */
+	List	   *groupClause;	/* a list of SortGroupClause's */
 	Node	   *havingQual;		/* qualifications applied to groups */
-	List	   *distinctClause; /* a list of SortClause's */
+	List	   *distinctClause; /* a list of SortGroupClause's */
-	List	   *sortClause;		/* a list of SortClause's */
+	List	   *sortClause;		/* a list of SortGroupClause's */
 	Node	   *limitOffset;	/* # of result tuples to skip (int8 expr) */
 	Node	   *limitCount;		/* # of result tuples to return (int8 expr) */
@@ -607,47 +608,58 @@ typedef struct RangeTblEntry
 } RangeTblEntry;
 /*
- * SortClause -
+ * SortGroupClause -
- *	   representation of ORDER BY clauses
+ *	   representation of ORDER BY, GROUP BY, DISTINCT, DISTINCT ON items
+ *
+ * You might think that ORDER BY is only interested in defining ordering,
+ * and GROUP/DISTINCT are only interested in defining equality.  However,
+ * one way to implement grouping is to sort and then apply a "uniq"-like
+ * filter.  So it's also interesting to keep track of possible sort operators
+ * for GROUP/DISTINCT, and in particular to try to sort for the grouping
+ * in a way that will also yield a requested ORDER BY ordering.  So we need
+ * to be able to compare ORDER BY and GROUP/DISTINCT lists, which motivates
+ * the decision to give them the same representation.
 *
 * tleSortGroupRef must match ressortgroupref of exactly one entry of the
- * associated targetlist; that is the expression to be sorted (or grouped) by.
+ *		query's targetlist; that is the expression to be sorted or grouped by.
- * sortop is the OID of the ordering operator (a "<" or ">" operator).
+ * eqop is the OID of the equality operator.
- * nulls_first means about what you'd expect.
+ * sortop is the OID of the ordering operator (a "<" or ">" operator),
+ *		or InvalidOid if not available.
+ * nulls_first means about what you'd expect.  If sortop is InvalidOid
+ *		then nulls_first is meaningless and should be set to false.
+ *
+ * In an ORDER BY item, all fields must be valid.  (The eqop isn't essential
+ * here, but it's cheap to get it along with the sortop, and requiring it
+ * to be valid eases comparisons to grouping items.)
+ *
+ * In a grouping item, eqop must be valid.  If the eqop is a btree equality
+ * operator, then sortop should be set to a compatible ordering operator.
+ * We prefer to set eqop/sortop/nulls_first to match any ORDER BY item that
+ * the query presents for the same tlist item.  If there is none, we just
+ * use the default ordering op for the datatype.
 *
- * SortClauses are also used to identify targets that we will do a "Unique"
+ * If the tlist item's type has a hash opclass but no btree opclass, then
- * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON).  The
+ * we will set eqop to the hash equality operator, sortop to InvalidOid,
- * distinctClause list is a list of SortClauses for the expressions to be
+ * and nulls_first to false.  A grouping item of this kind can only be
- * unique-ified.  (As per comment for GroupClause, this overspecifies the
+ * implemented by hashing, and of course it'll never match an ORDER BY item.
- * semantics.)  In SELECT DISTINCT, the distinctClause list is typically
+ *
- * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically
+ * A query might have both ORDER BY and DISTINCT (or DISTINCT ON) clauses.
- * shorter.  The two lists must match up to the end of the shorter one ---
+ * In SELECT DISTINCT, the distinctClause list is as long or longer than the
- * the parser rearranges the distinctClause if necessary to make this true.
+ * sortClause list, while in SELECT DISTINCT ON it's typically shorter.
- * (This restriction ensures that only one sort step is needed to both
+ * The two lists must match up to the end of the shorter one --- the parser
- * satisfy the ORDER BY and set up for the Unique step.  This is semantically
+ * rearranges the distinctClause if necessary to make this true.  (This
- * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.)
+ * restriction ensures that only one sort step is needed to both satisfy the
+ * ORDER BY and set up for the Unique step.  This is semantically necessary
+ * for DISTINCT ON, and presents no real drawback for DISTINCT.)
 */
-typedef struct SortClause
+typedef struct SortGroupClause
 {
 	NodeTag		type;
 	Index		tleSortGroupRef;	/* reference into targetlist */
-	Oid			sortop;			/* the ordering operator ('<' op) */
+	Oid			eqop;				/* the equality operator ('=' op) */
+	Oid			sortop;				/* the ordering operator ('<' op), or 0 */
 	bool		nulls_first;		/* do NULLs come before normal values? */
-} SortClause;
+} SortGroupClause;
-/*
- * GroupClause -
- *	   representation of GROUP BY clauses
- *
- * GroupClause is exactly like SortClause except for the nodetag value.
- * We have routines that operate interchangeably on both.
- *
- * XXX SortClause overspecifies the semantics so far as GROUP BY is concerned
- * (ditto for DISTINCT).  It'd be better to specify an equality operator not
- * an ordering operator.  However, the two implementations are tightly entwined
- * at the moment ... breaking them apart is work for another day.
- */
-typedef SortClause GroupClause;
 /*
 * RowMarkClause -

--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.137 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -969,14 +969,14 @@ typedef struct CurrentOfExpr
 * a specific system-generated name (such as "ctid") or NULL; anything else
 * risks confusing ExecGetJunkAttribute!
 *
- * ressortgroupref is used in the representation of ORDER BY and
+ * ressortgroupref is used in the representation of ORDER BY, GROUP BY, and
- * GROUP BY items.	Targetlist entries with ressortgroupref=0 are not
+ * DISTINCT items.  Targetlist entries with ressortgroupref=0 are not
- * sort/group items.  If ressortgroupref>0, then this item is an ORDER BY or
+ * sort/group items.  If ressortgroupref>0, then this item is an ORDER BY,
- * GROUP BY value.	No two entries in a targetlist may have the same nonzero
+ * GROUP BY, and/or DISTINCT target value.  No two entries in a targetlist
- * ressortgroupref --- but there is no particular meaning to the nonzero
+ * may have the same nonzero ressortgroupref --- but there is no particular
- * values, except as tags.	(For example, one must not assume that lower
+ * meaning to the nonzero values, except as tags.  (For example, one must
- * ressortgroupref means a more significant sort key.)	The order of the
+ * not assume that lower ressortgroupref means a more significant sort key.)
- * associated SortClause or GroupClause lists determine the semantics.
+ * The order of the associated SortGroupClause lists determine the semantics.
 *
 * resorigtbl/resorigcol identify the source of the column, if it is a
 * simple reference to a column of a base table (or view).	If it is not

--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.90 2008/04/01 00:48:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -63,9 +63,6 @@ extern Relids find_nonnullable_rels(Node *clause);
 extern bool is_pseudo_constant_clause(Node *clause);
 extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
-extern bool has_distinct_clause(Query *query);
-extern bool has_distinct_on_clause(Query *query);
 extern int	NumRelids(Node *clause);
 extern void CommuteOpExpr(OpExpr *clause);

--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.49 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -25,11 +25,11 @@ extern List *add_to_flat_tlist(List *tlist, List *vars);
 extern TargetEntry *get_sortgroupref_tle(Index sortref,
 					 List *targetList);
-extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause,
+extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause,
 						List *targetList);
-extern Node *get_sortgroupclause_expr(SortClause *sortClause,
+extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause,
 						 List *targetList);
-extern List *get_sortgrouplist_exprs(List *sortClauses,
+extern List *get_sortgrouplist_exprs(List *sgClauses,
 						List *targetList);
 extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK);

--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -30,13 +30,14 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist,
 					 List **targetlist, List *sortClause);
 extern List *transformSortClause(ParseState *pstate, List *orderlist,
 					List **targetlist, bool resolveUnknown);
-extern List *transformDistinctClause(ParseState *pstate, List *distinctlist,
+extern List *transformDistinctClause(ParseState *pstate,
+						List **targetlist, List *sortClause);
+extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
 						List **targetlist, List *sortClause);
-extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
+extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
-					List *sortlist, List *targetlist,
+					 List *grouplist, List *targetlist,
-					SortByDir sortby_dir, SortByNulls sortby_nulls,
+					 bool requireSortOp, bool resolveUnknown);
-					List *sortby_opname, bool resolveUnknown);
 extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
 extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);

--- a/src/include/parser/parse_oper.h
+++ b/src/include/parser/parse_oper.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.42 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.43 2008/08/02 21:32:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -45,16 +45,13 @@ extern Operator compatible_oper(ParseState *pstate, List *op,
 /* currently no need for compatible_left_oper/compatible_right_oper */
-/* Routines for identifying "=", "<", ">" operators for a type */
+/* Routines for identifying "<", "=", ">" operators for a type */
-extern Operator equality_oper(Oid argtype, bool noError);
+extern void get_sort_group_operators(Oid argtype,
-extern Operator ordering_oper(Oid argtype, bool noError);
+						 bool needLT, bool needEQ, bool needGT,
-extern Operator reverse_ordering_oper(Oid argtype, bool noError);
+						 Oid *ltOpr, Oid *eqOpr, Oid *gtOpr);
 /* Convenience routines for common calls on the above */
 extern Oid	compatible_oper_opid(List *op, Oid arg1, Oid arg2, bool noError);
-extern Oid	equality_oper_funcid(Oid argtype);
-extern Oid	ordering_oper_opid(Oid argtype);
-extern Oid	reverse_ordering_oper_opid(Oid argtype);
 /* Extract operator OID or underlying-function OID from an Operator tuple */
 extern Oid	oprid(Operator op);

--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.124 2008/07/30 17:05:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.125 2008/08/02 21:32:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -38,7 +38,7 @@ extern bool get_ordering_op_properties(Oid opno,
 						   Oid *opfamily, Oid *opcintype, int16 *strategy);
 extern bool get_compare_function_for_ordering_op(Oid opno,
 									 Oid *cmpfunc, bool *reverse);
-extern Oid	get_equality_op_for_ordering_op(Oid opno);
+extern Oid	get_equality_op_for_ordering_op(Oid opno, bool *reverse);
 extern Oid	get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type);
 extern List *get_mergejoin_opfamilies(Oid opno);
 extern bool get_compatible_hash_operators(Oid opno,
@@ -47,7 +47,7 @@ extern bool get_op_hash_functions(Oid opno,
 					  RegProcedure *lhs_procno, RegProcedure *rhs_procno);
 extern void get_op_btree_interpretation(Oid opno,
 							List **opfamilies, List **opstrats);
-extern bool ops_in_same_btree_opfamily(Oid opno1, Oid opno2);
+extern bool equality_ops_are_compatible(Oid opno1, Oid opno2);
 extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype,
 				  int16 procnum);
 extern char *get_attname(Oid relid, AttrNumber attnum);