Extend abbreviated key infrastructure to datum tuplesorts.

Andrew Gierth, reviewed by Peter Geoghegan and by me.

Extend abbreviated key infrastructure to datum tuplesorts.
Andrew Gierth, reviewed by Peter Geoghegan and by me.
78efd5c1 · Robert Haas · 0bb8528b · 78efd5c1 · 78efd5c1 · 78efd5c1
Commit 78efd5c1 authored May 13, 2015 by Robert Haas
3 changed files
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -363,10 +363,6 @@ initialize_aggregates(AggState *aggstate,
 			 * We use a plain Datum sorter when there's a single input column;
 			 * otherwise sort the full tuple.  (See comments for
 			 * process_ordered_aggregate_single.)
-			 *
-			 * In the future, we should consider forcing the
-			 * tuplesort_begin_heap() case when the abbreviated key
-			 * optimization can thereby be used, even when numInputs is 1.
 			 */
 			peraggstate->sortstate =
 				(peraggstate->numInputs == 1) ?

--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -268,10 +268,6 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 	/*
 	 * Initialize tuplesort object.
-	 *
-	 * In the future, we should consider forcing the tuplesort_begin_heap()
-	 * case when the abbreviated key optimization can thereby be used, even
-	 * when !use_tuples.
 	 */
 	if (use_tuples)
 		osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,

--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -147,13 +147,18 @@ bool		optimize_bounded_sort = true;
 * case where the first key determines the comparison result.  Note that
 * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
 *
+ * There is one special case: when the sort support infrastructure provides an
+ * "abbreviated key" representation, where the key is (typically) a pass by
+ * value proxy for a pass by reference type.  In this case, the abbreviated key
+ * is stored in datum1 in place of the actual first key column.
+ *
 * When sorting single Datums, the data value is represented directly by
- * datum1/isnull1.  If the datatype is pass-by-reference and isnull1 is false,
+ * datum1/isnull1 for pass by value types (or null values).  If the datatype is
- * then datum1 points to a separately palloc'd data value that is also pointed
+ * pass-by-reference and isnull1 is false, then "tuple" points to a separately
- * to by the "tuple" pointer; otherwise "tuple" is NULL.  There is one special
+ * palloc'd data value, otherwise "tuple" is NULL.  The value of datum1 is then
- * case:  when the sort support infrastructure provides an "abbreviated key"
+ * either the same pointer as "tuple", or is an abbreviated key value as
- * representation, where the key is (typically) a pass by value proxy for a
+ * described above.  Accordingly, "tuple" is always used in preference to
- * pass by reference type.
+ * datum1 as the authoritative value for pass-by-reference cases.
 *
 * While building initial runs, tupindex holds the tuple's run number.  During
 * merge passes, we re-use it to hold the input tape number that each tuple in
@@ -901,30 +906,36 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->copytup = copytup_datum;
 	state->writetup = writetup_datum;
 	state->readtup = readtup_datum;
+	state->abbrevNext = 10;
 	state->datumType = datumType;
-	/* Prepare SortSupport data */
-	state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData));
-	state->onlyKey->ssup_cxt = CurrentMemoryContext;
-	state->onlyKey->ssup_collation = sortCollation;
-	state->onlyKey->ssup_nulls_first = nullsFirstFlag;
-	/*
-	 * Conversion to abbreviated representation infeasible in the Datum case.
-	 * It must be possible to subsequently fetch original datum values within
-	 * tuplesort_getdatum(), which would require special-case preservation of
-	 * original values.
-	 */
-	state->onlyKey->abbreviate = false;
-	PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
 	/* lookup necessary attributes of the datum type */
 	get_typlenbyval(datumType, &typlen, &typbyval);
 	state->datumTypeLen = typlen;
 	state->datumTypeByVal = typbyval;
+	/* Prepare SortSupport data */
+	state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
+	state->sortKeys->ssup_cxt = CurrentMemoryContext;
+	state->sortKeys->ssup_collation = sortCollation;
+	state->sortKeys->ssup_nulls_first = nullsFirstFlag;
+	/* abbreviation is possible here only for by-reference types */
+	state->sortKeys->abbreviate = !typbyval;
+	PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
+	/*
+	 * The "onlyKey" optimization cannot be used with abbreviated keys, since
+	 * tie-breaker comparisons may be required.  Typically, the optimization is
+	 * only of value to pass-by-value types anyway, whereas abbreviated keys
+	 * are typically only of value to pass-by-reference types.
+	 */
+	if (!state->sortKeys->abbrev_converter)
+		state->onlyKey = state->sortKeys;
 	MemoryContextSwitchTo(oldcontext);
 	return state;
@@ -1307,9 +1318,17 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	SortTuple	stup;
 	/*
-	 * If it's a pass-by-reference value, copy it into memory we control, and
+	 * Pass-by-value types or null values are just stored directly in
-	 * decrease availMem.  Then call the common code.
+	 * stup.datum1 (and stup.tuple is not used and set to NULL).
+	 *
+	 * Non-null pass-by-reference values need to be copied into memory we
+	 * control, and possibly abbreviated. The copied value is pointed to by
+	 * stup.tuple and is treated as the canonical copy (e.g. to return via
+	 * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
+	 * abbreviated value if abbreviation is happening, otherwise it's identical
+	 * to stup.tuple.
 	 */
 	if (isNull || state->datumTypeByVal)
 	{
 		stup.datum1 = val;
@@ -1318,10 +1337,44 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	}
 	else
 	{
-		stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+		Datum	original = datumCopy(val, false, state->datumTypeLen);
 		stup.isnull1 = false;
-		stup.tuple = DatumGetPointer(stup.datum1);
+		stup.tuple = DatumGetPointer(original);
 		USEMEM(state, GetMemoryChunkSpace(stup.tuple));
+		if (!state->sortKeys->abbrev_converter)
+		{
+			stup.datum1 = original;
+		}
+		else if (!consider_abort_common(state))
+		{
+			/* Store abbreviated key representation */
+			stup.datum1 = state->sortKeys->abbrev_converter(original,
+															state->sortKeys);
+		}
+		else
+		{
+			/* Abort abbreviation */
+			int		i;
+			stup.datum1 = original;
+			/*
+			 * Set state to be consistent with never trying abbreviation.
+			 *
+			 * Alter datum1 representation in already-copied tuples, so as to
+			 * ensure a consistent representation (current tuple was just handled).
+			 * Note that we rely on all tuples copied so far actually being
+			 * contained within memtuples array.
+			 */
+			for (i = 0; i < state->memtupcount; i++)
+			{
+				SortTuple *mtup = &state->memtuples[i];
+				mtup->datum1 = PointerGetDatum(mtup->tuple);
+			}
+		}
 	}
 	puttuple_common(state, &stup);
@@ -1886,10 +1939,12 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
 	}
 	else
 	{
+		/* use stup.tuple because stup.datum1 may be an abbreviation */
 		if (should_free)
-			*val = stup.datum1;
+			*val = PointerGetDatum(stup.tuple);
 		else
-			*val = datumCopy(stup.datum1, false, state->datumTypeLen);
+			*val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen);
 		*isNull = false;
 	}
@@ -3715,9 +3770,22 @@ readtup_index(Tuplesortstate *state, SortTuple *stup,
 static int
 comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 {
-	return ApplySortComparator(a->datum1, a->isnull1,
+	int		compare;
-							   b->datum1, b->isnull1,
-							   state->onlyKey);
+	compare = ApplySortComparator(a->datum1, a->isnull1,
+								  b->datum1, b->isnull1,
+								  state->sortKeys);
+	if (compare != 0)
+		return compare;
+	/* if we have abbreviations, then "tuple" has the original value */
+	if (state->sortKeys->abbrev_converter)
+		compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
+												PointerGetDatum(b->tuple), b->isnull1,
+												state->sortKeys);
+	return compare;
 }
 static void
@@ -3746,8 +3814,8 @@ writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup)
 	}
 	else
 	{
-		waddr = DatumGetPointer(stup->datum1);
+		waddr = stup->tuple;
-		tuplen = datumGetSize(stup->datum1, false, state->datumTypeLen);
+		tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen);
 		Assert(tuplen != 0);
 	}