Commit 78efd5c1 authored by Robert Haas's avatar Robert Haas

Extend abbreviated key infrastructure to datum tuplesorts.

Andrew Gierth, reviewed by Peter Geoghegan and by me.
parent 0bb8528b
...@@ -363,10 +363,6 @@ initialize_aggregates(AggState *aggstate, ...@@ -363,10 +363,6 @@ initialize_aggregates(AggState *aggstate,
* We use a plain Datum sorter when there's a single input column; * We use a plain Datum sorter when there's a single input column;
* otherwise sort the full tuple. (See comments for * otherwise sort the full tuple. (See comments for
* process_ordered_aggregate_single.) * process_ordered_aggregate_single.)
*
* In the future, we should consider forcing the
* tuplesort_begin_heap() case when the abbreviated key
* optimization can thereby be used, even when numInputs is 1.
*/ */
peraggstate->sortstate = peraggstate->sortstate =
(peraggstate->numInputs == 1) ? (peraggstate->numInputs == 1) ?
......
...@@ -268,10 +268,6 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples) ...@@ -268,10 +268,6 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
/* /*
* Initialize tuplesort object. * Initialize tuplesort object.
*
* In the future, we should consider forcing the tuplesort_begin_heap()
* case when the abbreviated key optimization can thereby be used, even
* when !use_tuples.
*/ */
if (use_tuples) if (use_tuples)
osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc, osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,
......
...@@ -147,13 +147,18 @@ bool optimize_bounded_sort = true; ...@@ -147,13 +147,18 @@ bool optimize_bounded_sort = true;
* case where the first key determines the comparison result. Note that * case where the first key determines the comparison result. Note that
* for a pass-by-reference datatype, datum1 points into the "tuple" storage. * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
* *
* There is one special case: when the sort support infrastructure provides an
* "abbreviated key" representation, where the key is (typically) a pass by
* value proxy for a pass by reference type. In this case, the abbreviated key
* is stored in datum1 in place of the actual first key column.
*
* When sorting single Datums, the data value is represented directly by * When sorting single Datums, the data value is represented directly by
* datum1/isnull1. If the datatype is pass-by-reference and isnull1 is false, * datum1/isnull1 for pass by value types (or null values). If the datatype is
* then datum1 points to a separately palloc'd data value that is also pointed * pass-by-reference and isnull1 is false, then "tuple" points to a separately
* to by the "tuple" pointer; otherwise "tuple" is NULL. There is one special * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then
* case: when the sort support infrastructure provides an "abbreviated key" * either the same pointer as "tuple", or is an abbreviated key value as
* representation, where the key is (typically) a pass by value proxy for a * described above. Accordingly, "tuple" is always used in preference to
* pass by reference type. * datum1 as the authoritative value for pass-by-reference cases.
* *
* While building initial runs, tupindex holds the tuple's run number. During * While building initial runs, tupindex holds the tuple's run number. During
* merge passes, we re-use it to hold the input tape number that each tuple in * merge passes, we re-use it to hold the input tape number that each tuple in
...@@ -901,30 +906,36 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, ...@@ -901,30 +906,36 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
state->copytup = copytup_datum; state->copytup = copytup_datum;
state->writetup = writetup_datum; state->writetup = writetup_datum;
state->readtup = readtup_datum; state->readtup = readtup_datum;
state->abbrevNext = 10;
state->datumType = datumType; state->datumType = datumType;
/* Prepare SortSupport data */
state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData));
state->onlyKey->ssup_cxt = CurrentMemoryContext;
state->onlyKey->ssup_collation = sortCollation;
state->onlyKey->ssup_nulls_first = nullsFirstFlag;
/*
* Conversion to abbreviated representation infeasible in the Datum case.
* It must be possible to subsequently fetch original datum values within
* tuplesort_getdatum(), which would require special-case preservation of
* original values.
*/
state->onlyKey->abbreviate = false;
PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
/* lookup necessary attributes of the datum type */ /* lookup necessary attributes of the datum type */
get_typlenbyval(datumType, &typlen, &typbyval); get_typlenbyval(datumType, &typlen, &typbyval);
state->datumTypeLen = typlen; state->datumTypeLen = typlen;
state->datumTypeByVal = typbyval; state->datumTypeByVal = typbyval;
/* Prepare SortSupport data */
state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
state->sortKeys->ssup_cxt = CurrentMemoryContext;
state->sortKeys->ssup_collation = sortCollation;
state->sortKeys->ssup_nulls_first = nullsFirstFlag;
/* abbreviation is possible here only for by-reference types */
state->sortKeys->abbreviate = !typbyval;
PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
/*
* The "onlyKey" optimization cannot be used with abbreviated keys, since
* tie-breaker comparisons may be required. Typically, the optimization is
* only of value to pass-by-value types anyway, whereas abbreviated keys
* are typically only of value to pass-by-reference types.
*/
if (!state->sortKeys->abbrev_converter)
state->onlyKey = state->sortKeys;
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
return state; return state;
...@@ -1307,9 +1318,17 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) ...@@ -1307,9 +1318,17 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
SortTuple stup; SortTuple stup;
/* /*
* If it's a pass-by-reference value, copy it into memory we control, and * Pass-by-value types or null values are just stored directly in
* decrease availMem. Then call the common code. * stup.datum1 (and stup.tuple is not used and set to NULL).
*
* Non-null pass-by-reference values need to be copied into memory we
* control, and possibly abbreviated. The copied value is pointed to by
* stup.tuple and is treated as the canonical copy (e.g. to return via
* tuplesort_getdatum or when writing to tape); stup.datum1 gets the
* abbreviated value if abbreviation is happening, otherwise it's identical
* to stup.tuple.
*/ */
if (isNull || state->datumTypeByVal) if (isNull || state->datumTypeByVal)
{ {
stup.datum1 = val; stup.datum1 = val;
...@@ -1318,10 +1337,44 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) ...@@ -1318,10 +1337,44 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
} }
else else
{ {
stup.datum1 = datumCopy(val, false, state->datumTypeLen); Datum original = datumCopy(val, false, state->datumTypeLen);
stup.isnull1 = false; stup.isnull1 = false;
stup.tuple = DatumGetPointer(stup.datum1); stup.tuple = DatumGetPointer(original);
USEMEM(state, GetMemoryChunkSpace(stup.tuple)); USEMEM(state, GetMemoryChunkSpace(stup.tuple));
if (!state->sortKeys->abbrev_converter)
{
stup.datum1 = original;
}
else if (!consider_abort_common(state))
{
/* Store abbreviated key representation */
stup.datum1 = state->sortKeys->abbrev_converter(original,
state->sortKeys);
}
else
{
/* Abort abbreviation */
int i;
stup.datum1 = original;
/*
* Set state to be consistent with never trying abbreviation.
*
* Alter datum1 representation in already-copied tuples, so as to
* ensure a consistent representation (current tuple was just handled).
* Note that we rely on all tuples copied so far actually being
* contained within memtuples array.
*/
for (i = 0; i < state->memtupcount; i++)
{
SortTuple *mtup = &state->memtuples[i];
mtup->datum1 = PointerGetDatum(mtup->tuple);
}
}
} }
puttuple_common(state, &stup); puttuple_common(state, &stup);
...@@ -1886,10 +1939,12 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward, ...@@ -1886,10 +1939,12 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
} }
else else
{ {
/* use stup.tuple because stup.datum1 may be an abbreviation */
if (should_free) if (should_free)
*val = stup.datum1; *val = PointerGetDatum(stup.tuple);
else else
*val = datumCopy(stup.datum1, false, state->datumTypeLen); *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen);
*isNull = false; *isNull = false;
} }
...@@ -3715,9 +3770,22 @@ readtup_index(Tuplesortstate *state, SortTuple *stup, ...@@ -3715,9 +3770,22 @@ readtup_index(Tuplesortstate *state, SortTuple *stup,
static int static int
comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
{ {
return ApplySortComparator(a->datum1, a->isnull1, int compare;
b->datum1, b->isnull1,
state->onlyKey); compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
state->sortKeys);
if (compare != 0)
return compare;
/* if we have abbreviations, then "tuple" has the original value */
if (state->sortKeys->abbrev_converter)
compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
PointerGetDatum(b->tuple), b->isnull1,
state->sortKeys);
return compare;
} }
static void static void
...@@ -3746,8 +3814,8 @@ writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) ...@@ -3746,8 +3814,8 @@ writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup)
} }
else else
{ {
waddr = DatumGetPointer(stup->datum1); waddr = stup->tuple;
tuplen = datumGetSize(stup->datum1, false, state->datumTypeLen); tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen);
Assert(tuplen != 0); Assert(tuplen != 0);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment