Commit a76ef15d authored by Robert Haas's avatar Robert Haas

Add sort support routine for the UUID data type.

This introduces a simple encoding scheme to produce abbreviated keys:
pack as many bytes of each UUID as will fit into a Datum.  On
little-endian machines, a byteswap is also performed; the abbreviated
comparator can therefore just consist of a simple 3-way unsigned integer
comparison.

The purpose of this change is to speed up sorting data on a column
of type UUID.

Peter Geoghegan
parent 5644419b
......@@ -14,8 +14,12 @@
#include "postgres.h"
#include "access/hash.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "port/pg_bswap.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/sortsupport.h"
#include "utils/uuid.h"
/* uuid size in bytes */
......@@ -27,8 +31,21 @@ struct pg_uuid_t
unsigned char data[UUID_LEN];
};
/* sortsupport for uuid */
typedef struct
{
int64 input_count; /* number of non-null values seen */
bool estimating; /* true if estimating cardinality */
hyperLogLogState abbr_card; /* cardinality estimator */
} uuid_sortsupport_state;
static void string_to_uuid(const char *source, pg_uuid_t *uuid);
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
Datum
uuid_in(PG_FUNCTION_ARGS)
......@@ -222,6 +239,176 @@ uuid_cmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
}
/*
* Sort support strategy routine
*/
Datum
uuid_sortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
ssup->comparator = uuid_fast_cmp;
ssup->ssup_extra = NULL;
if (ssup->abbreviate)
{
uuid_sortsupport_state *uss;
MemoryContext oldcontext;
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
uss = palloc(sizeof(uuid_sortsupport_state));
uss->input_count = 0;
uss->estimating = true;
initHyperLogLog(&uss->abbr_card, 10);
ssup->ssup_extra = uss;
ssup->comparator = uuid_cmp_abbrev;
ssup->abbrev_converter = uuid_abbrev_convert;
ssup->abbrev_abort = uuid_abbrev_abort;
ssup->abbrev_full_comparator = uuid_fast_cmp;
MemoryContextSwitchTo(oldcontext);
}
PG_RETURN_VOID();
}
/*
* SortSupport comparison func
*/
static int
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
{
pg_uuid_t *arg1 = DatumGetUUIDP(x);
pg_uuid_t *arg2 = DatumGetUUIDP(y);
return uuid_internal_cmp(arg1, arg2);
}
/*
* Abbreviated key comparison func
*/
static int
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
if (x > y)
return 1;
else if (x == y)
return 0;
else
return -1;
}
/*
* Callback for estimating effectiveness of abbreviated key optimization.
*
* We pay no attention to the cardinality of the non-abbreviated data, because
* there is no equality fast-path within authoritative uuid comparator.
*/
static bool
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
{
uuid_sortsupport_state *uss = ssup->ssup_extra;
double abbr_card;
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
return false;
abbr_card = estimateHyperLogLog(&uss->abbr_card);
/*
* If we have >100k distinct values, then even if we were sorting many
* billion rows we'd likely still break even, and the penalty of undoing
* that many rows of abbrevs would probably not be worth it. Stop even
* counting at that point.
*/
if (abbr_card > 100000.0)
{
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: estimation ends at cardinality %f"
" after " INT64_FORMAT " values (%d rows)",
abbr_card, uss->input_count, memtupcount);
#endif
uss->estimating = false;
return false;
}
/*
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
* fudge factor allows us to abort earlier on genuinely pathological data
* where we've had exactly one abbreviated value in the first 2k (non-null)
* rows.
*/
if (abbr_card < uss->input_count / 2000.0 + 0.5)
{
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: aborting abbreviation at cardinality %f"
" below threshold %f after " INT64_FORMAT " values (%d rows)",
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
memtupcount);
#endif
return true;
}
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"uuid_abbrev: cardinality %f after " INT64_FORMAT
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
#endif
return false;
}
/*
* Conversion routine for sortsupport. Converts original uuid representation
* to abbreviated key representation. Our encoding strategy is simple -- pack
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
* machines, the bytes are stored in reverse order), and treat it as an
* unsigned integer.
*/
static Datum
uuid_abbrev_convert(Datum original, SortSupport ssup)
{
uuid_sortsupport_state *uss = ssup->ssup_extra;
pg_uuid_t *authoritative = DatumGetUUIDP(original);
Datum res;
memcpy((char *) &res, authoritative->data, sizeof(Datum));
uss->input_count += 1;
if (uss->estimating)
{
uint32 tmp;
#if SIZEOF_DATUM == 8
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
#else /* SIZEOF_DATUM != 8 */
tmp = (uint32) res;
#endif
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
}
/*
* Byteswap on little-endian machines.
*
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
* comparator) works correctly on all platforms. If we didn't do this, the
* comparator would have to call memcmp() with a pair of pointers to the
* first byte of each abbreviated key, which is slower.
*/
res = DatumBigEndianToNative(res);
return res;
}
/* hash index support */
Datum
uuid_hash(PG_FUNCTION_ARGS)
......
......@@ -134,6 +134,7 @@ DATA(insert ( 2233 703 703 1 380 ));
DATA(insert ( 2234 704 704 1 381 ));
DATA(insert ( 2789 27 27 1 2794 ));
DATA(insert ( 2968 2950 2950 1 2960 ));
DATA(insert ( 2968 2950 2950 2 3300 ));
DATA(insert ( 2994 2249 2249 1 2987 ));
DATA(insert ( 3194 2249 2249 1 3187 ));
DATA(insert ( 3253 3220 3220 1 3251 ));
......
......@@ -4467,6 +4467,8 @@ DATA(insert OID = 2958 ( uuid_gt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
DATA(insert OID = 2959 ( uuid_ne PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_ne _null_ _null_ _null_ ));
DATA(insert OID = 2960 ( uuid_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
DATA(insert OID = 3300 ( uuid_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ uuid_sortsupport _null_ _null_ _null_ ));
DESCR("sort support");
DATA(insert OID = 2961 ( uuid_recv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2950 "2281" _null_ _null_ _null_ _null_ _null_ uuid_recv _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 2962 ( uuid_send PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 17 "2950" _null_ _null_ _null_ _null_ _null_ uuid_send _null_ _null_ _null_ ));
......
......@@ -1174,6 +1174,7 @@ extern Datum uuid_ge(PG_FUNCTION_ARGS);
extern Datum uuid_gt(PG_FUNCTION_ARGS);
extern Datum uuid_ne(PG_FUNCTION_ARGS);
extern Datum uuid_cmp(PG_FUNCTION_ARGS);
extern Datum uuid_sortsupport(PG_FUNCTION_ARGS);
extern Datum uuid_hash(PG_FUNCTION_ARGS);
/* windowfuncs.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment