Commit b34e37bf authored by Robert Haas's avatar Robert Haas

Add sortsupport routines for text.

This provides a small but worthwhile speedup when sorting text, at least
in cases to which the sortsupport machinery applies.

Robert Haas and Peter Geoghegan
parent a4287a68
......@@ -28,7 +28,9 @@
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/sortsupport.h"
/* GUC variable */
......@@ -50,12 +52,32 @@ typedef struct
int skiptable[256]; /* skip distance for given mismatched char */
} TextPositionState;
typedef struct
{
char *buf1; /* 1st string */
char *buf2; /* 2nd string */
int buflen1;
int buflen2;
#ifdef HAVE_LOCALE_T
pg_locale_t locale;
#endif
} TextSortSupport;
/*
* This should be large enough that most strings will fit, but small enough
* that we feel comfortable putting it on the stack
*/
#define TEXTBUFLEN 1024
#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
static void btsortsupport_worker(SortSupport ssup, Oid collid);
static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
static int32 text_length(Datum str);
static text *text_catenate(text *t1, text *t2);
static text *text_substring(Datum str,
......@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
else
{
#define STACKBUFLEN 1024
char a1buf[STACKBUFLEN];
char a2buf[STACKBUFLEN];
char a1buf[TEXTBUFLEN];
char a2buf[TEXTBUFLEN];
char *a1p,
*a2p;
......@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
int a2len;
int r;
if (len1 >= STACKBUFLEN / 2)
if (len1 >= TEXTBUFLEN / 2)
{
a1len = len1 * 2 + 2;
a1p = palloc(a1len);
}
else
{
a1len = STACKBUFLEN;
a1len = TEXTBUFLEN;
a1p = a1buf;
}
if (len2 >= STACKBUFLEN / 2)
if (len2 >= TEXTBUFLEN / 2)
{
a2len = len2 * 2 + 2;
a2p = palloc(a2len);
}
else
{
a2len = STACKBUFLEN;
a2len = TEXTBUFLEN;
a2p = a2buf;
}
......@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
#endif /* WIN32 */
if (len1 >= STACKBUFLEN)
if (len1 >= TEXTBUFLEN)
a1p = (char *) palloc(len1 + 1);
else
a1p = a1buf;
if (len2 >= STACKBUFLEN)
if (len2 >= TEXTBUFLEN)
a2p = (char *) palloc(len2 + 1);
else
a2p = a2buf;
......@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(result);
}
Datum
bttextsortsupport(PG_FUNCTION_ARGS)
{
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
Oid collid = ssup->ssup_collation;
MemoryContext oldcontext;
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
btsortsupport_worker(ssup, collid);
MemoryContextSwitchTo(oldcontext);
PG_RETURN_VOID();
}
static void
btsortsupport_worker(SortSupport ssup, Oid collid)
{
TextSortSupport *tss;
/*
* If LC_COLLATE = C, we can make things quite a bit faster by using
* memcmp() rather than strcoll(). To minimize the per-comparison
* overhead, we make this decision just once for the whole sort.
*/
if (lc_collate_is_c(collid))
{
ssup->comparator = bttextfastcmp_c;
return;
}
/*
* WIN32 requires complex hacks when the database encoding is UTF-8 (except
* when using the "C" collation). For now, we don't optimize that case.
*/
#ifdef WIN32
if (GetDatabaseEncoding() == PG_UTF8)
return;
#endif
/*
* We may need a collation-sensitive comparison. To make things faster,
* we'll figure out the collation based on the locale id and cache the
* result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
* prepare one or two palloc'd buffers to use as temporary workspace. In
* the ad-hoc comparison case we only use palloc'd buffers when we need
* more space than we're comfortable allocating on the stack, but here we
* can keep the buffers around for the whole sort, so it makes sense to
* allocate them once and use them unconditionally.
*/
tss = palloc(sizeof(TextSortSupport));
#ifdef HAVE_LOCALE_T
tss->locale = 0;
#endif
if (collid != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collid))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for string comparison"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
#ifdef HAVE_LOCALE_T
tss->locale = pg_newlocale_from_collation(collid);
#endif
}
tss->buf1 = palloc(TEXTBUFLEN);
tss->buflen1 = TEXTBUFLEN;
tss->buf2 = palloc(TEXTBUFLEN);
tss->buflen2 = TEXTBUFLEN;
ssup->ssup_extra = tss;
ssup->comparator = bttextfastcmp_locale;
}
/*
* sortsupport comparison func (for C locale case)
*/
static int
bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
{
text *arg1 = DatumGetTextPP(x);
text *arg2 = DatumGetTextPP(y);
char *a1p,
*a2p;
int len1,
len2,
result;
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
result = memcmp(a1p, a2p, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
/* We can't afford to leak memory here. */
if (PointerGetDatum(arg1) != x)
pfree(arg1);
if (PointerGetDatum(arg2) != y)
pfree(arg2);
return result;
}
/*
* sortsupport comparison func (for locale case)
*/
static int
bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
{
text *arg1 = DatumGetTextPP(x);
text *arg2 = DatumGetTextPP(y);
TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
/* working state */
char *a1p,
*a2p;
int len1,
len2,
result;
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
if (len1 >= tss->buflen1)
{
pfree(tss->buf1);
tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
}
if (len2 >= tss->buflen2)
{
pfree(tss->buf2);
tss->buflen1 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
}
memcpy(tss->buf1, a1p, len1);
tss->buf1[len1] = '\0';
memcpy(tss->buf2, a2p, len2);
tss->buf2[len2] = '\0';
#ifdef HAVE_LOCALE_T
if (tss->locale)
result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
else
#endif
result = strcoll(tss->buf1, tss->buf2);
/*
* In some locales strcoll() can claim that nonidentical strings are equal.
* Believing that would be bad news for a number of reasons, so we follow
* Perl's lead and sort "equal" strings according to strcmp().
*/
if (result == 0)
result = strcmp(tss->buf1, tss->buf2);
/* We can't afford to leak memory here. */
if (PointerGetDatum(arg1) != x)
pfree(arg1);
if (PointerGetDatum(arg2) != y)
pfree(arg2);
return result;
}
Datum
text_larger(PG_FUNCTION_ARGS)
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201407151
#define CATALOG_VERSION_NO 201408141
#endif
......@@ -122,6 +122,7 @@ DATA(insert ( 1989 26 26 1 356 ));
DATA(insert ( 1989 26 26 2 3134 ));
DATA(insert ( 1991 30 30 1 404 ));
DATA(insert ( 1994 25 25 1 360 ));
DATA(insert ( 1994 25 25 2 3255 ));
DATA(insert ( 1996 1083 1083 1 1107 ));
DATA(insert ( 2000 1266 1266 1 1358 ));
DATA(insert ( 2002 1562 1562 1 1672 ));
......
......@@ -614,6 +614,8 @@ DATA(insert OID = 3135 ( btnamesortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i
DESCR("sort support");
DATA(insert OID = 360 ( bttextcmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "25 25" _null_ _null_ _null_ _null_ bttextcmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
DATA(insert OID = 3255 ( bttextsortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ bttextsortsupport _null_ _null_ _null_ ));
DESCR("sort support");
DATA(insert OID = 377 ( cash_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "790 790" _null_ _null_ _null_ _null_ cash_cmp _null_ _null_ _null_ ));
DESCR("less-equal-greater");
DATA(insert OID = 380 ( btreltimecmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "703 703" _null_ _null_ _null_ _null_ btreltimecmp _null_ _null_ _null_ ));
......
......@@ -316,6 +316,7 @@ extern Datum bttintervalcmp(PG_FUNCTION_ARGS);
extern Datum btcharcmp(PG_FUNCTION_ARGS);
extern Datum btnamecmp(PG_FUNCTION_ARGS);
extern Datum bttextcmp(PG_FUNCTION_ARGS);
extern Datum bttextsortsupport(PG_FUNCTION_ARGS);
/*
* Per-opclass sort support functions for new btrees. Like the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment