Commit 9221f9d4 authored by Tom Lane

Make contrib/btree_gist's GiST penalty function a bit saner.

The previous coding supposed that the first differing bytes in two varlena
datums must have the same sign difference as their overall comparison
result.  This is obviously bogus for text strings in non-C locales, and
probably wrong for numeric, and even for bytea I think it was wrong on
machines where char is signed.  When the assumption failed, the function
could deliver a zero or negative penalty in situations where such a result
is quite ridiculous, leading the core GiST code to make very bad page-split
decisions.

To fix, take the absolute values of the byte-level differences.  Also,
switch the code to using unsigned char not just char, so that the behavior
will be consistent whether char is signed or not.

Per investigation of a trouble report from Tomas Vondra.  Back-patch to all
supported branches.
parent 94f565dc
...@@ -108,14 +108,12 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo) ...@@ -108,14 +108,12 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo)
static int32 static int32
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
{ {
GBT_VARKEY_R r = gbt_var_key_readable(node); GBT_VARKEY_R r = gbt_var_key_readable(node);
int32 i = 0; int32 i = 0;
int32 l = 0; int32 l = 0;
int32 t1len = VARSIZE(r.lower) - VARHDRSZ; int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
int32 t2len = VARSIZE(r.upper) - VARHDRSZ; int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
int32 ml = Min(t1len, t2len); int32 ml = Min(t1len, t2len);
char *p1 = VARDATA(r.lower); char *p1 = VARDATA(r.lower);
char *p2 = VARDATA(r.upper); char *p2 = VARDATA(r.upper);
...@@ -126,7 +124,6 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) ...@@ -126,7 +124,6 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
{ {
if (tinfo->eml > 1 && l == 0) if (tinfo->eml > 1 && l == 0)
{ {
if ((l = pg_mblen(p1)) != pg_mblen(p2)) if ((l = pg_mblen(p1)) != pg_mblen(p2))
{ {
return i; return i;
...@@ -369,13 +366,14 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, ...@@ -369,13 +366,14 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key); GBT_VARKEY *newe = (GBT_VARKEY *) DatumGetPointer(n->key);
GBT_VARKEY_R ok, GBT_VARKEY_R ok,
nk; nk;
GBT_VARKEY *tmp = NULL;
*res = 0.0; *res = 0.0;
nk = gbt_var_key_readable(newe); nk = gbt_var_key_readable(newe);
if (nk.lower == nk.upper) /* leaf */ if (nk.lower == nk.upper) /* leaf */
{ {
GBT_VARKEY *tmp;
tmp = gbt_var_leaf2node(newe, tinfo); tmp = gbt_var_leaf2node(newe, tinfo);
if (tmp != newe) if (tmp != newe)
nk = gbt_var_key_readable(tmp); nk = gbt_var_key_readable(tmp);
...@@ -390,7 +388,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, ...@@ -390,7 +388,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
gbt_bytea_pf_match(ok.upper, nk.upper, tinfo)))) gbt_bytea_pf_match(ok.upper, nk.upper, tinfo))))
{ {
Datum d = PointerGetDatum(0); Datum d = PointerGetDatum(0);
double dres = 0.0; double dres;
int32 ol, int32 ol,
ul; ul;
...@@ -401,20 +399,18 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n, ...@@ -401,20 +399,18 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
if (ul < ol) if (ul < ol)
{ {
dres = (ol - ul); /* lost of common prefix len */ dres = (ol - ul); /* reduction of common prefix len */
} }
else else
{ {
GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d)); GBT_VARKEY_R uk = gbt_var_key_readable((GBT_VARKEY *) DatumGetPointer(d));
unsigned char tmp[4];
char tmp[4]; tmp[0] = (unsigned char) (((VARSIZE(ok.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.lower)[ul]));
tmp[1] = (unsigned char) (((VARSIZE(uk.lower) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.lower)[ul]));
tmp[0] = ((VARSIZE(ok.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.lower)[ul]); tmp[2] = (unsigned char) (((VARSIZE(ok.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(ok.upper)[ul]));
tmp[1] = ((VARSIZE(uk.lower) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.lower)[ul]); tmp[3] = (unsigned char) (((VARSIZE(uk.upper) - VARHDRSZ) <= ul) ? 0 : (VARDATA(uk.upper)[ul]));
tmp[2] = ((VARSIZE(ok.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(ok.upper)[ul]); dres = Abs(tmp[0] - tmp[1]) + Abs(tmp[3] - tmp[2]);
tmp[3] = ((VARSIZE(uk.upper) - VARHDRSZ) == ul) ? (CHAR_MIN) : (VARDATA(uk.upper)[ul]);
dres = (tmp[0] - tmp[1]) +
(tmp[3] - tmp[2]);
dres /= 256.0; dres /= 256.0;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment