Commit 58d9acc1 authored by Tom Lane's avatar Tom Lane

Fix assorted issues in convert_to_scalar().

If convert_to_scalar is passed a pair of datatypes it can't cope with,
its former behavior was just to elog(ERROR).  While this is OK so far as
the core code is concerned, there's extension code that would like to use
scalarltsel/scalargtsel/etc as selectivity estimators for operators that
work on non-core datatypes, and this behavior is a show-stopper for that
use-case.  If we simply allow convert_to_scalar to return FALSE instead of
outright failing, then the main logic of scalarltsel/scalargtsel will work
fine for any operator that behaves like a scalar inequality comparison.
The lack of conversion capability will mean that we can't estimate to
better than histogram-bin-width precision, since the code will effectively
assume that the comparison constant falls at the middle of its bin.  But
that's still a lot better than nothing.  (Someday we should provide a way
for extension code to supply a custom version of convert_to_scalar, but
today is not that day.)

While poking at this issue, we noted that the existing code for handling
type bytea in convert_to_scalar is several bricks shy of a load.
It assumes without checking that if the comparison value is type bytea,
the bounds values are too; in the worst case this could lead to a crash.
It also fails to detoast the input values, so that the comparison result is
complete garbage if any input is toasted out-of-line, compressed, or even
just short-header.  I'm not sure how often such cases actually occur ---
the bounds values, at least, are probably safe since they are elements of
an array and hence can't be toasted.  But that doesn't make this code OK.

Back-patch to all supported branches, partly because author requested that,
but mostly because of the bytea bugs.  The change in API for the exposed
routine convert_network_to_scalar() is theoretically a back-patch hazard,
but it seems pretty unlikely that any third-party code is calling that
function directly.

Tomas Vondra, with some adjustments by me

Discussion: https://postgr.es/m/b68441b6-d18f-13ab-b43b-9a72188a4e02@2ndquadrant.com
parent 7726147f
...@@ -99,9 +99,11 @@ gbt_inet_compress(PG_FUNCTION_ARGS) ...@@ -99,9 +99,11 @@ gbt_inet_compress(PG_FUNCTION_ARGS)
if (entry->leafkey) if (entry->leafkey)
{ {
inetKEY *r = (inetKEY *) palloc(sizeof(inetKEY)); inetKEY *r = (inetKEY *) palloc(sizeof(inetKEY));
bool failure = false;
retval = palloc(sizeof(GISTENTRY)); retval = palloc(sizeof(GISTENTRY));
r->lower = convert_network_to_scalar(entry->key, INETOID); r->lower = convert_network_to_scalar(entry->key, INETOID, &failure);
Assert(!failure);
r->upper = r->lower; r->upper = r->lower;
gistentryinit(*retval, PointerGetDatum(r), gistentryinit(*retval, PointerGetDatum(r),
entry->rel, entry->page, entry->rel, entry->page,
...@@ -118,13 +120,18 @@ Datum ...@@ -118,13 +120,18 @@ Datum
gbt_inet_consistent(PG_FUNCTION_ARGS) gbt_inet_consistent(PG_FUNCTION_ARGS)
{ {
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
double query = convert_network_to_scalar(PG_GETARG_DATUM(1), INETOID); Datum dquery = PG_GETARG_DATUM(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */ /* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4); bool *recheck = (bool *) PG_GETARG_POINTER(4);
inetKEY *kkk = (inetKEY *) DatumGetPointer(entry->key); inetKEY *kkk = (inetKEY *) DatumGetPointer(entry->key);
GBT_NUMKEY_R key; GBT_NUMKEY_R key;
double query;
bool failure = false;
query = convert_network_to_scalar(dquery, INETOID, &failure);
Assert(!failure);
/* All cases served by this function are inexact */ /* All cases served by this function are inexact */
*recheck = true; *recheck = true;
......
...@@ -902,9 +902,12 @@ inet_merge(PG_FUNCTION_ARGS) ...@@ -902,9 +902,12 @@ inet_merge(PG_FUNCTION_ARGS)
* Convert a value of a network datatype to an approximate scalar value. * Convert a value of a network datatype to an approximate scalar value.
* This is used for estimating selectivities of inequality operators * This is used for estimating selectivities of inequality operators
* involving network types. * involving network types.
*
* On failure (e.g., unsupported typid), set *failure to true;
* otherwise, that variable is not changed.
*/ */
double double
convert_network_to_scalar(Datum value, Oid typid) convert_network_to_scalar(Datum value, Oid typid, bool *failure)
{ {
switch (typid) switch (typid)
{ {
...@@ -931,8 +934,6 @@ convert_network_to_scalar(Datum value, Oid typid) ...@@ -931,8 +934,6 @@ convert_network_to_scalar(Datum value, Oid typid)
res += ip_addr(ip)[i]; res += ip_addr(ip)[i];
} }
return res; return res;
break;
} }
case MACADDROID: case MACADDROID:
{ {
...@@ -956,11 +957,7 @@ convert_network_to_scalar(Datum value, Oid typid) ...@@ -956,11 +957,7 @@ convert_network_to_scalar(Datum value, Oid typid)
} }
} }
/* *failure = true;
* Can't get here unless someone tries to use scalarineqsel() on an
* operator with one network and one non-network operand.
*/
elog(ERROR, "unsupported type: %u", typid);
return 0; return 0;
} }
......
...@@ -176,7 +176,7 @@ static bool estimate_multivariate_ndistinct(PlannerInfo *root, ...@@ -176,7 +176,7 @@ static bool estimate_multivariate_ndistinct(PlannerInfo *root,
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid, Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound); double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid); static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
static void convert_string_to_scalar(char *value, static void convert_string_to_scalar(char *value,
double *scaledvalue, double *scaledvalue,
char *lobound, char *lobound,
...@@ -193,8 +193,9 @@ static double convert_one_string_to_scalar(char *value, ...@@ -193,8 +193,9 @@ static double convert_one_string_to_scalar(char *value,
int rangelo, int rangehi); int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
int rangelo, int rangehi); int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid); static char *convert_string_datum(Datum value, Oid typid, bool *failure);
static double convert_timevalue_to_scalar(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid,
bool *failure);
static void examine_simple_variable(PlannerInfo *root, Var *var, static void examine_simple_variable(PlannerInfo *root, Var *var,
VariableStatData *vardata); VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
...@@ -556,7 +557,8 @@ neqsel(PG_FUNCTION_ARGS) ...@@ -556,7 +557,8 @@ neqsel(PG_FUNCTION_ARGS)
* *
* This routine works for any datatype (or pair of datatypes) known to * This routine works for any datatype (or pair of datatypes) known to
* convert_to_scalar(). If it is applied to some other datatype, * convert_to_scalar(). If it is applied to some other datatype,
* it will return a default estimate. * it will return an approximate estimate based on assuming that the constant
* value falls in the middle of the bin identified by binary search.
*/ */
static double static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
...@@ -4033,10 +4035,15 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4033,10 +4035,15 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid, Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound) double *scaledlobound, double *scaledhibound)
{ {
bool failure = false;
/* /*
* Both the valuetypid and the boundstypid should exactly match the * Both the valuetypid and the boundstypid should exactly match the
* declared input type(s) of the operator we are invoked for, so we just * declared input type(s) of the operator we are invoked for. However,
* error out if either is not recognized. * extensions might try to use scalarineqsel as estimator for operators
* with input type(s) we don't handle here; in such cases, we want to
* return false, not fail. In any case, we mustn't assume that valuetypid
* and boundstypid are identical.
* *
* XXX The histogram we are interpolating between points of could belong * XXX The histogram we are interpolating between points of could belong
* to a column that's only binary-compatible with the declared type. In * to a column that's only binary-compatible with the declared type. In
...@@ -4071,10 +4078,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4071,10 +4078,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case REGDICTIONARYOID: case REGDICTIONARYOID:
case REGROLEOID: case REGROLEOID:
case REGNAMESPACEOID: case REGNAMESPACEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid); *scaledvalue = convert_numeric_to_scalar(value, valuetypid,
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid); &failure);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
return true; &failure);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
&failure);
return !failure;
/* /*
* Built-in string types * Built-in string types
...@@ -4085,9 +4095,20 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4085,9 +4095,20 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case TEXTOID: case TEXTOID:
case NAMEOID: case NAMEOID:
{ {
char *valstr = convert_string_datum(value, valuetypid); char *valstr = convert_string_datum(value, valuetypid,
char *lostr = convert_string_datum(lobound, boundstypid); &failure);
char *histr = convert_string_datum(hibound, boundstypid); char *lostr = convert_string_datum(lobound, boundstypid,
&failure);
char *histr = convert_string_datum(hibound, boundstypid,
&failure);
/*
* Bail out if any of the values is not of string type. We
* might leak converted strings for the other value(s), but
* that's not worth troubling over.
*/
if (failure)
return false;
convert_string_to_scalar(valstr, scaledvalue, convert_string_to_scalar(valstr, scaledvalue,
lostr, scaledlobound, lostr, scaledlobound,
...@@ -4103,6 +4124,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4103,6 +4124,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
*/ */
case BYTEAOID: case BYTEAOID:
{ {
/* We only support bytea vs bytea comparison */
if (boundstypid != BYTEAOID)
return false;
convert_bytea_to_scalar(value, scaledvalue, convert_bytea_to_scalar(value, scaledvalue,
lobound, scaledlobound, lobound, scaledlobound,
hibound, scaledhibound); hibound, scaledhibound);
...@@ -4121,10 +4145,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4121,10 +4145,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case TINTERVALOID: case TINTERVALOID:
case TIMEOID: case TIMEOID:
case TIMETZOID: case TIMETZOID:
*scaledvalue = convert_timevalue_to_scalar(value, valuetypid); *scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid); &failure);
*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid); *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
return true; &failure);
*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
&failure);
return !failure;
/* /*
* Built-in network types * Built-in network types
...@@ -4133,10 +4160,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4133,10 +4160,13 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case CIDROID: case CIDROID:
case MACADDROID: case MACADDROID:
case MACADDR8OID: case MACADDR8OID:
*scaledvalue = convert_network_to_scalar(value, valuetypid); *scaledvalue = convert_network_to_scalar(value, valuetypid,
*scaledlobound = convert_network_to_scalar(lobound, boundstypid); &failure);
*scaledhibound = convert_network_to_scalar(hibound, boundstypid); *scaledlobound = convert_network_to_scalar(lobound, boundstypid,
return true; &failure);
*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
&failure);
return !failure;
} }
/* Don't know how to convert */ /* Don't know how to convert */
*scaledvalue = *scaledlobound = *scaledhibound = 0; *scaledvalue = *scaledlobound = *scaledhibound = 0;
...@@ -4145,9 +4175,12 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, ...@@ -4145,9 +4175,12 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
/* /*
* Do convert_to_scalar()'s work for any numeric data type. * Do convert_to_scalar()'s work for any numeric data type.
*
* On failure (e.g., unsupported typid), set *failure to true;
* otherwise, that variable is not changed.
*/ */
static double static double
convert_numeric_to_scalar(Datum value, Oid typid) convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
{ {
switch (typid) switch (typid)
{ {
...@@ -4183,11 +4216,7 @@ convert_numeric_to_scalar(Datum value, Oid typid) ...@@ -4183,11 +4216,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
return (double) DatumGetObjectId(value); return (double) DatumGetObjectId(value);
} }
/* *failure = true;
* Can't get here unless someone tries to use scalarineqsel() on an
* operator with one numeric and one non-numeric operand.
*/
elog(ERROR, "unsupported type: %u", typid);
return 0; return 0;
} }
...@@ -4336,11 +4365,14 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi) ...@@ -4336,11 +4365,14 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
/* /*
* Convert a string-type Datum into a palloc'd, null-terminated string. * Convert a string-type Datum into a palloc'd, null-terminated string.
* *
* On failure (e.g., unsupported typid), set *failure to true;
* otherwise, that variable is not changed. (We'll return NULL on failure.)
*
* When using a non-C locale, we must pass the string through strxfrm() * When using a non-C locale, we must pass the string through strxfrm()
* before continuing, so as to generate correct locale-specific results. * before continuing, so as to generate correct locale-specific results.
*/ */
static char * static char *
convert_string_datum(Datum value, Oid typid) convert_string_datum(Datum value, Oid typid, bool *failure)
{ {
char *val; char *val;
...@@ -4364,12 +4396,7 @@ convert_string_datum(Datum value, Oid typid) ...@@ -4364,12 +4396,7 @@ convert_string_datum(Datum value, Oid typid)
break; break;
} }
default: default:
*failure = true;
/*
* Can't get here unless someone tries to use scalarineqsel() on
* an operator with one string and one non-string operand.
*/
elog(ERROR, "unsupported type: %u", typid);
return NULL; return NULL;
} }
...@@ -4446,16 +4473,19 @@ convert_bytea_to_scalar(Datum value, ...@@ -4446,16 +4473,19 @@ convert_bytea_to_scalar(Datum value,
Datum hibound, Datum hibound,
double *scaledhibound) double *scaledhibound)
{ {
bytea *valuep = DatumGetByteaPP(value);
bytea *loboundp = DatumGetByteaPP(lobound);
bytea *hiboundp = DatumGetByteaPP(hibound);
int rangelo, int rangelo,
rangehi, rangehi,
valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ, valuelen = VARSIZE_ANY_EXHDR(valuep),
loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ, loboundlen = VARSIZE_ANY_EXHDR(loboundp),
hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ, hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
i, i,
minlen; minlen;
unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)), unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
*lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)), unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
*histr = (unsigned char *) VARDATA(DatumGetPointer(hibound)); unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
/* /*
* Assume bytea data is uniformly distributed across all byte values. * Assume bytea data is uniformly distributed across all byte values.
...@@ -4522,9 +4552,12 @@ convert_one_bytea_to_scalar(unsigned char *value, int valuelen, ...@@ -4522,9 +4552,12 @@ convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
/* /*
* Do convert_to_scalar()'s work for any timevalue data type. * Do convert_to_scalar()'s work for any timevalue data type.
*
* On failure (e.g., unsupported typid), set *failure to true;
* otherwise, that variable is not changed.
*/ */
static double static double
convert_timevalue_to_scalar(Datum value, Oid typid) convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
{ {
switch (typid) switch (typid)
{ {
...@@ -4570,11 +4603,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid) ...@@ -4570,11 +4603,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
} }
} }
/* *failure = true;
* Can't get here unless someone tries to use scalarineqsel() on an
* operator with one timevalue and one non-timevalue operand.
*/
elog(ERROR, "unsupported type: %u", typid);
return 0; return 0;
} }
......
...@@ -103,7 +103,7 @@ extern int inet_net_pton(int af, const char *src, ...@@ -103,7 +103,7 @@ extern int inet_net_pton(int af, const char *src,
void *dst, size_t size); void *dst, size_t size);
/* network.c */ /* network.c */
extern double convert_network_to_scalar(Datum value, Oid typid); extern double convert_network_to_scalar(Datum value, Oid typid, bool *failure);
extern Datum network_scan_first(Datum in); extern Datum network_scan_first(Datum in);
extern Datum network_scan_last(Datum in); extern Datum network_scan_last(Datum in);
extern void clean_ipv6_addr(int addr_family, char *addr); extern void clean_ipv6_addr(int addr_family, char *addr);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment