Commit cad764f3 authored by Tom Lane

Improve selectivity estimation involving string constants: pay attention
to more than one character, and try to do the right thing in non-ASCII
locales.
parent 1d5e7a6f
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.60 2000/03/20 15:42:46 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.61 2000/03/23 00:55:42 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -48,6 +48,8 @@ ...@@ -48,6 +48,8 @@
/* default selectivity estimate for inequalities such as "A < b" */ /* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL (1.0 / 3.0) #define DEFAULT_INEQ_SEL (1.0 / 3.0)
static bool convert_string_to_scalar(char *str, int strlength,
double *scaleval);
static void getattproperties(Oid relid, AttrNumber attnum, static void getattproperties(Oid relid, AttrNumber attnum,
Oid *typid, Oid *typid,
int *typlen, int *typlen,
...@@ -472,9 +474,8 @@ scalargtjoinsel(Oid opid, ...@@ -472,9 +474,8 @@ scalargtjoinsel(Oid opid,
* All numeric datatypes are simply converted to their equivalent * All numeric datatypes are simply converted to their equivalent
* "double" values. * "double" values.
* *
* String datatypes are converted to a crude scale using their first character * String datatypes are converted by convert_string_to_scalar(),
* (only if it is in the ASCII range, to try to avoid problems with non-ASCII * which is explained below.
* collating sequences).
* *
* The several datatypes representing absolute times are all converted * The several datatypes representing absolute times are all converted
* to Timestamp, which is actually a double, and then we just use that * to Timestamp, which is actually a double, and then we just use that
...@@ -525,40 +526,25 @@ convert_to_scalar(Datum value, Oid typid, ...@@ -525,40 +526,25 @@ convert_to_scalar(Datum value, Oid typid,
*/ */
case CHAROID: case CHAROID:
{ {
char ch = DatumGetChar(value); char ch = DatumGetChar(value);
if (ch >= 0 && ch < 127) return convert_string_to_scalar(&ch, 1, scaleval);
{
*scaleval = (double) ch;
return true;
}
break;
} }
case BPCHAROID: case BPCHAROID:
case VARCHAROID: case VARCHAROID:
case TEXTOID: case TEXTOID:
if (VARSIZE(DatumGetPointer(value)) > VARHDRSZ) {
{ char *str = (char *) VARDATA(DatumGetPointer(value));
char ch = * (char *) VARDATA(DatumGetPointer(value)); int strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
if (ch >= 0 && ch < 127) return convert_string_to_scalar(str, strlength, scaleval);
{ }
*scaleval = (double) ch;
return true;
}
}
break;
case NAMEOID: case NAMEOID:
{ {
NameData *nm = (NameData *) DatumGetPointer(value); NameData *nm = (NameData *) DatumGetPointer(value);
char ch = NameStr(*nm)[0];
if (ch >= 0 && ch < 127) return convert_string_to_scalar(NameStr(*nm), strlen(NameStr(*nm)),
{ scaleval);
*scaleval = (double) ch;
return true;
}
break;
} }
/* /*
...@@ -644,6 +630,88 @@ convert_to_scalar(Datum value, Oid typid, ...@@ -644,6 +630,88 @@ convert_to_scalar(Datum value, Oid typid,
return false; return false;
} }
/*
 * convert_string_to_scalar
 *	  Map a character string onto a double, on behalf of convert_to_scalar().
 *
 * The result lies in the range 0 to 1: the leading bytes of the string are
 * read as successive digits of a base-256 fraction.  Only the first few
 * bytes are examined, because a double has limited precision (and the
 * selectivity estimates built on this value are not very exact anyway).
 *
 * When USE_LOCALE is defined, the string is first run through strxfrm()
 * and the transformed bytes are used instead, so that the results are
 * correct for the locale in effect.
 *
 * str need not be null-terminated; strlength gives its length in bytes.
 * A length of zero or less yields 0.  The result is stored in *scaleval,
 * and the function always returns true.
 */
static bool
convert_string_to_scalar(char *str, int strlength,
						 double *scaleval)
{
	unsigned char *bytes;
	int			nbytes;
	int			i;
	double		fraction = 0.0;
	double		place = 256.0;
#ifdef USE_LOCALE
	char	   *terminated;
	char	   *transformed;
	size_t		bufsize;
	size_t		needed;
#endif

	/* An empty string sits at the very bottom of the scale */
	if (strlength <= 0)
	{
		*scaleval = 0;
		return true;
	}

#ifdef USE_LOCALE
	/* strxfrm() requires null-terminated input, so build a terminated copy */
	terminated = (char *) palloc(strlength + 1);
	memcpy(terminated, str, strlength);
	terminated[strlength] = '\0';

	/* First attempt: assume the output is only slightly larger than the input */
	bufsize = strlength + 32;	/* the pad amount is arbitrary */
	transformed = (char *) palloc(bufsize);
	needed = strxfrm(transformed, terminated, bufsize);
	if (needed >= bufsize)
	{
		/* Too small; retry with a buffer sized from the reported length */
		pfree(transformed);
		transformed = (char *) palloc(needed + 1);
		needed = strxfrm(transformed, terminated, needed + 1);
	}
	pfree(terminated);

	bytes = (unsigned char *) transformed;
	nbytes = needed;
#else
	bytes = (unsigned char *) str;
	nbytes = strlength;
#endif

	/* Roughly 8 bytes (sizeof double) is all that is worth examining */
	if (nbytes > 8)
		nbytes = 8;

	/* Accumulate each byte as the next base-256 fractional digit */
	for (i = 0; i < nbytes; i++)
	{
		fraction += ((double) bytes[i]) / place;
		place *= 256.0;
	}

#ifdef USE_LOCALE
	pfree(transformed);
#endif

	*scaleval = fraction;
	return true;
}
/* /*
* getattproperties * getattproperties
* Retrieve pg_attribute properties for an attribute, * Retrieve pg_attribute properties for an attribute,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment