Commit 51db6455 authored by Tom Lane's avatar Tom Lane

Repair error noticed by Roberto Cornacchia: selectivity code

was rejecting negative attnums as bogus, which of course they are not.
Add code to get_attdisbursion to produce a useful value for OID attribute,
since VACUUM does not store stats for system attributes.
Also, repair bug that's been in eqjoinsel for a long time: it was taking
the max of the two columns' disbursions, whereas it should use the min.
parent 45500964
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.25 1999/07/25 23:07:24 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.26 1999/09/09 02:35:47 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -230,24 +230,24 @@ compute_clause_selec(Query *root, Node *clause) ...@@ -230,24 +230,24 @@ compute_clause_selec(Query *root, Node *clause)
int flag; int flag;
get_relattval(clause, 0, &relidx, &attno, &constval, &flag); get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
if (relidx <= 0 || attno <= 0) if (relidx && attno)
s1 = (Cost) restriction_selectivity(oprrest,
opno,
getrelid(relidx,
root->rtable),
attno,
constval,
flag);
else
{ {
/* /*
* attno can be Invalid if the clause had a function in it, * attno can be 0 if the clause had a function in it,
* i.e. WHERE myFunc(f) = 10 * i.e. WHERE myFunc(f) = 10
* *
* XXX should be FIXED to use function selectivity * XXX should be FIXED to use function selectivity
*/ */
s1 = (Cost) (0.5); s1 = (Cost) (0.5);
} }
else
s1 = (Cost) restriction_selectivity(oprrest,
opno,
getrelid(relidx,
root->rtable),
attno,
constval,
flag);
} }
} }
else else
...@@ -274,7 +274,8 @@ compute_clause_selec(Query *root, Node *clause) ...@@ -274,7 +274,8 @@ compute_clause_selec(Query *root, Node *clause)
attno2; attno2;
get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2); get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
if (relid1 > 0 && relid2 > 0 && attno1 > 0 && attno2 > 0) if (relid1 && relid2 && attno1 && attno2)
s1 = (Cost) join_selectivity(oprjoin, s1 = (Cost) join_selectivity(oprjoin,
opno, opno,
getrelid(relid1, getrelid(relid1,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.50 1999/08/26 05:09:05 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.51 1999/09/09 02:35:53 tgl Exp $
* *
* HISTORY * HISTORY
* AUTHOR DATE MAJOR EVENT * AUTHOR DATE MAJOR EVENT
...@@ -116,7 +116,10 @@ make_opclause(Oper *op, Var *leftop, Var *rightop) ...@@ -116,7 +116,10 @@ make_opclause(Oper *op, Var *leftop, Var *rightop)
* *
* Returns the left operand of a clause of the form (op expr expr) * Returns the left operand of a clause of the form (op expr expr)
* or (op expr) * or (op expr)
* NB: it is assumed (for now) that all expr must be Var nodes *
* NB: for historical reasons, the result is declared Var *, even
* though many callers can cope with results that are not Vars.
* The result really ought to be declared Expr * or Node *.
*/ */
Var * Var *
get_leftop(Expr *clause) get_leftop(Expr *clause)
...@@ -549,8 +552,11 @@ NumRelids(Node *clause) ...@@ -549,8 +552,11 @@ NumRelids(Node *clause)
* if the "something" is a constant, the value of the constant * if the "something" is a constant, the value of the constant
* flags indicating whether a constant was found, and on which side. * flags indicating whether a constant was found, and on which side.
* Default values are returned if the expression is too complicated, * Default values are returned if the expression is too complicated,
* specifically -1 for the relid and attno, 0 for the constant value. * specifically 0 for the relid and attno, 0 for the constant value.
* Note that InvalidAttrNumber is *not* -1, but 0. *
* Note that negative attno values are *not* invalid, but represent
* system attributes such as OID. It's sufficient to check for relid=0
* to determine whether the routine succeeded.
*/ */
void void
get_relattval(Node *clause, get_relattval(Node *clause,
...@@ -610,8 +616,8 @@ get_relattval(Node *clause, ...@@ -610,8 +616,8 @@ get_relattval(Node *clause,
{ {
/* Duh, it's too complicated for me... */ /* Duh, it's too complicated for me... */
default_results: default_results:
*relid = -1; *relid = 0;
*attno = -1; *attno = 0;
*constval = 0; *constval = 0;
*flag = 0; *flag = 0;
return; return;
...@@ -663,7 +669,7 @@ static int is_single_func(Node *node) ...@@ -663,7 +669,7 @@ static int is_single_func(Node *node)
* for a joinclause. * for a joinclause.
* *
* If the clause is not of the form (var op var) or if any of the vars * If the clause is not of the form (var op var) or if any of the vars
* refer to nested attributes, then -1's are returned. * refer to nested attributes, then zeroes are returned.
* *
*/ */
void void
...@@ -674,10 +680,10 @@ get_rels_atts(Node *clause, ...@@ -674,10 +680,10 @@ get_rels_atts(Node *clause,
AttrNumber *attno2) AttrNumber *attno2)
{ {
/* set default values */ /* set default values */
*relid1 = -1; *relid1 = 0;
*attno1 = -1; *attno1 = 0;
*relid2 = -1; *relid2 = 0;
*attno2 = -1; *attno2 = 0;
if (is_opclause(clause)) if (is_opclause(clause))
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.36 1999/07/25 23:07:26 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.37 1999/09/09 02:35:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -288,7 +288,7 @@ index_selectivity(Query *root, ...@@ -288,7 +288,7 @@ index_selectivity(Query *root,
} }
/* /*
* restriction_selectivity in lisp system. * restriction_selectivity
* *
* NOTE: The routine is now merged with RestrictionClauseSelectivity * NOTE: The routine is now merged with RestrictionClauseSelectivity
* as defined in plancat.c * as defined in plancat.c
...@@ -298,7 +298,7 @@ index_selectivity(Query *root, ...@@ -298,7 +298,7 @@ index_selectivity(Query *root,
* operator relation, by calling the function manager. * operator relation, by calling the function manager.
* *
* XXX The assumption in the selectivity procedures is that if the * XXX The assumption in the selectivity procedures is that if the
* relation OIDs or attribute numbers are -1, then the clause * relation OIDs or attribute numbers are 0, then the clause
* isn't of the form (op var const). * isn't of the form (op var const).
*/ */
Cost Cost
...@@ -337,7 +337,7 @@ restriction_selectivity(Oid functionObjectId, ...@@ -337,7 +337,7 @@ restriction_selectivity(Oid functionObjectId,
* information. * information.
* *
* XXX The assumption in the selectivity procedures is that if the * XXX The assumption in the selectivity procedures is that if the
* relation OIDs or attribute numbers are -1, then the clause * relation OIDs or attribute numbers are 0, then the clause
* isn't of the form (op var var). * isn't of the form (op var var).
*/ */
Cost Cost
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.39 1999/08/21 00:56:18 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.40 1999/09/09 02:35:58 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include "utils/syscache.h" #include "utils/syscache.h"
/* N is not a valid var/constant or relation id */ /* N is not a valid var/constant or relation id */
#define NONVALUE(N) ((N) == -1) #define NONVALUE(N) ((N) == 0)
/* are we looking at a functional index selectivity request? */ /* are we looking at a functional index selectivity request? */
#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber) #define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
...@@ -170,7 +170,9 @@ eqsel(Oid opid, ...@@ -170,7 +170,9 @@ eqsel(Oid opid,
else else
{ {
/* No VACUUM ANALYZE stats available, so make a guess using /* No VACUUM ANALYZE stats available, so make a guess using
* the disbursion stat (if we have that, which is unlikely...) * the disbursion stat (if we have that, which is unlikely
* for a normal attribute; but for a system attribute we may
* be able to estimate it).
*/ */
selec = get_attdisbursion(relid, attno, 0.01); selec = get_attdisbursion(relid, attno, 0.01);
} }
...@@ -366,7 +368,7 @@ eqjoinsel(Oid opid, ...@@ -366,7 +368,7 @@ eqjoinsel(Oid opid,
float64 result; float64 result;
float64data num1, float64data num1,
num2, num2,
max; min;
result = (float64) palloc(sizeof(float64data)); result = (float64) palloc(sizeof(float64data));
if (NONVALUE(attno1) || NONVALUE(relid1) || if (NONVALUE(attno1) || NONVALUE(relid1) ||
...@@ -376,11 +378,23 @@ eqjoinsel(Oid opid, ...@@ -376,11 +378,23 @@ eqjoinsel(Oid opid,
{ {
num1 = get_attdisbursion(relid1, attno1, 0.01); num1 = get_attdisbursion(relid1, attno1, 0.01);
num2 = get_attdisbursion(relid2, attno2, 0.01); num2 = get_attdisbursion(relid2, attno2, 0.01);
max = (num1 > num2) ? num1 : num2; /*
if (max <= 0) * The join selectivity cannot be more than num2, since each
*result = 1.0; * tuple in table 1 could match no more than num2 fraction of
else * tuples in table 2 (and that's only if the table-1 tuple
*result = max; * matches the most common value in table 2, so probably it's
* less). By the same reasoning it is not more than num1.
* The min is therefore an upper bound.
*
* XXX can we make a better estimate here? Using the nullfrac
* statistic might be helpful, for example. Assuming the operator
* is strict (does not succeed for null inputs) then the selectivity
* couldn't be more than (1-nullfrac1)*(1-nullfrac2), which might
* be usefully small if there are many nulls. How about applying
* the operator to the most common values?
*/
min = (num1 < num2) ? num1 : num2;
*result = min;
} }
return result; return result;
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.33 1999/08/16 02:06:25 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.34 1999/09/09 02:36:04 tgl Exp $
* *
* NOTES * NOTES
* Eventually, the index information should go through here, too. * Eventually, the index information should go through here, too.
...@@ -223,6 +223,14 @@ get_attdisbursion(Oid relid, AttrNumber attnum, double min_estimate) ...@@ -223,6 +223,14 @@ get_attdisbursion(Oid relid, AttrNumber attnum, double min_estimate)
if (disbursion < 0.0) /* VACUUM thinks there are no duplicates */ if (disbursion < 0.0) /* VACUUM thinks there are no duplicates */
return 1.0 / (double) ntuples; return 1.0 / (double) ntuples;
/*
* VACUUM ANALYZE does not compute disbursion for system attributes,
* but some of them can reasonably be assumed unique anyway.
*/
if (attnum == ObjectIdAttributeNumber ||
attnum == SelfItemPointerAttributeNumber)
return 1.0 / (double) ntuples;
/* /*
* VACUUM ANALYZE has not been run for this table. * VACUUM ANALYZE has not been run for this table.
* Produce an estimate = 1/numtuples. This may produce * Produce an estimate = 1/numtuples. This may produce
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment