Commit ccaa3569 authored by Tomas Vondra's avatar Tomas Vondra

Recognize some OR clauses as compatible with functional dependencies

Since commit 8f321bd1 functional dependencies can handle IN clauses,
which however introduced a possible (and surprising) inconsistency,
because IN clauses may be expressed as an OR clause, which are still
considered incompatible. For example

  a IN (1, 2, 3)

may be rewritten as

  (a = 1 OR a = 2 OR a = 3)

The IN clause will work fine with functional dependencies, but the OR
clause will force the estimation to fall back to plain per-column
estimates, possibly introducing significant estimation errors.

This commit recognizes OR clauses equivalent to an IN clause (when all
arugments are compatible and reference the same attribute) as a special
case, compatible with functional dependencies. This allows applying
functional dependencies, just like for IN clauses.

This does not eliminate the difference in estimating the clause itself,
i.e. IN clause and OR clause still use different formulas. It would be
possible to change that (for these special OR clauses), but that's not
really about extended statistics - it was always like this. Moreover the
errors are usually much smaller compared to ignoring dependencies.

Author: Tomas Vondra
Reviewed-by: Dean Rasheed
Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc
parent dbe0d989
...@@ -753,24 +753,27 @@ pg_dependencies_send(PG_FUNCTION_ARGS) ...@@ -753,24 +753,27 @@ pg_dependencies_send(PG_FUNCTION_ARGS)
static bool static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{ {
RestrictInfo *rinfo = (RestrictInfo *) clause;
Var *var; Var *var;
if (!IsA(rinfo, RestrictInfo)) if (IsA(clause, RestrictInfo))
return false; {
RestrictInfo *rinfo = (RestrictInfo *) clause;
/* Pseudoconstants are not interesting (they couldn't contain a Var) */ /* Pseudoconstants are not interesting (they couldn't contain a Var) */
if (rinfo->pseudoconstant) if (rinfo->pseudoconstant)
return false; return false;
/* Clauses referencing multiple, or no, varnos are incompatible */ /* Clauses referencing multiple, or no, varnos are incompatible */
if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
return false; return false;
if (is_opclause(rinfo->clause)) clause = (Node *) rinfo->clause;
}
if (is_opclause(clause))
{ {
/* If it's an opclause, check for Var = Const or Const = Var. */ /* If it's an opclause, check for Var = Const or Const = Var. */
OpExpr *expr = (OpExpr *) rinfo->clause; OpExpr *expr = (OpExpr *) clause;
/* Only expressions with two arguments are candidates. */ /* Only expressions with two arguments are candidates. */
if (list_length(expr->args) != 2) if (list_length(expr->args) != 2)
...@@ -801,10 +804,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) ...@@ -801,10 +804,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
/* OK to proceed with checking "var" */ /* OK to proceed with checking "var" */
} }
else if (IsA(rinfo->clause, ScalarArrayOpExpr)) else if (IsA(clause, ScalarArrayOpExpr))
{ {
/* If it's an scalar array operator, check for Var IN Const. */ /* If it's an scalar array operator, check for Var IN Const. */
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) rinfo->clause; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
/* /*
* Reject ALL() variant, we only care about ANY/IN. * Reject ALL() variant, we only care about ANY/IN.
...@@ -839,13 +842,43 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) ...@@ -839,13 +842,43 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
/* OK to proceed with checking "var" */ /* OK to proceed with checking "var" */
} }
else if (is_notclause(rinfo->clause)) else if (is_orclause(clause))
{
BoolExpr *expr = (BoolExpr *) clause;
ListCell *lc;
/* start with no attribute number */
*attnum = InvalidAttrNumber;
foreach(lc, expr->args)
{
AttrNumber clause_attnum;
/*
* Had we found incompatible clause in the arguments, treat the
* whole clause as incompatible.
*/
if (!dependency_is_compatible_clause((Node *) lfirst(lc),
relid, &clause_attnum))
return false;
if (*attnum == InvalidAttrNumber)
*attnum = clause_attnum;
if (*attnum != clause_attnum)
return false;
}
/* the Var is already checked by the recursive call */
return true;
}
else if (is_notclause(clause))
{ {
/* /*
* "NOT x" can be interpreted as "x = false", so get the argument and * "NOT x" can be interpreted as "x = false", so get the argument and
* proceed with seeing if it's a suitable Var. * proceed with seeing if it's a suitable Var.
*/ */
var = (Var *) get_notclausearg(rinfo->clause); var = (Var *) get_notclausearg(clause);
} }
else else
{ {
...@@ -853,7 +886,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) ...@@ -853,7 +886,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
* A boolean expression "x" can be interpreted as "x = true", so * A boolean expression "x" can be interpreted as "x = true", so
* proceed with seeing if it's a suitable Var. * proceed with seeing if it's a suitable Var.
*/ */
var = (Var *) rinfo->clause; var = (Var *) clause;
} }
/* /*
......
...@@ -458,6 +458,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ...@@ -458,6 +458,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
3 | 400 3 | 400
(1 row) (1 row)
-- OR clauses referencing the same attribute
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
estimated | actual
-----------+--------
2 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
estimated | actual
-----------+--------
4 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
estimated | actual
-----------+--------
8 | 200
(1 row)
-- OR clauses referencing different attributes
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
estimated | actual
-----------+--------
3 | 100
(1 row)
-- ANY -- ANY
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
estimated | actual estimated | actual
...@@ -592,6 +618,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ...@@ -592,6 +618,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
400 | 400 400 | 400
(1 row) (1 row)
-- OR clauses referencing the same attribute
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
estimated | actual
-----------+--------
99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
estimated | actual
-----------+--------
99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
estimated | actual
-----------+--------
197 | 200
(1 row)
-- OR clauses referencing different attributes are incompatible
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
estimated | actual
-----------+--------
3 | 100
(1 row)
-- ANY -- ANY
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
estimated | actual estimated | actual
......
...@@ -286,6 +286,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ...@@ -286,6 +286,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
-- OR clauses referencing the same attribute
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
-- OR clauses referencing different attributes
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
-- ANY -- ANY
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
...@@ -338,6 +348,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ...@@ -338,6 +348,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
-- OR clauses referencing the same attribute
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
-- OR clauses referencing different attributes are incompatible
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
-- ANY -- ANY
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment