Commit 50ba70a9 authored by Tomas Vondra's avatar Tomas Vondra

Identify simple column references in extended statistics

Until now, when defining extended statistics, everything except a plain
column reference was treated as complex expression. So for example "a"
was a column reference, but "(a)" would be an expression. In most cases
this does not matter much, but there were a couple strange consequences.
For example

    CREATE STATISTICS s ON a FROM t;

would fail, because extended stats require at least two columns. But

    CREATE STATISTICS s ON (a) FROM t;

would succeed, because that requirement does not apply to expressions.
Moreover, that statistics object is useless - the optimizer will always
use the regular statistics collected for attribute "a".

So do a bit more work to identify those expressions referencing a single
column, and translate them to a simple column reference. Backpatch to
14, where support for extended statistics on expressions was introduced.

Reported-by: Justin Pryzby
Backpatch-through: 14
Discussion: https://postgr.es/m/20210816013255.GS10479%40telsasoft.com
parent d760d942
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "optimizer/optimizer.h" #include "optimizer/optimizer.h"
#include "statistics/statistics.h" #include "statistics/statistics.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/inval.h" #include "utils/inval.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -210,13 +211,15 @@ CreateStatistics(CreateStatsStmt *stmt) ...@@ -210,13 +211,15 @@ CreateStatistics(CreateStatsStmt *stmt)
/* /*
* Convert the expression list to a simple array of attnums, but also keep * Convert the expression list to a simple array of attnums, but also keep
* a list of more complex expressions. While at it, enforce some * a list of more complex expressions. While at it, enforce some
* constraints. * constraints - we don't allow extended statistics on system attributes,
* and we require the data type to have less-than operator.
* *
* XXX We do only the bare minimum to separate simple attribute and * There are many ways how to "mask" a simple attribute refenrece as an
* complex expressions - for example "(a)" will be treated as a complex * expression, for example "(a+0)" etc. We can't possibly detect all of
* expression. No matter how elaborate the check is, there'll always be a * them, but we handle at least the simple case with attribute in parens.
* way around it, if the user is determined (consider e.g. "(a+0)"), so * There'll always be a way around this, if the user is determined (like
* it's not worth protecting against it. * the "(a+0)" example), but this makes it somewhat consistent with how
* indexes treat attributes/expressions.
*/ */
foreach(cell, stmt->exprs) foreach(cell, stmt->exprs)
{ {
...@@ -257,6 +260,28 @@ CreateStatistics(CreateStatsStmt *stmt) ...@@ -257,6 +260,28 @@ CreateStatistics(CreateStatsStmt *stmt)
nattnums++; nattnums++;
ReleaseSysCache(atttuple); ReleaseSysCache(atttuple);
} }
else if (IsA(selem->expr, Var)) /* column reference in parens */
{
Var *var = (Var *) selem->expr;
TypeCacheEntry *type;
/* Disallow use of system attributes in extended stats */
if (var->varattno <= 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("statistics creation on system columns is not supported")));
/* Disallow data types without a less-than operator */
type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
attnums[nattnums] = var->varattno;
nattnums++;
}
else /* expression */ else /* expression */
{ {
Node *expr = selem->expr; Node *expr = selem->expr;
......
...@@ -55,6 +55,8 @@ ERROR: duplicate expression in statistics definition ...@@ -55,6 +55,8 @@ ERROR: duplicate expression in statistics definition
CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
ERROR: unrecognized statistics kind "unrecognized" ERROR: unrecognized statistics kind "unrecognized"
-- incorrect expressions -- incorrect expressions
CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
ERROR: extended statistics require at least 2 columns
CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
ERROR: syntax error at or near "+" ERROR: syntax error at or near "+"
LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test; LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test;
......
...@@ -41,6 +41,7 @@ CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x') ...@@ -41,6 +41,7 @@ CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x')
CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test; CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
-- incorrect expressions -- incorrect expressions
CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
DROP TABLE ext_stats_test; DROP TABLE ext_stats_test;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment