Commit aaa67618 authored by Tomas Vondra's avatar Tomas Vondra

Apply all available functional dependencies

When considering functional dependencies during selectivity estimation,
it's not necessary to bother with selecting the best extended statistic
object and then use just dependencies from it. We can simply consider
all applicable functional dependencies at once.

This means we need to deserialie all (applicable) dependencies before
applying them to the clauses. This is a bit more expensive than picking
the best statistics and deserializing dependencies for it. To minimize
the additional cost, we ignore statistics that are not applicable.

Author: Tomas Vondra
Reviewed-by: Mark Dilger
Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development
parent 652686a3
......@@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency,
AttrNumber attnum);
static bool dependency_is_compatible_clause(Node *clause, Index relid,
AttrNumber *attnum);
static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
MVDependencies *dependencies,
static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
int ndependencies,
Bitmapset *attnums);
static void
......@@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
* (see the comment in dependencies_clauselist_selectivity).
*/
static MVDependency *
find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
Bitmapset *attnums)
{
int i;
int i, j;
MVDependency *strongest = NULL;
/* number of attnums in clauses */
......@@ -876,36 +876,39 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
* fully-matched dependencies. We do the cheap checks first, before
* matching it against the attnums.
*/
for (i = 0; i < dependencies->ndeps; i++)
for (i = 0; i < ndependencies; i++)
{
MVDependency *dependency = dependencies->deps[i];
/*
* Skip dependencies referencing more attributes than available
* clauses, as those can't be fully matched.
*/
if (dependency->nattributes > nattnums)
continue;
if (strongest)
for (j = 0; j < dependencies[i]->ndeps; j++)
{
/* skip dependencies on fewer attributes than the strongest. */
if (dependency->nattributes < strongest->nattributes)
continue;
MVDependency *dependency = dependencies[i]->deps[j];
/* also skip weaker dependencies when attribute count matches */
if (strongest->nattributes == dependency->nattributes &&
strongest->degree > dependency->degree)
/*
* Skip dependencies referencing more attributes than available
* clauses, as those can't be fully matched.
*/
if (dependency->nattributes > nattnums)
continue;
}
/*
* this dependency is stronger, but we must still check that it's
* fully matched to these attnums. We perform this check last as it's
* slightly more expensive than the previous checks.
*/
if (dependency_is_fully_matched(dependency, attnums))
strongest = dependency; /* save new best match */
if (strongest)
{
/* skip dependencies on fewer attributes than the strongest. */
if (dependency->nattributes < strongest->nattributes)
continue;
/* also skip weaker dependencies when attribute count matches */
if (strongest->nattributes == dependency->nattributes &&
strongest->degree > dependency->degree)
continue;
}
/*
* this dependency is stronger, but we must still check that it's
* fully matched to these attnums. We perform this check last as it's
* slightly more expensive than the previous checks.
*/
if (dependency_is_fully_matched(dependency, attnums))
strongest = dependency; /* save new best match */
}
}
return strongest;
......@@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
Selectivity s1 = 1.0;
ListCell *l;
Bitmapset *clauses_attnums = NULL;
StatisticExtInfo *stat;
MVDependencies *dependencies;
Bitmapset **list_attnums;
int listidx;
MVDependencies **dependencies = NULL;
int ndependencies = 0;
int i;
/* check if there's any stats that might be useful for us. */
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
......@@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
return 1.0;
}
/* find the best suited statistics object for these attnums */
stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES,
list_attnums, list_length(clauses));
/*
* Load all functional dependencies matching at least two parameters. We
* can simply consider all dependencies at once, without having to search
* for the best statistics object.
*
* To not waste cycles and memory, we deserialize dependencies only for
* statistics that match at least two attributes. The array is allocated
* with the assumption that all objects match - we could grow the array
* to make it just the right size, but it's likely wasteful anyway thanks
* to moving the freed chunks to freelists etc.
*/
ndependencies = 0;
dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
list_length(rel->statlist));
foreach(l,rel->statlist)
{
StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
Bitmapset *matched;
int num_matched;
/* skip statistics that are not of the correct type */
if (stat->kind != STATS_EXT_DEPENDENCIES)
continue;
matched = bms_intersect(clauses_attnums, stat->keys);
num_matched = bms_num_members(matched);
bms_free(matched);
/* skip objects matching fewer than two attributes from clauses */
if (num_matched < 2)
continue;
dependencies[ndependencies++]
= statext_dependencies_load(stat->statOid);
}
/* if no matching stats could be found then we've nothing to do */
if (!stat)
if (!ndependencies)
{
pfree(list_attnums);
return 1.0;
}
/* load the dependency items stored in the statistics object */
dependencies = statext_dependencies_load(stat->statOid);
/*
* Apply the dependencies recursively, starting with the widest/strongest
* ones, and proceeding to the smaller/weaker ones. At the end of each
......@@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
MVDependency *dependency;
/* the widest/strongest dependency, fully matched by clauses */
dependency = find_strongest_dependency(stat, dependencies,
dependency = find_strongest_dependency(dependencies, ndependencies,
clauses_attnums);
/* if no suitable dependency was found, we're done */
......@@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
s1 *= (dependency->degree + (1 - dependency->degree) * s2);
}
/* free deserialized functional dependencies (and then the array) */
for (i = 0; i < ndependencies; i++)
pfree(dependencies[i]);
pfree(dependencies);
pfree(list_attnums);
......
......@@ -451,6 +451,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
50 | 50
(1 row)
-- check the ability to use multiple functional dependencies
CREATE TABLE functional_dependencies_multi (
a INTEGER,
b INTEGER,
c INTEGER,
d INTEGER
);
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
mod(i,7),
mod(i,11),
mod(i,11)
FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies_multi;
-- estimates without any functional dependencies
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
estimated | actual
-----------+--------
102 | 714
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
estimated | actual
-----------+--------
41 | 454
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
estimated | actual
-----------+--------
1 | 64
(1 row)
-- create separate functional dependencies
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
ANALYZE functional_dependencies_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
estimated | actual
-----------+--------
714 | 714
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
estimated | actual
-----------+--------
454 | 454
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
estimated | actual
-----------+--------
65 | 64
(1 row)
DROP TABLE functional_dependencies_multi;
-- MCV lists
CREATE TABLE mcv_lists (
filler1 TEXT,
......
......@@ -291,6 +291,41 @@ ANALYZE functional_dependencies;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- check the ability to use multiple functional dependencies
CREATE TABLE functional_dependencies_multi (
a INTEGER,
b INTEGER,
c INTEGER,
d INTEGER
);
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
mod(i,7),
mod(i,11),
mod(i,11)
FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies_multi;
-- estimates without any functional dependencies
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
-- create separate functional dependencies
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
ANALYZE functional_dependencies_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
DROP TABLE functional_dependencies_multi;
-- MCV lists
CREATE TABLE mcv_lists (
filler1 TEXT,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment