Commit aaa67618 authored by Tomas Vondra

Apply all available functional dependencies

When considering functional dependencies during selectivity estimation,
it's not necessary to bother with selecting the best extended statistics
object and then using just the dependencies from it. We can simply consider
all applicable functional dependencies at once.

This means we need to deserialize all (applicable) dependencies before
applying them to the clauses. This is a bit more expensive than picking
the best statistics and deserializing dependencies for it. To minimize
the additional cost, we ignore statistics that are not applicable.

Author: Tomas Vondra
Reviewed-by: Mark Dilger
Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development
parent 652686a3
...@@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency, ...@@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency,
AttrNumber attnum); AttrNumber attnum);
static bool dependency_is_compatible_clause(Node *clause, Index relid, static bool dependency_is_compatible_clause(Node *clause, Index relid,
AttrNumber *attnum); AttrNumber *attnum);
static MVDependency *find_strongest_dependency(StatisticExtInfo *stats, static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
MVDependencies *dependencies, int ndependencies,
Bitmapset *attnums); Bitmapset *attnums);
static void static void
...@@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) ...@@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
* (see the comment in dependencies_clauselist_selectivity). * (see the comment in dependencies_clauselist_selectivity).
*/ */
static MVDependency * static MVDependency *
find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
Bitmapset *attnums) Bitmapset *attnums)
{ {
int i; int i, j;
MVDependency *strongest = NULL; MVDependency *strongest = NULL;
/* number of attnums in clauses */ /* number of attnums in clauses */
...@@ -876,9 +876,11 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, ...@@ -876,9 +876,11 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
* fully-matched dependencies. We do the cheap checks first, before * fully-matched dependencies. We do the cheap checks first, before
* matching it against the attnums. * matching it against the attnums.
*/ */
for (i = 0; i < dependencies->ndeps; i++) for (i = 0; i < ndependencies; i++)
{ {
MVDependency *dependency = dependencies->deps[i]; for (j = 0; j < dependencies[i]->ndeps; j++)
{
MVDependency *dependency = dependencies[i]->deps[j];
/* /*
* Skip dependencies referencing more attributes than available * Skip dependencies referencing more attributes than available
...@@ -907,6 +909,7 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, ...@@ -907,6 +909,7 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
if (dependency_is_fully_matched(dependency, attnums)) if (dependency_is_fully_matched(dependency, attnums))
strongest = dependency; /* save new best match */ strongest = dependency; /* save new best match */
} }
}
return strongest; return strongest;
} }
...@@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root, ...@@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
Selectivity s1 = 1.0; Selectivity s1 = 1.0;
ListCell *l; ListCell *l;
Bitmapset *clauses_attnums = NULL; Bitmapset *clauses_attnums = NULL;
StatisticExtInfo *stat;
MVDependencies *dependencies;
Bitmapset **list_attnums; Bitmapset **list_attnums;
int listidx; int listidx;
MVDependencies **dependencies = NULL;
int ndependencies = 0;
int i;
/* check if there's any stats that might be useful for us. */ /* check if there's any stats that might be useful for us. */
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
...@@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root, ...@@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
return 1.0; return 1.0;
} }
/* find the best suited statistics object for these attnums */ /*
stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, * Load all functional dependencies matching at least two parameters. We
list_attnums, list_length(clauses)); * can simply consider all dependencies at once, without having to search
* for the best statistics object.
*
* To not waste cycles and memory, we deserialize dependencies only for
* statistics that match at least two attributes. The array is allocated
* with the assumption that all objects match - we could grow the array
* to make it just the right size, but it's likely wasteful anyway thanks
* to moving the freed chunks to freelists etc.
*/
ndependencies = 0;
dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
list_length(rel->statlist));
foreach(l,rel->statlist)
{
StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
Bitmapset *matched;
int num_matched;
/* skip statistics that are not of the correct type */
if (stat->kind != STATS_EXT_DEPENDENCIES)
continue;
matched = bms_intersect(clauses_attnums, stat->keys);
num_matched = bms_num_members(matched);
bms_free(matched);
/* skip objects matching fewer than two attributes from clauses */
if (num_matched < 2)
continue;
dependencies[ndependencies++]
= statext_dependencies_load(stat->statOid);
}
/* if no matching stats could be found then we've nothing to do */ /* if no matching stats could be found then we've nothing to do */
if (!stat) if (!ndependencies)
{ {
pfree(list_attnums); pfree(list_attnums);
return 1.0; return 1.0;
} }
/* load the dependency items stored in the statistics object */
dependencies = statext_dependencies_load(stat->statOid);
/* /*
* Apply the dependencies recursively, starting with the widest/strongest * Apply the dependencies recursively, starting with the widest/strongest
* ones, and proceeding to the smaller/weaker ones. At the end of each * ones, and proceeding to the smaller/weaker ones. At the end of each
...@@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, ...@@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
MVDependency *dependency; MVDependency *dependency;
/* the widest/strongest dependency, fully matched by clauses */ /* the widest/strongest dependency, fully matched by clauses */
dependency = find_strongest_dependency(stat, dependencies, dependency = find_strongest_dependency(dependencies, ndependencies,
clauses_attnums); clauses_attnums);
/* if no suitable dependency was found, we're done */ /* if no suitable dependency was found, we're done */
...@@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, ...@@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
s1 *= (dependency->degree + (1 - dependency->degree) * s2); s1 *= (dependency->degree + (1 - dependency->degree) * s2);
} }
/* free deserialized functional dependencies (and then the array) */
for (i = 0; i < ndependencies; i++)
pfree(dependencies[i]);
pfree(dependencies); pfree(dependencies);
pfree(list_attnums); pfree(list_attnums);
......
...@@ -451,6 +451,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ...@@ -451,6 +451,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
50 | 50 50 | 50
(1 row) (1 row)
-- check the ability to use multiple functional dependencies
CREATE TABLE functional_dependencies_multi (
a INTEGER,
b INTEGER,
c INTEGER,
d INTEGER
);
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
mod(i,7),
mod(i,11),
mod(i,11)
FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies_multi;
-- estimates without any functional dependencies
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
estimated | actual
-----------+--------
102 | 714
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
estimated | actual
-----------+--------
41 | 454
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
estimated | actual
-----------+--------
1 | 64
(1 row)
-- create separate functional dependencies
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
ANALYZE functional_dependencies_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
estimated | actual
-----------+--------
714 | 714
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
estimated | actual
-----------+--------
454 | 454
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
estimated | actual
-----------+--------
65 | 64
(1 row)
DROP TABLE functional_dependencies_multi;
-- MCV lists -- MCV lists
CREATE TABLE mcv_lists ( CREATE TABLE mcv_lists (
filler1 TEXT, filler1 TEXT,
......
...@@ -291,6 +291,41 @@ ANALYZE functional_dependencies; ...@@ -291,6 +291,41 @@ ANALYZE functional_dependencies;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- check the ability to use multiple functional dependencies
CREATE TABLE functional_dependencies_multi (
a INTEGER,
b INTEGER,
c INTEGER,
d INTEGER
);
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
mod(i,7),
mod(i,11),
mod(i,11)
FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies_multi;
-- estimates without any functional dependencies
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
-- create separate functional dependencies
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
ANALYZE functional_dependencies_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
DROP TABLE functional_dependencies_multi;
-- MCV lists -- MCV lists
CREATE TABLE mcv_lists ( CREATE TABLE mcv_lists (
filler1 TEXT, filler1 TEXT,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment