Commit 9888192f authored by Tom Lane's avatar Tom Lane

Instead of trying to force WHERE clauses into CNF or DNF normal form,

just look for common clauses that can be pulled out of ORs.  Per recent
discussion, extracting common clauses seems to be the only really useful
effect of normalization, and if we do it explicitly then we can avoid
cluttering the qual with partially-redundant duplicated expressions, which
was an unpleasant side-effect of the old approach.
parent 7af16b2a
...@@ -200,15 +200,11 @@ planner() ...@@ -200,15 +200,11 @@ planner()
do final cleanup after planning. do final cleanup after planning.
-subquery_planner() -subquery_planner()
pull up subqueries from rangetable, if possible pull up subqueries from rangetable, if possible
simplify constant expressions
canonicalize qual canonicalize qual
Attempt to reduce WHERE clause to either CNF or DNF canonical form. Attempt to simplify WHERE clause to the most useful form; this includes
CNF (top-level-AND) is preferred, since the optimizer can then use flattening nested AND/ORs and detecting clauses that are duplicated in
any of the AND subclauses to filter tuples; but quals that are in different branches of an OR.
or close to DNF form will suffer exponential expansion if we try to simplify constant expressions
force them to CNF. In pathological cases either transform may expand
the qual unreasonably; so we may have to leave it un-normalized,
thereby reducing the accuracy of selectivity estimates.
process sublinks process sublinks
convert Vars of outer query levels into Params convert Vars of outer query levels into Params
--grouping_planner() --grouping_planner()
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.40 2003/12/28 21:57:37 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.41 2003/12/30 21:49:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "optimizer/prep.h" #include "optimizer/prep.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
static Node *flatten_andors_mutator(Node *node, void *context); static Node *flatten_andors_mutator(Node *node, void *context);
static void flatten_andors_and_walker(FastList *out_list, List *andlist); static void flatten_andors_and_walker(FastList *out_list, List *andlist);
static void flatten_andors_or_walker(FastList *out_list, List *orlist); static void flatten_andors_or_walker(FastList *out_list, List *orlist);
...@@ -29,73 +30,33 @@ static List *pull_ors(List *orlist); ...@@ -29,73 +30,33 @@ static List *pull_ors(List *orlist);
static void pull_ors_walker(FastList *out_list, List *orlist); static void pull_ors_walker(FastList *out_list, List *orlist);
static Expr *find_nots(Expr *qual); static Expr *find_nots(Expr *qual);
static Expr *push_nots(Expr *qual); static Expr *push_nots(Expr *qual);
static Expr *find_ors(Expr *qual); static Expr *find_duplicate_ors(Expr *qual);
static Expr *or_normalize(List *orlist); static Expr *process_duplicate_ors(List *orlist);
static Expr *find_ands(Expr *qual);
static Expr *and_normalize(List *andlist);
static Expr *qual_cleanup(Expr *qual);
static List *remove_duplicates(List *list);
static void count_bool_nodes(Expr *qual, double *nodes,
double *cnfnodes, double *dnfnodes);
/*****************************************************************************
*
* CNF/DNF CONVERSION ROUTINES
*
* These routines convert an arbitrary boolean expression into
* conjunctive normal form or disjunctive normal form.
*
* Normalization is only carried out in the top AND/OR/NOT portion
* of the given tree; we do not attempt to normalize boolean expressions
* that may appear as arguments of operators or functions in the tree.
*
* Query qualifications (WHERE clauses) are ordinarily transformed into
* CNF, ie, AND-of-ORs form, because then the optimizer can use any one
* of the independent AND clauses as a filtering qualification. However,
* quals that are naturally expressed as OR-of-ANDs can suffer an
* exponential growth in size in this transformation, so we also consider
* converting to DNF (OR-of-ANDs), and we may also leave well enough alone
* if both transforms cause unreasonable growth. The OR-of-ANDs format
* is useful for indexscan implementation, so we prefer that format when
* there is just one relation involved.
*
* canonicalize_qual() does "smart" conversion to either CNF or DNF, per
* the above considerations, while cnfify() and dnfify() simply perform
* the demanded transformation. The latter two are presently dead code.
*****************************************************************************/
/* /*
* canonicalize_qual * canonicalize_qual
* Convert a qualification to the most useful normalized form. * Convert a qualification expression to the most useful form.
* *
* Returns the modified qualification. * The name of this routine is a holdover from a time when it would try to
* force the expression into canonical AND-of-ORs or OR-of-ANDs form.
* Eventually, we recognized that that had more theoretical purity than
* actual usefulness, and so now the transformation doesn't involve any
* notion of reaching a canonical form.
* *
* XXX This code could be much smarter, at the cost of also being slower, * Returns the modified qualification.
* if we tried to compute selectivities and/or see whether there are
* actually indexes to support an indexscan implementation of a DNF qual.
* We could even try converting the CNF clauses that mention a single
* relation into a single DNF clause to see if that looks cheaper to
* implement. For now, though, we just try to avoid doing anything
* quite as stupid as unconditionally converting to CNF was...
*/ */
Expr * Expr *
canonicalize_qual(Expr *qual) canonicalize_qual(Expr *qual)
{ {
Expr *newqual; Expr *newqual;
double nodes,
cnfnodes,
dnfnodes;
bool cnfok,
dnfok;
/* Quick exit for empty qual */ /* Quick exit for empty qual */
if (qual == NULL) if (qual == NULL)
return NULL; return NULL;
/* /*
* Flatten AND and OR groups throughout the tree. This improvement is * Flatten AND and OR groups throughout the expression tree.
* always worthwhile, so do it unconditionally.
*/ */
newqual = (Expr *) flatten_andors((Node *) qual); newqual = (Expr *) flatten_andors((Node *) qual);
...@@ -108,152 +69,14 @@ canonicalize_qual(Expr *qual) ...@@ -108,152 +69,14 @@ canonicalize_qual(Expr *qual)
newqual = find_nots(newqual); newqual = find_nots(newqual);
/* /*
* Choose whether to convert to CNF, or DNF, or leave well enough * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
* alone. * only within the top-level AND/OR structure.
*
* We make an approximate estimate of the number of bottom-level nodes
* that will appear in the CNF and DNF forms of the query.
*/ */
count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes); newqual = find_duplicate_ors(newqual);
/*
* First heuristic is to forget about *both* normal forms if there are
* a huge number of terms in the qual clause. This would only happen
* with machine-generated queries, presumably; and most likely such a
* query is already in either CNF or DNF.
*/
cnfok = dnfok = true;
if (nodes >= 500.0)
cnfok = dnfok = false;
/*
* Second heuristic is to forget about either CNF or DNF if it shows
* unreasonable growth compared to the original form of the qual,
* where we define "unreasonable" a tad arbitrarily as 4x more
* operators.
*/
if (cnfnodes >= 4.0 * nodes)
cnfok = false;
if (dnfnodes >= 4.0 * nodes)
dnfok = false;
/*
* Third heuristic is to prefer DNF if top level is already an OR, and
* only one relation is mentioned, and DNF is no larger than the CNF
* representation. (Pretty shaky; can we improve on this?)
*/
if (cnfok && dnfok && dnfnodes <= cnfnodes &&
or_clause((Node *) newqual) &&
NumRelids((Node *) newqual) == 1)
cnfok = false;
/*
* Otherwise, we prefer CNF.
*
* XXX obviously, these rules could be improved upon.
*/
if (cnfok)
{
/*
* Normalize into conjunctive normal form, and clean up the
* result.
*/
newqual = qual_cleanup(find_ors(newqual));
}
else if (dnfok)
{
/*
* Normalize into disjunctive normal form, and clean up the
* result.
*/
newqual = qual_cleanup(find_ands(newqual));
}
return newqual; return newqual;
} }
#ifdef NOT_USED
/*
* cnfify
* Convert a qualification to conjunctive normal form by applying
* successive normalizations.
*
* Returns the modified qualification.
*
* If 'removeAndFlag' is true then it removes explicit AND at the top level,
* producing a list of implicitly-ANDed conditions. Otherwise, a regular
* boolean expression is returned. Since most callers pass 'true', we
* prefer to declare the result as List *, not Expr *.
*/
List *
cnfify(Expr *qual, bool removeAndFlag)
{
Expr *newqual;
if (qual == NULL)
return NIL;
/*
* Flatten AND and OR groups throughout the tree. This improvement is
* always worthwhile.
*/
newqual = flatten_andors(qual);
/*
* Push down NOTs. We do this only in the top-level boolean
* expression, without examining arguments of operators/functions.
*/
newqual = find_nots(newqual);
/* Normalize into conjunctive normal form. */
newqual = find_ors(newqual);
/* Clean up the result. */
newqual = qual_cleanup(newqual);
if (removeAndFlag)
newqual = (Expr *) make_ands_implicit(newqual);
return (List *) newqual;
}
#endif
#ifdef NOT_USED
/*
* dnfify
* Convert a qualification to disjunctive normal form by applying
* successive normalizations.
*
* Returns the modified qualification.
*
* We do not offer a 'removeOrFlag' in this case; the usages are
* different.
*/
static Expr *
dnfify(Expr *qual)
{
Expr *newqual;
if (qual == NULL)
return NULL;
/*
* Flatten AND and OR groups throughout the tree. This improvement is
* always worthwhile.
*/
newqual = flatten_andors(qual);
/*
* Push down NOTs. We do this only in the top-level boolean
* expression, without examining arguments of operators/functions.
*/
newqual = find_nots(newqual);
/* Normalize into disjunctive normal form. */
newqual = find_ands(newqual);
/* Clean up the result. */
newqual = qual_cleanup(newqual);
return newqual;
}
#endif
/*-------------------- /*--------------------
* The parser regards AND and OR as purely binary operators, so a qual like * The parser regards AND and OR as purely binary operators, so a qual like
...@@ -271,13 +94,16 @@ dnfify(Expr *qual) ...@@ -271,13 +94,16 @@ dnfify(Expr *qual)
*-------------------- *--------------------
*/ */
/*-------------------- /*
* flatten_andors * flatten_andors
* Given an expression tree, simplify nested AND/OR clauses into flat * Given an expression tree, simplify nested AND/OR clauses into flat
* AND/OR clauses with more arguments. The entire tree is processed. * AND/OR clauses with more arguments. The entire tree is processed.
* *
* Returns the rebuilt expr (note original structure is not touched). * Returns the rebuilt expr (note original structure is not touched).
*-------------------- *
* This is exported so that other modules can perform the part of
* canonicalize_qual processing that applies to entire trees, rather
* than just the top-level boolean expressions.
*/ */
Node * Node *
flatten_andors(Node *node) flatten_andors(Node *node)
...@@ -413,6 +239,7 @@ pull_ors_walker(FastList *out_list, List *orlist) ...@@ -413,6 +239,7 @@ pull_ors_walker(FastList *out_list, List *orlist)
} }
} }
/* /*
* find_nots * find_nots
* Traverse the qualification, looking for NOTs to take care of. * Traverse the qualification, looking for NOTs to take care of.
...@@ -469,7 +296,7 @@ push_nots(Expr *qual) ...@@ -469,7 +296,7 @@ push_nots(Expr *qual)
* possible? */ * possible? */
/* /*
* Negate an operator clause if possible: ("NOT" (< A B)) => (> A B) * Negate an operator clause if possible: (NOT (< A B)) => (> A B)
* Otherwise, retain the clause as it is (the NOT can't be pushed * Otherwise, retain the clause as it is (the NOT can't be pushed
* down any farther). * down any farther).
*/ */
...@@ -491,8 +318,8 @@ push_nots(Expr *qual) ...@@ -491,8 +318,8 @@ push_nots(Expr *qual)
{ {
/*-------------------- /*--------------------
* Apply DeMorgan's Laws: * Apply DeMorgan's Laws:
* ("NOT" ("AND" A B)) => ("OR" ("NOT" A) ("NOT" B)) * (NOT (AND A B)) => (OR (NOT A) (NOT B))
* ("NOT" ("OR" A B)) => ("AND" ("NOT" A) ("NOT" B)) * (NOT (OR A B)) => (AND (NOT A) (NOT B))
* i.e., swap AND for OR and negate all the subclauses. * i.e., swap AND for OR and negate all the subclauses.
*-------------------- *--------------------
*/ */
...@@ -532,63 +359,90 @@ push_nots(Expr *qual) ...@@ -532,63 +359,90 @@ push_nots(Expr *qual)
} }
} }
/*
* find_ors /*--------------------
* Given a qualification tree with the NOTs pushed down, convert it * The following code attempts to apply the inverse OR distributive law:
* to a tree in CNF by repeatedly applying the rule: * ((A AND B) OR (A AND C)) => (A AND (B OR C))
* ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C)) * That is, locate OR clauses in which every subclause contains an
* identical term, and pull out the duplicated terms.
* *
* Note that 'or' clauses will always be turned into 'and' clauses * This may seem like a fairly useless activity, but it turns out to be
* if they contain any 'and' subclauses. Also, we do not descend * applicable to many machine-generated queries, and there are also queries
* below the top-level AND/OR structure. * in some of the TPC benchmarks that need it. This was in fact almost the
* sole useful side-effect of the old prepqual code that tried to force
* the query into canonical AND-of-ORs form: the canonical equivalent of
* ((A AND B) OR (A AND C))
* is
* ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
* which the code was able to simplify to
* (A AND (A OR C) AND (B OR A) AND (B OR C))
* thus successfully extracting the common condition A --- but at the cost
* of cluttering the qual with many redundant clauses.
*--------------------
*/
/*
* find_duplicate_ors
* Given a qualification tree with the NOTs pushed down, search for
* OR clauses to which the inverse OR distributive law might apply.
* Only the top-level AND/OR structure is searched.
* *
* Returns the modified qualification. AND/OR flatness is preserved. * Returns the modified qualification. AND/OR flatness is preserved.
*/ */
static Expr * static Expr *
find_ors(Expr *qual) find_duplicate_ors(Expr *qual)
{ {
if (qual == NULL) if (qual == NULL)
return NULL; return NULL;
if (and_clause((Node *) qual)) if (or_clause((Node *) qual))
{ {
List *andlist = NIL; List *orlist = NIL;
List *temp; List *temp;
/* Recurse */
foreach(temp, ((BoolExpr *) qual)->args) foreach(temp, ((BoolExpr *) qual)->args)
andlist = lappend(andlist, find_ors(lfirst(temp))); orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
return make_andclause(pull_ands(andlist)); /*
* Don't need pull_ors() since this routine will never introduce
* an OR where there wasn't one before.
*/
return process_duplicate_ors(orlist);
} }
else if (or_clause((Node *) qual)) else if (and_clause((Node *) qual))
{ {
List *orlist = NIL; List *andlist = NIL;
List *temp; List *temp;
/* Recurse */
foreach(temp, ((BoolExpr *) qual)->args) foreach(temp, ((BoolExpr *) qual)->args)
orlist = lappend(orlist, find_ors(lfirst(temp))); andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
return or_normalize(pull_ors(orlist)); /* Flatten any ANDs introduced just below here */
andlist = pull_ands(andlist);
/* The AND list can't get shorter, so result is always an AND */
return make_andclause(andlist);
} }
else else
return qual; return qual;
} }
/* /*
* or_normalize * process_duplicate_ors
* Given a list of exprs which are 'or'ed together, try to apply * Given a list of exprs which are ORed together, try to apply
* the distributive law * the inverse OR distributive law.
* ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C))
* to convert the top-level OR clause to a top-level AND clause.
* *
* Returns the resulting expression (could be an AND clause, an OR * Returns the resulting expression (could be an AND clause, an OR
* clause, or maybe even a single subexpression). * clause, or maybe even a single subexpression).
*/ */
static Expr * static Expr *
or_normalize(List *orlist) process_duplicate_ors(List *orlist)
{ {
Expr *distributable = NULL; List *reference = NIL;
int num_subclauses = 1; int num_subclauses = 0;
List *andclauses = NIL; List *winners;
List *neworlist;
List *temp; List *temp;
List *temp2;
if (orlist == NIL) if (orlist == NIL)
return NULL; /* probably can't happen */ return NULL; /* probably can't happen */
...@@ -596,9 +450,10 @@ or_normalize(List *orlist) ...@@ -596,9 +450,10 @@ or_normalize(List *orlist)
return lfirst(orlist); /* single-expression OR (can this happen?) */ return lfirst(orlist); /* single-expression OR (can this happen?) */
/* /*
* If we have a choice of AND clauses, pick the one with the most * Choose the shortest AND clause as the reference list --- obviously,
* subclauses. Because we initialized num_subclauses = 1, any AND * any subclause not in this clause isn't in all the clauses.
* clauses with only one arg will be ignored as useless. * If we find a clause that's not an AND, we can treat it as a
* one-element AND clause, which necessarily wins as shortest.
*/ */
foreach(temp, orlist) foreach(temp, orlist)
{ {
...@@ -606,335 +461,134 @@ or_normalize(List *orlist) ...@@ -606,335 +461,134 @@ or_normalize(List *orlist)
if (and_clause((Node *) clause)) if (and_clause((Node *) clause))
{ {
int nclauses = length(((BoolExpr *) clause)->args); List *subclauses = ((BoolExpr *) clause)->args;
int nclauses = length(subclauses);
if (nclauses > num_subclauses) if (reference == NIL || nclauses < num_subclauses)
{ {
distributable = clause; reference = subclauses;
num_subclauses = nclauses; num_subclauses = nclauses;
} }
} }
else
{
reference = makeList1(clause);
break;
}
} }
/* if there's no suitable AND clause, we can't transform the OR */
if (!distributable)
return make_orclause(orlist);
/* /*
* Caution: lremove destructively modifies the input orlist. This * Just in case, eliminate any duplicates in the reference list.
* should be OK, since or_normalize is only called with freshly
* constructed lists that are not referenced elsewhere.
*/ */
orlist = lremove(distributable, orlist); reference = set_union(NIL, reference);
foreach(temp, ((BoolExpr *) distributable)->args) /*
* Check each element of the reference list to see if it's in all the
* OR clauses. Build a new list of winning clauses.
*/
winners = NIL;
foreach(temp, reference)
{ {
Expr *andclause = lfirst(temp); Expr *refclause = lfirst(temp);
List *neworlist; bool win = true;
/*
* We are going to insert the orlist into multiple places in the
* result expression. For most expression types, it'd be OK to
* just have multiple links to the same subtree, but this fails
* badly for SubLinks (and perhaps other cases?). For safety, we
* make a distinct copy for each place the orlist is inserted.
*/
if (lnext(temp) == NIL)
neworlist = orlist; /* can use original tree at the end */
else
neworlist = copyObject(orlist);
/*
* pull_ors is needed here in case andclause has a top-level OR.
* Then we recursively apply or_normalize, since there might be an
* AND subclause in the resulting OR-list.
*/
andclause = or_normalize(pull_ors(lcons(andclause, neworlist)));
andclauses = lappend(andclauses, andclause);
}
/* pull_ands is needed in case any sub-or_normalize succeeded */
return make_andclause(pull_ands(andclauses));
}
/*
* find_ands
* Given a qualification tree with the NOTs pushed down, convert it
* to a tree in DNF by repeatedly applying the rule:
* ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C))
*
* Note that 'and' clauses will always be turned into 'or' clauses
* if they contain any 'or' subclauses. Also, we do not descend
* below the top-level AND/OR structure.
*
* Returns the modified qualification. AND/OR flatness is preserved.
*/
static Expr *
find_ands(Expr *qual)
{
if (qual == NULL)
return NULL;
if (or_clause((Node *) qual)) foreach(temp2, orlist)
{ {
List *orlist = NIL; Expr *clause = lfirst(temp2);
List *temp;
foreach(temp, ((BoolExpr *) qual)->args) if (and_clause((Node *) clause))
orlist = lappend(orlist, find_ands(lfirst(temp))); {
return make_orclause(pull_ors(orlist)); if (!member(refclause, ((BoolExpr *) clause)->args))
} {
else if (and_clause((Node *) qual)) win = false;
{ break;
List *andlist = NIL; }
List *temp; }
else
{
if (!equal(refclause, clause))
{
win = false;
break;
}
}
}
foreach(temp, ((BoolExpr *) qual)->args) if (win)
andlist = lappend(andlist, find_ands(lfirst(temp))); winners = lappend(winners, refclause);
return and_normalize(pull_ands(andlist));
} }
else
return qual;
}
/* /*
* and_normalize * If no winners, we can't transform the OR
* Given a list of exprs which are 'and'ed together, try to apply */
* the distributive law if (winners == NIL)
* ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C)) return make_orclause(orlist);
* to convert the top-level AND clause to a top-level OR clause.
*
* Returns the resulting expression (could be an AND clause, an OR
* clause, or maybe even a single subexpression).
*/
static Expr *
and_normalize(List *andlist)
{
Expr *distributable = NULL;
int num_subclauses = 1;
List *orclauses = NIL;
List *temp;
if (andlist == NIL)
return NULL; /* probably can't happen */
if (lnext(andlist) == NIL)
return lfirst(andlist); /* single-expression AND (can this
* happen?) */
/* /*
* If we have a choice of OR clauses, pick the one with the most * Generate new OR list consisting of the remaining sub-clauses.
* subclauses. Because we initialized num_subclauses = 1, any OR *
* clauses with only one arg will be ignored as useless. * If any clause degenerates to empty, then we have a situation like
* (A AND B) OR (A), which can be reduced to just A --- that is, the
* additional conditions in other arms of the OR are irrelevant.
*
* Note that because we use set_difference, any multiple occurrences of
* a winning clause in an AND sub-clause will be removed automatically.
*/ */
foreach(temp, andlist) neworlist = NIL;
foreach(temp, orlist)
{ {
Expr *clause = lfirst(temp); Expr *clause = lfirst(temp);
if (or_clause((Node *) clause)) if (and_clause((Node *) clause))
{ {
int nclauses = length(((BoolExpr *) clause)->args); List *subclauses = ((BoolExpr *) clause)->args;
if (nclauses > num_subclauses) subclauses = set_difference(subclauses, winners);
if (subclauses != NIL)
{ {
distributable = clause; if (lnext(subclauses) == NIL)
num_subclauses = nclauses; neworlist = lappend(neworlist, lfirst(subclauses));
else
neworlist = lappend(neworlist, make_andclause(subclauses));
}
else
{
neworlist = NIL; /* degenerate case, see above */
break;
}
}
else
{
if (!member(clause, winners))
neworlist = lappend(neworlist, clause);
else
{
neworlist = NIL; /* degenerate case, see above */
break;
} }
} }
} }
/* if there's no suitable OR clause, we can't transform the AND */
if (!distributable)
return make_andclause(andlist);
/* /*
* Caution: lremove destructively modifies the input andlist. This * Append reduced OR to the winners list, if it's not degenerate, handling
* should be OK, since and_normalize is only called with freshly * the special case of one element correctly (can that really happen?).
* constructed lists that are not referenced elsewhere. * Also be careful to maintain AND/OR flatness in case we pulled up a
* sub-sub-OR-clause.
*/ */
andlist = lremove(distributable, andlist); if (neworlist != NIL)
foreach(temp, ((BoolExpr *) distributable)->args)
{
Expr *orclause = lfirst(temp);
List *newandlist;
/*
* We are going to insert the andlist into multiple places in the
* result expression. For most expression types, it'd be OK to
* just have multiple links to the same subtree, but this fails
* badly for SubLinks (and perhaps other cases?). For safety, we
* make a distinct copy for each place the andlist is inserted.
*/
if (lnext(temp) == NIL)
newandlist = andlist; /* can use original tree at the
* end */
else
newandlist = copyObject(andlist);
/*
* pull_ands is needed here in case orclause has a top-level AND.
* Then we recursively apply and_normalize, since there might be
* an OR subclause in the resulting AND-list.
*/
orclause = and_normalize(pull_ands(lcons(orclause, newandlist)));
orclauses = lappend(orclauses, orclause);
}
/* pull_ors is needed in case any sub-and_normalize succeeded */
return make_orclause(pull_ors(orclauses));
}
/*
* qual_cleanup
* Fix up a qualification by removing duplicate entries (which could be
* created during normalization, if identical subexpressions from different
* parts of the tree are brought together). Also, check for AND and OR
* clauses with only one remaining subexpression, and simplify.
*
* Returns the modified qualification.
*/
static Expr *
qual_cleanup(Expr *qual)
{
if (qual == NULL)
return NULL;
if (and_clause((Node *) qual))
{ {
List *andlist = NIL; if (lnext(neworlist) == NIL)
List *temp; winners = lappend(winners, lfirst(neworlist));
foreach(temp, ((BoolExpr *) qual)->args)
andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
andlist = remove_duplicates(pull_ands(andlist));
if (length(andlist) > 1)
return make_andclause(andlist);
else else
return lfirst(andlist); winners = lappend(winners, make_orclause(pull_ors(neworlist)));
} }
else if (or_clause((Node *) qual))
{
List *orlist = NIL;
List *temp;
foreach(temp, ((BoolExpr *) qual)->args)
orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
orlist = remove_duplicates(pull_ors(orlist));
if (length(orlist) > 1) /*
return make_orclause(orlist); * And return the constructed AND clause, again being wary of a single
else * element and AND/OR flatness.
return lfirst(orlist); */
} if (lnext(winners) == NIL)
else return (Expr *) lfirst(winners);
return qual;
}
/*
* remove_duplicates
*/
static List *
remove_duplicates(List *list)
{
List *result = NIL;
List *i;
if (length(list) <= 1)
return list;
foreach(i, list)
{
if (!member(lfirst(i), result))
result = lappend(result, lfirst(i));
}
return result;
}
/*
* count_bool_nodes
* Support for heuristics in canonicalize_qual(): count the
* number of nodes that are inputs to the top level AND/OR
* part of a qual tree, and estimate how many nodes will appear
* in the CNF'ified or DNF'ified equivalent of the expression.
*
* This is just an approximate calculation; it cannot detect possible
* simplifications from eliminating duplicate subclauses. The idea is just to
* cheaply determine whether CNF will be markedly worse than DNF or vice
* versa.
*
* The counts/estimates are represented as doubles to avoid risk of overflow.
*/
static void
count_bool_nodes(Expr *qual,
double *nodes,
double *cnfnodes,
double *dnfnodes)
{
List *temp;
double subnodes,
subcnfnodes,
subdnfnodes;
if (and_clause((Node *) qual))
{
*nodes = *cnfnodes = 0.0;
*dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
foreach(temp, ((BoolExpr *) qual)->args)
{
count_bool_nodes(lfirst(temp),
&subnodes, &subcnfnodes, &subdnfnodes);
*nodes += subnodes;
*cnfnodes += subcnfnodes;
*dnfnodes *= subdnfnodes;
}
/*
* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
* simple ones with count 1. Make sure dnfnodes isn't too small.
*/
if (*dnfnodes < *cnfnodes)
*dnfnodes = *cnfnodes;
}
else if (or_clause((Node *) qual))
{
*nodes = *dnfnodes = 0.0;
*cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
foreach(temp, ((BoolExpr *) qual)->args)
{
count_bool_nodes(lfirst(temp),
&subnodes, &subcnfnodes, &subdnfnodes);
*nodes += subnodes;
*cnfnodes *= subcnfnodes;
*dnfnodes += subdnfnodes;
}
/*
* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
* simple ones with count 1. Make sure cnfnodes isn't too small.
*/
if (*cnfnodes < *dnfnodes)
*cnfnodes = *dnfnodes;
}
else if (contain_subplans((Node *) qual))
{
/*
* charge extra for subexpressions containing sub-SELECTs, to
* discourage us from rearranging them in a way that might
* generate N copies of a subselect rather than one. The magic
* constant here interacts with the "4x maximum growth" heuristic
* in canonicalize_qual().
*/
*nodes = 1.0;
*cnfnodes = *dnfnodes = 25.0;
}
else else
{ return make_andclause(pull_ands(winners));
/* anything else counts 1 for my purposes */
*nodes = *cnfnodes = *dnfnodes = 1.0;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment