Commit 9f76d0d9 authored by Tom Lane

Fix GEQO to work again in CVS tip, by being more careful about memory
allocation in best_inner_indexscan().  While at it, simplify GEQO's
interface to the main planner --- make_join_rel() offers exactly the
API it really wants, whereas calling make_rels_by_clause_joins() and
make_rels_by_clauseless_joins() required jumping through hoops.
Rewrite gimme_tree for clarity (sometimes iteration is much better than
recursion), and approximately halve GEQO's runtime by recognizing that
tours of the forms (a,b,c,d,...) and (b,a,c,d,...) are equivalent
because of symmetry in make_join_rel().
parent 9cecff03
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_eval.c,v 1.59 2002/06/20 20:29:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -21,6 +21,7 @@
#include "postgres.h"
#include <float.h>
#include <math.h>
#include <limits.h>
......@@ -44,6 +45,20 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
Cost fitness;
List *savelist;
/*
* Because gimme_tree considers both left- and right-sided trees,
* there is no difference between a tour (a,b,c,d,...) and a tour
* (b,a,c,d,...) --- the same join orders will be considered.
* To avoid redundant cost calculations, we simply reject tours where
* tour[0] > tour[1], assigning them an artificially bad fitness.
*
* (It would be better to tweak the GEQO logic to not generate such tours
* in the first place, but I'm not sure of all the implications in the
* mutation logic.)
*/
if (num_gene >= 2 && tour[0] > tour[1])
return DBL_MAX;
/*
* Create a private memory context that will hold all temp storage
* allocated inside gimme_tree().
......@@ -60,11 +75,15 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
ALLOCSET_DEFAULT_MAXSIZE);
oldcxt = MemoryContextSwitchTo(mycontext);
/* preserve root->join_rel_list, which gimme_tree changes */
/*
* preserve root->join_rel_list, which gimme_tree changes; without this,
* it'll be pointing at recycled storage after the MemoryContextDelete
* below.
*/
savelist = root->join_rel_list;
/* construct the best path for the given combination of relations */
joinrel = gimme_tree(root, initial_rels, tour, num_gene, 0, NULL);
joinrel = gimme_tree(root, initial_rels, tour, num_gene);
/*
* compute fitness
......@@ -86,70 +105,61 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
/*
* gimme_tree
* this routine considers only LEFT-SIDED TREES!
* Form planner estimates for a join tree constructed in the specified
* order.
*
* 'root' is the Query
* 'initial_rels' is the list of initial relations (FROM-list items)
* 'tour' is the proposed join order, of length 'num_gene'
* 'rel_count' is number of initial_rels items already joined (initially 0)
* 'old_rel' is the preceding join (initially NULL)
*
* Returns a new join relation incorporating all joins in a left-sided tree.
* Returns a new join relation whose cheapest path is the best plan for
* this join order.
*
* Note that at each step we consider using the next rel as both left and
* right side of a join. However, we cannot build general ("bushy") plan
* trees this way, only left-sided and right-sided trees.
*/
RelOptInfo *
gimme_tree(Query *root, List *initial_rels,
Gene *tour, int num_gene,
int rel_count, RelOptInfo *old_rel)
Gene *tour, int num_gene)
{
RelOptInfo *inner_rel; /* current relation */
int init_rel_index;
RelOptInfo *joinrel;
int cur_rel_index;
int rel_count;
if (rel_count < num_gene)
/*
* Start with the first relation ...
*/
cur_rel_index = (int) tour[0];
joinrel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels);
/*
* And add on each relation in the specified order ...
*/
for (rel_count = 1; rel_count < num_gene; rel_count++)
{
/* tree not yet finished */
init_rel_index = (int) tour[rel_count];
inner_rel = (RelOptInfo *) nth(init_rel_index - 1, initial_rels);
if (rel_count == 0)
{
/* processing first join with init_rel_index = (int) tour[0] */
rel_count++;
return gimme_tree(root, initial_rels,
tour, num_gene,
rel_count, inner_rel);
}
else
{
/* tree main part */
List *acceptable_rels = makeList1(inner_rel);
List *new_rels;
RelOptInfo *new_rel;
new_rels = make_rels_by_clause_joins(root, old_rel,
acceptable_rels);
/* Shouldn't get more than one result */
Assert(length(new_rels) <= 1);
if (new_rels == NIL)
{
new_rels = make_rels_by_clauseless_joins(root, old_rel,
acceptable_rels);
Assert(length(new_rels) <= 1);
if (new_rels == NIL)
elog(ERROR, "gimme_tree: failed to construct join rel");
}
new_rel = (RelOptInfo *) lfirst(new_rels);
/* Find and save the cheapest paths for this rel */
set_cheapest(new_rel);
/* and recurse... */
rel_count++;
return gimme_tree(root, initial_rels,
tour, num_gene,
rel_count, new_rel);
}
RelOptInfo *inner_rel;
RelOptInfo *new_rel;
cur_rel_index = (int) tour[rel_count];
inner_rel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels);
/*
* Construct a RelOptInfo representing the previous joinrel joined
* to inner_rel. These are always inner joins. Note that we expect
* the joinrel not to exist in root->join_rel_list yet, and so the
* paths constructed for it will only include the ones we want.
*/
new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER);
/* Find and save the cheapest paths for this rel */
set_cheapest(new_rel);
/* and repeat... */
joinrel = new_rel;
}
return old_rel; /* tree finished ... */
return joinrel;
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo_main.c,v 1.32 2002/07/20 04:59:10 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.33 2002/12/16 21:30:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -235,8 +235,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels)
/* root->join_rel_list will be modified during this ! */
best_rel = gimme_tree(root, initial_rels,
best_tour, pool->string_length,
0, NULL);
best_tour, pool->string_length);
/* DBG: show the query plan
print_plan(best_plan, root);
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.35 2002/11/06 00:00:44 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.36 2002/12/16 21:30:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -31,19 +31,25 @@
/*
* avg_pool
*/
static float
static double
avg_pool(Pool *pool)
{
int i;
double cumulative = 0.0;
if (pool->size == 0)
if (pool->size <= 0)
elog(ERROR, "avg_pool: pool_size of zero");
/*
* Since the pool may contain multiple occurrences of DBL_MAX, divide
* by pool->size before summing, not after, to avoid overflow. This
* loses a little in speed and accuracy, but this routine is only used
* for debug printouts, so we don't care that much.
*/
for (i = 0; i < pool->size; i++)
cumulative = cumulative + pool->data[i].worth;
cumulative += pool->data[i].worth / pool->size;
return (float) cumulative / pool->size;
return cumulative;
}
/* print_pool
......@@ -72,8 +78,10 @@ print_pool(FILE *fp, Pool *pool, int start, int stop)
fprintf(fp, "%d)\t", i);
for (j = 0; j < pool->string_length; j++)
fprintf(fp, "%d ", pool->data[i].string[j]);
fprintf(fp, "%f\n", pool->data[i].worth);
fprintf(fp, "%g\n", pool->data[i].worth);
}
fflush(fp);
}
/* print_gen
......@@ -90,12 +98,14 @@ print_gen(FILE *fp, Pool *pool, int generation)
lowest = pool->size > 1 ? pool->size - 2 : 0;
fprintf(fp,
"%5d | Best: %f Worst: %f Mean: %f Avg: %f\n",
"%5d | Best: %g Worst: %g Mean: %g Avg: %g\n",
generation,
pool->data[0].worth,
pool->data[lowest].worth,
pool->data[pool->size / 2].worth,
avg_pool(pool));
fflush(fp);
}
......@@ -116,6 +126,8 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene)
}
fprintf(fp, "\n");
fflush(fp);
}
#endif /* GEQO_DEBUG */
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.129 2002/12/15 16:17:49 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.130 2002/12/16 21:30:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1398,6 +1398,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
List *ilist;
List *jlist;
InnerIndexscanInfo *info;
MemoryContext oldcontext;
/*
* Nestloop only supports inner and left joins.
......@@ -1415,15 +1416,27 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
}
/*
* If there are no indexable joinclauses for this rel, exit quickly.
* Otherwise, intersect the given outer_relids with index_outer_relids
* to find the set of outer relids actually relevant for this index.
* If there are none, again we can fail immediately.
*/
if (!rel->index_outer_relids)
return NULL;
/*
* Otherwise, we have to do path selection in the memory context of
* the given rel, so that any created path can be safely attached to
* the rel's cache of best inner paths. (This is not currently an
* issue for normal planning, but it is an issue for GEQO planning.)
*/
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
/*
* Intersect the given outer_relids with index_outer_relids
* to find the set of outer relids actually relevant for this index.
* If there are none, again we can fail immediately.
*/
outer_relids = set_intersecti(rel->index_outer_relids, outer_relids);
if (!outer_relids)
{
MemoryContextSwitchTo(oldcontext);
return NULL;
}
/*
* Look to see if we already computed the result for this set of
* relevant outerrels. (We include the isouterjoin status in the
......@@ -1437,6 +1450,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
info->isouterjoin == isouterjoin)
{
freeList(outer_relids);
MemoryContextSwitchTo(oldcontext);
return info->best_innerpath;
}
}
......@@ -1517,6 +1531,8 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
info->best_innerpath = cheapest;
rel->index_inner_paths = lcons(info, rel->index_inner_paths);
MemoryContextSwitchTo(oldcontext);
return cheapest;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.57 2002/06/20 20:29:30 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,8 +18,12 @@
#include "optimizer/paths.h"
static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1,
RelOptInfo *rel2, JoinType jointype);
static List *make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
static List *make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
/*
......@@ -246,7 +250,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels)
* no extra test for overlap for initial rels, since the is_subset test can
* only succeed when other_rel is not already part of old_rel.)
*/
List *
static List *
make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels)
......@@ -297,7 +301,7 @@ make_rels_by_clause_joins(Query *root,
* Currently, this is only used with initial rels in other_rels, but it would
* work for joining to joinrels too.
*/
List *
static List *
make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels)
......@@ -392,7 +396,7 @@ make_jointree_rel(Query *root, Node *jtnode)
* (The join rel may already contain paths generated from other
* pairs of rels that add up to the same set of base rels.)
*/
static RelOptInfo *
RelOptInfo *
make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype)
{
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: geqo.h,v 1.29 2002/07/20 04:59:10 momjian Exp $
* $Id: geqo.h,v 1.30 2002/12/16 21:30:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -74,7 +74,6 @@ extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels);
extern Cost geqo_eval(Query *root, List *initial_rels,
Gene *tour, int num_gene);
extern RelOptInfo *gimme_tree(Query *root, List *initial_rels,
Gene *tour, int num_gene,
int rel_count, RelOptInfo *old_rel);
Gene *tour, int num_gene);
#endif /* GEQO_H */
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: paths.h,v 1.62 2002/12/12 15:49:41 tgl Exp $
* $Id: paths.h,v 1.63 2002/12/16 21:30:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -74,13 +74,10 @@ extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel,
* routines to determine which relations to join
*/
extern List *make_rels_by_joins(Query *root, int level, List **joinrels);
extern List *make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
extern List *make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
extern RelOptInfo *make_jointree_rel(Query *root, Node *jtnode);
extern RelOptInfo *make_join_rel(Query *root,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype);
/*
* pathkeys.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment