Commit 9f76d0d9 authored by Tom Lane

Fix GEQO to work again in CVS tip, by being more careful about memory
allocation in best_inner_indexscan().  While at it, simplify GEQO's
interface to the main planner --- make_join_rel() offers exactly the
API it really wants, whereas calling make_rels_by_clause_joins() and
make_rels_by_clauseless_joins() required jumping through hoops.
Rewrite gimme_tree for clarity (sometimes iteration is much better than
recursion), and approximately halve GEQO's runtime by recognizing that
tours of the forms (a,b,c,d,...) and (b,a,c,d,...) are equivalent
because of symmetry in make_join_rel().
parent 9cecff03
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: geqo_eval.c,v 1.59 2002/06/20 20:29:29 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.60 2002/12/16 21:30:29 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "postgres.h" #include "postgres.h"
#include <float.h>
#include <math.h> #include <math.h>
#include <limits.h> #include <limits.h>
...@@ -44,6 +45,20 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) ...@@ -44,6 +45,20 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
Cost fitness; Cost fitness;
List *savelist; List *savelist;
/*
* Because gimme_tree considers both left- and right-sided trees,
* there is no difference between a tour (a,b,c,d,...) and a tour
* (b,a,c,d,...) --- the same join orders will be considered.
* To avoid redundant cost calculations, we simply reject tours where
* tour[0] > tour[1], assigning them an artificially bad fitness.
*
* (It would be better to tweak the GEQO logic to not generate such tours
* in the first place, but I'm not sure of all the implications in the
* mutation logic.)
*/
if (num_gene >= 2 && tour[0] > tour[1])
return DBL_MAX;
/* /*
* Create a private memory context that will hold all temp storage * Create a private memory context that will hold all temp storage
* allocated inside gimme_tree(). * allocated inside gimme_tree().
...@@ -60,11 +75,15 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) ...@@ -60,11 +75,15 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
ALLOCSET_DEFAULT_MAXSIZE); ALLOCSET_DEFAULT_MAXSIZE);
oldcxt = MemoryContextSwitchTo(mycontext); oldcxt = MemoryContextSwitchTo(mycontext);
/* preserve root->join_rel_list, which gimme_tree changes */ /*
* preserve root->join_rel_list, which gimme_tree changes; without this,
* it'll be pointing at recycled storage after the MemoryContextDelete
* below.
*/
savelist = root->join_rel_list; savelist = root->join_rel_list;
/* construct the best path for the given combination of relations */ /* construct the best path for the given combination of relations */
joinrel = gimme_tree(root, initial_rels, tour, num_gene, 0, NULL); joinrel = gimme_tree(root, initial_rels, tour, num_gene);
/* /*
* compute fitness * compute fitness
...@@ -86,70 +105,61 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) ...@@ -86,70 +105,61 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
/* /*
* gimme_tree * gimme_tree
* this routine considers only LEFT-SIDED TREES! * Form planner estimates for a join tree constructed in the specified
* order.
* *
* 'root' is the Query * 'root' is the Query
* 'initial_rels' is the list of initial relations (FROM-list items) * 'initial_rels' is the list of initial relations (FROM-list items)
* 'tour' is the proposed join order, of length 'num_gene' * 'tour' is the proposed join order, of length 'num_gene'
* 'rel_count' is number of initial_rels items already joined (initially 0)
* 'old_rel' is the preceding join (initially NULL)
* *
* Returns a new join relation incorporating all joins in a left-sided tree. * Returns a new join relation whose cheapest path is the best plan for
* this join order.
*
* Note that at each step we consider using the next rel as both left and
* right side of a join. However, we cannot build general ("bushy") plan
* trees this way, only left-sided and right-sided trees.
*/ */
RelOptInfo * RelOptInfo *
gimme_tree(Query *root, List *initial_rels, gimme_tree(Query *root, List *initial_rels,
Gene *tour, int num_gene, Gene *tour, int num_gene)
int rel_count, RelOptInfo *old_rel)
{ {
RelOptInfo *inner_rel; /* current relation */ RelOptInfo *joinrel;
int init_rel_index; int cur_rel_index;
int rel_count;
if (rel_count < num_gene) /*
{ * Start with the first relation ...
/* tree not yet finished */ */
init_rel_index = (int) tour[rel_count]; cur_rel_index = (int) tour[0];
inner_rel = (RelOptInfo *) nth(init_rel_index - 1, initial_rels); joinrel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels);
if (rel_count == 0) /*
{ * And add on each relation in the specified order ...
/* processing first join with init_rel_index = (int) tour[0] */ */
rel_count++; for (rel_count = 1; rel_count < num_gene; rel_count++)
return gimme_tree(root, initial_rels,
tour, num_gene,
rel_count, inner_rel);
}
else
{ {
/* tree main part */ RelOptInfo *inner_rel;
List *acceptable_rels = makeList1(inner_rel);
List *new_rels;
RelOptInfo *new_rel; RelOptInfo *new_rel;
new_rels = make_rels_by_clause_joins(root, old_rel, cur_rel_index = (int) tour[rel_count];
acceptable_rels);
/* Shouldn't get more than one result */ inner_rel = (RelOptInfo *) nth(cur_rel_index - 1, initial_rels);
Assert(length(new_rels) <= 1);
if (new_rels == NIL) /*
{ * Construct a RelOptInfo representing the previous joinrel joined
new_rels = make_rels_by_clauseless_joins(root, old_rel, * to inner_rel. These are always inner joins. Note that we expect
acceptable_rels); * the joinrel not to exist in root->join_rel_list yet, and so the
Assert(length(new_rels) <= 1); * paths constructed for it will only include the ones we want.
if (new_rels == NIL) */
elog(ERROR, "gimme_tree: failed to construct join rel"); new_rel = make_join_rel(root, joinrel, inner_rel, JOIN_INNER);
}
new_rel = (RelOptInfo *) lfirst(new_rels);
/* Find and save the cheapest paths for this rel */ /* Find and save the cheapest paths for this rel */
set_cheapest(new_rel); set_cheapest(new_rel);
/* and recurse... */ /* and repeat... */
rel_count++; joinrel = new_rel;
return gimme_tree(root, initial_rels,
tour, num_gene,
rel_count, new_rel);
}
} }
return old_rel; /* tree finished ... */ return joinrel;
} }
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: geqo_main.c,v 1.32 2002/07/20 04:59:10 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.33 2002/12/16 21:30:29 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -235,8 +235,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels) ...@@ -235,8 +235,7 @@ geqo(Query *root, int number_of_rels, List *initial_rels)
/* root->join_rel_list will be modified during this ! */ /* root->join_rel_list will be modified during this ! */
best_rel = gimme_tree(root, initial_rels, best_rel = gimme_tree(root, initial_rels,
best_tour, pool->string_length, best_tour, pool->string_length);
0, NULL);
/* DBG: show the query plan /* DBG: show the query plan
print_plan(best_plan, root); print_plan(best_plan, root);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.35 2002/11/06 00:00:44 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/geqo_misc.c,v 1.36 2002/12/16 21:30:29 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,19 +31,25 @@ ...@@ -31,19 +31,25 @@
/* /*
* avg_pool * avg_pool
*/ */
static float static double
avg_pool(Pool *pool) avg_pool(Pool *pool)
{ {
int i; int i;
double cumulative = 0.0; double cumulative = 0.0;
if (pool->size == 0) if (pool->size <= 0)
elog(ERROR, "avg_pool: pool_size of zero"); elog(ERROR, "avg_pool: pool_size of zero");
/*
* Since the pool may contain multiple occurrences of DBL_MAX, divide
* by pool->size before summing, not after, to avoid overflow. This
* loses a little in speed and accuracy, but this routine is only used
* for debug printouts, so we don't care that much.
*/
for (i = 0; i < pool->size; i++) for (i = 0; i < pool->size; i++)
cumulative = cumulative + pool->data[i].worth; cumulative += pool->data[i].worth / pool->size;
return (float) cumulative / pool->size; return cumulative;
} }
/* print_pool /* print_pool
...@@ -72,8 +78,10 @@ print_pool(FILE *fp, Pool *pool, int start, int stop) ...@@ -72,8 +78,10 @@ print_pool(FILE *fp, Pool *pool, int start, int stop)
fprintf(fp, "%d)\t", i); fprintf(fp, "%d)\t", i);
for (j = 0; j < pool->string_length; j++) for (j = 0; j < pool->string_length; j++)
fprintf(fp, "%d ", pool->data[i].string[j]); fprintf(fp, "%d ", pool->data[i].string[j]);
fprintf(fp, "%f\n", pool->data[i].worth); fprintf(fp, "%g\n", pool->data[i].worth);
} }
fflush(fp);
} }
/* print_gen /* print_gen
...@@ -90,12 +98,14 @@ print_gen(FILE *fp, Pool *pool, int generation) ...@@ -90,12 +98,14 @@ print_gen(FILE *fp, Pool *pool, int generation)
lowest = pool->size > 1 ? pool->size - 2 : 0; lowest = pool->size > 1 ? pool->size - 2 : 0;
fprintf(fp, fprintf(fp,
"%5d | Best: %f Worst: %f Mean: %f Avg: %f\n", "%5d | Best: %g Worst: %g Mean: %g Avg: %g\n",
generation, generation,
pool->data[0].worth, pool->data[0].worth,
pool->data[lowest].worth, pool->data[lowest].worth,
pool->data[pool->size / 2].worth, pool->data[pool->size / 2].worth,
avg_pool(pool)); avg_pool(pool));
fflush(fp);
} }
...@@ -116,6 +126,8 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene) ...@@ -116,6 +126,8 @@ print_edge_table(FILE *fp, Edge *edge_table, int num_gene)
} }
fprintf(fp, "\n"); fprintf(fp, "\n");
fflush(fp);
} }
#endif /* GEQO_DEBUG */ #endif /* GEQO_DEBUG */
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.129 2002/12/15 16:17:49 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.130 2002/12/16 21:30:29 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1398,6 +1398,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, ...@@ -1398,6 +1398,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
List *ilist; List *ilist;
List *jlist; List *jlist;
InnerIndexscanInfo *info; InnerIndexscanInfo *info;
MemoryContext oldcontext;
/* /*
* Nestloop only supports inner and left joins. * Nestloop only supports inner and left joins.
...@@ -1415,15 +1416,27 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, ...@@ -1415,15 +1416,27 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
} }
/* /*
* If there are no indexable joinclauses for this rel, exit quickly. * If there are no indexable joinclauses for this rel, exit quickly.
* Otherwise, intersect the given outer_relids with index_outer_relids
* to find the set of outer relids actually relevant for this index.
* If there are none, again we can fail immediately.
*/ */
if (!rel->index_outer_relids) if (!rel->index_outer_relids)
return NULL; return NULL;
/*
* Otherwise, we have to do path selection in the memory context of
* the given rel, so that any created path can be safely attached to
* the rel's cache of best inner paths. (This is not currently an
* issue for normal planning, but it is an issue for GEQO planning.)
*/
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
/*
* Intersect the given outer_relids with index_outer_relids
* to find the set of outer relids actually relevant for this index.
* If there are none, again we can fail immediately.
*/
outer_relids = set_intersecti(rel->index_outer_relids, outer_relids); outer_relids = set_intersecti(rel->index_outer_relids, outer_relids);
if (!outer_relids) if (!outer_relids)
{
MemoryContextSwitchTo(oldcontext);
return NULL; return NULL;
}
/* /*
* Look to see if we already computed the result for this set of * Look to see if we already computed the result for this set of
* relevant outerrels. (We include the isouterjoin status in the * relevant outerrels. (We include the isouterjoin status in the
...@@ -1437,6 +1450,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, ...@@ -1437,6 +1450,7 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
info->isouterjoin == isouterjoin) info->isouterjoin == isouterjoin)
{ {
freeList(outer_relids); freeList(outer_relids);
MemoryContextSwitchTo(oldcontext);
return info->best_innerpath; return info->best_innerpath;
} }
} }
...@@ -1517,6 +1531,8 @@ best_inner_indexscan(Query *root, RelOptInfo *rel, ...@@ -1517,6 +1531,8 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
info->best_innerpath = cheapest; info->best_innerpath = cheapest;
rel->index_inner_paths = lcons(info, rel->index_inner_paths); rel->index_inner_paths = lcons(info, rel->index_inner_paths);
MemoryContextSwitchTo(oldcontext);
return cheapest; return cheapest;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.57 2002/06/20 20:29:30 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -18,8 +18,12 @@ ...@@ -18,8 +18,12 @@
#include "optimizer/paths.h" #include "optimizer/paths.h"
static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1, static List *make_rels_by_clause_joins(Query *root,
RelOptInfo *rel2, JoinType jointype); RelOptInfo *old_rel,
List *other_rels);
static List *make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
/* /*
...@@ -246,7 +250,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels) ...@@ -246,7 +250,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels)
* no extra test for overlap for initial rels, since the is_subset test can * no extra test for overlap for initial rels, since the is_subset test can
* only succeed when other_rel is not already part of old_rel.) * only succeed when other_rel is not already part of old_rel.)
*/ */
List * static List *
make_rels_by_clause_joins(Query *root, make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel, RelOptInfo *old_rel,
List *other_rels) List *other_rels)
...@@ -297,7 +301,7 @@ make_rels_by_clause_joins(Query *root, ...@@ -297,7 +301,7 @@ make_rels_by_clause_joins(Query *root,
* Currently, this is only used with initial rels in other_rels, but it would * Currently, this is only used with initial rels in other_rels, but it would
* work for joining to joinrels too. * work for joining to joinrels too.
*/ */
List * static List *
make_rels_by_clauseless_joins(Query *root, make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel, RelOptInfo *old_rel,
List *other_rels) List *other_rels)
...@@ -392,7 +396,7 @@ make_jointree_rel(Query *root, Node *jtnode) ...@@ -392,7 +396,7 @@ make_jointree_rel(Query *root, Node *jtnode)
* (The join rel may already contain paths generated from other * (The join rel may already contain paths generated from other
* pairs of rels that add up to the same set of base rels.) * pairs of rels that add up to the same set of base rels.)
*/ */
static RelOptInfo * RelOptInfo *
make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype) JoinType jointype)
{ {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: geqo.h,v 1.29 2002/07/20 04:59:10 momjian Exp $ * $Id: geqo.h,v 1.30 2002/12/16 21:30:30 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -74,7 +74,6 @@ extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels); ...@@ -74,7 +74,6 @@ extern RelOptInfo *geqo(Query *root, int number_of_rels, List *initial_rels);
extern Cost geqo_eval(Query *root, List *initial_rels, extern Cost geqo_eval(Query *root, List *initial_rels,
Gene *tour, int num_gene); Gene *tour, int num_gene);
extern RelOptInfo *gimme_tree(Query *root, List *initial_rels, extern RelOptInfo *gimme_tree(Query *root, List *initial_rels,
Gene *tour, int num_gene, Gene *tour, int num_gene);
int rel_count, RelOptInfo *old_rel);
#endif /* GEQO_H */ #endif /* GEQO_H */
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: paths.h,v 1.62 2002/12/12 15:49:41 tgl Exp $ * $Id: paths.h,v 1.63 2002/12/16 21:30:30 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -74,13 +74,10 @@ extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel, ...@@ -74,13 +74,10 @@ extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel,
* routines to determine which relations to join * routines to determine which relations to join
*/ */
extern List *make_rels_by_joins(Query *root, int level, List **joinrels); extern List *make_rels_by_joins(Query *root, int level, List **joinrels);
extern List *make_rels_by_clause_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
extern List *make_rels_by_clauseless_joins(Query *root,
RelOptInfo *old_rel,
List *other_rels);
extern RelOptInfo *make_jointree_rel(Query *root, Node *jtnode); extern RelOptInfo *make_jointree_rel(Query *root, Node *jtnode);
extern RelOptInfo *make_join_rel(Query *root,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype);
/* /*
* pathkeys.c * pathkeys.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment