allpaths.c 18.4 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * allpaths.c
4
 *	  Routines to find possible search paths for processing a query
5
 *
6
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
Bruce Momjian's avatar
Add:  
Bruce Momjian committed
7
 * Portions Copyright (c) 1994, Regents of the University of California
8 9 10
 *
 *
 * IDENTIFICATION
11
 *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.83 2001/12/10 22:54:12 tgl Exp $
12 13 14 15 16 17
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

18 19 20
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
21
#include "optimizer/clauses.h"
22
#include "optimizer/cost.h"
Marc G. Fournier's avatar
Marc G. Fournier committed
23
#include "optimizer/geqo.h"
Bruce Momjian's avatar
Bruce Momjian committed
24 25
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
26 27
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
28
#include "optimizer/prep.h"
29
#include "parser/parsetree.h"
30
#include "rewrite/rewriteManip.h"
Marc G. Fournier's avatar
Marc G. Fournier committed
31

32

33
bool		enable_geqo = true;
34
int			geqo_rels = DEFAULT_GEQO_RELS;
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
35 36


37 38
static void set_base_rel_pathlists(Query *root);
static void set_plain_rel_pathlist(Query *root, RelOptInfo *rel,
39
					   RangeTblEntry *rte);
40
static void set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
41 42
						   Index rti, RangeTblEntry *rte,
						   List *inheritlist);
43
static void set_subquery_pathlist(Query *root, RelOptInfo *rel,
44
					  Index rti, RangeTblEntry *rte);
45
static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed,
46
					  List *initial_rels);
47

48

49
/*
50 51
 * make_one_rel
 *	  Finds all possible access paths for executing a query, returning a
52
 *	  single rel that represents the join of all base rels in the query.
53
 */
54
RelOptInfo *
55
make_one_rel(Query *root)
56
{
57
	RelOptInfo *rel;
58

59 60 61
	/*
	 * Generate access paths for the base rels.
	 */
62
	set_base_rel_pathlists(root);
63

64
	/*
65
	 * Generate access paths for the entire join tree.
66
	 */
67
	Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
68

69
	rel = make_fromexpr_rel(root, root->jointree);
70

71
	/*
72
	 * The result should join all the query's base rels.
73 74 75 76
	 */
	Assert(length(rel->relids) == length(root->base_rel_list));

	return rel;
77 78
}

79
/*
80
 * set_base_rel_pathlists
81 82 83
 *	  Finds all paths available for scanning each base-relation entry.
 *	  Sequential scan and any available indices are considered.
 *	  Each useful path is attached to its relation's 'pathlist' field.
84 85
 */
static void
86
set_base_rel_pathlists(Query *root)
87
{
88
	List	   *rellist;
89

90
	foreach(rellist, root->base_rel_list)
91
	{
92
		RelOptInfo *rel = (RelOptInfo *) lfirst(rellist);
93
		Index		rti;
94
		RangeTblEntry *rte;
95
		List	   *inheritlist;
96

97
		Assert(length(rel->relids) == 1);		/* better be base rel */
98 99
		rti = lfirsti(rel->relids);
		rte = rt_fetch(rti, root->rtable);
100

101 102 103
		if (rel->issubquery)
		{
			/* Subquery --- generate a separate plan for it */
104
			set_subquery_pathlist(root, rel, rti, rte);
105
		}
106 107
		else if ((inheritlist = expand_inherted_rtentry(root, rti, true))
				 != NIL)
108 109
		{
			/* Relation is root of an inheritance tree, process specially */
110
			set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist);
111 112 113 114
		}
		else
		{
			/* Plain relation */
115 116
			set_plain_rel_pathlist(root, rel, rte);
		}
117 118 119 120

#ifdef OPTIMIZER_DEBUG
		debug_print_rel(root, rel);
#endif
121 122
	}
}
123

124 125 126 127 128 129 130 131 132
/*
 * set_plain_rel_pathlist
 *	  Build access paths for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
{
	/* Mark rel with estimated output rows, width, etc */
	set_baserel_size_estimates(root, rel);
133

134 135 136
	/*
	 * Generate paths and add them to the rel's pathlist.
	 *
137 138 139
	 * Note: add_path() will discard any paths that are dominated by another
	 * available path, keeping only those paths that are superior along at
	 * least one dimension of cost or sortedness.
140
	 */
141

142
	/* Consider sequential scan */
143
	add_path(rel, create_seqscan_path(root, rel));
144

145 146
	/* Consider TID scans */
	create_tidscan_paths(root, rel);
147

148
	/* Consider index paths for both simple and OR index clauses */
149
	create_index_paths(root, rel);
150

151
	/* create_index_paths must be done before create_or_index_paths */
152
	create_or_index_paths(root, rel);
153

154 155 156 157 158 159 160 161 162
	/* Now find the cheapest of the paths for this rel */
	set_cheapest(rel);
}

/*
 * set_inherited_rel_pathlist
 *	  Build access paths for a inheritance tree rooted at rel
 *
 * inheritlist is a list of RT indexes of all tables in the inheritance tree,
163
 * including a duplicate of the parent itself.	Note we will not come here
164 165 166 167 168 169 170 171 172 173 174 175
 * unless there's at least one child in addition to the parent.
 *
 * NOTE: the passed-in rel and RTE will henceforth represent the appended
 * result of the whole inheritance tree.  The members of inheritlist represent
 * the individual tables --- in particular, the inheritlist member that is a
 * duplicate of the parent RTE represents the parent table alone.
 * We will generate plans to scan the individual tables that refer to
 * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that
 * refer to the original RTE are taken to refer to the append output.
 * In particular, this means we have separate RelOptInfos for the parent
 * table and for the append output, which is a good thing because they're
 * not the same size.
176 177
 */
static void
178 179
set_inherited_rel_pathlist(Query *root, RelOptInfo *rel,
						   Index rti, RangeTblEntry *rte,
180 181
						   List *inheritlist)
{
182
	int			parentRTindex = rti;
183 184 185 186 187
	Oid			parentOID = rte->relid;
	List	   *subpaths = NIL;
	List	   *il;

	/*
188 189
	 * XXX for now, can't handle inherited expansion of FOR UPDATE; can we
	 * do better?
190 191 192 193 194
	 */
	if (intMember(parentRTindex, root->rowMarks))
		elog(ERROR, "SELECT FOR UPDATE is not supported for inherit queries");

	/*
195 196 197
	 * The executor will check the parent table's access permissions when
	 * it examines the parent's inheritlist entry.  There's no need to
	 * check twice, so turn off access check bits in the original RTE.
198 199 200 201 202 203
	 */
	rte->checkForRead = false;
	rte->checkForWrite = false;

	/*
	 * Initialize to compute size estimates for whole inheritance tree
204 205 206 207 208
	 */
	rel->rows = 0;
	rel->width = 0;

	/*
209 210
	 * Generate access paths for each table in the tree (parent AND
	 * children), and pick the cheapest path for each table.
211 212 213
	 */
	foreach(il, inheritlist)
	{
214
		int			childRTindex = lfirsti(il);
215
		RangeTblEntry *childrte;
216
		Oid			childOID;
217 218 219 220 221 222 223
		RelOptInfo *childrel;

		childrte = rt_fetch(childRTindex, root->rtable);
		childOID = childrte->relid;

		/*
		 * Make a RelOptInfo for the child so we can do planning.  Do NOT
224 225 226
		 * attach the RelOptInfo to the query's base_rel_list, however,
		 * since the child is not part of the main join tree.  Instead,
		 * the child RelOptInfo is added to other_rel_list.
227
		 */
228
		childrel = build_other_rel(root, childRTindex);
229 230

		/*
231
		 * Copy the parent's targetlist and restriction quals to the
232
		 * child, with attribute-number adjustment as needed.  We don't
233 234
		 * bother to copy the join quals, since we can't do any joining of
		 * the individual tables.
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
		 */
		childrel->targetlist = (List *)
			adjust_inherited_attrs((Node *) rel->targetlist,
								   parentRTindex,
								   parentOID,
								   childRTindex,
								   childOID);
		childrel->baserestrictinfo = (List *)
			adjust_inherited_attrs((Node *) rel->baserestrictinfo,
								   parentRTindex,
								   parentOID,
								   childRTindex,
								   childOID);
		childrel->baserestrictcost = rel->baserestrictcost;

		/*
		 * Now compute child access paths, and save the cheapest.
		 */
		set_plain_rel_pathlist(root, childrel, childrte);

		subpaths = lappend(subpaths, childrel->cheapest_total_path);

		/* Also update total size estimates */
		rel->rows += childrel->rows;
		if (childrel->width > rel->width)
			rel->width = childrel->width;
261
	}
262 263

	/*
264 265
	 * Finally, build Append path and install it as the only access path
	 * for the parent rel.
266 267 268 269 270
	 */
	add_path(rel, (Path *) create_append_path(rel, subpaths));

	/* Select cheapest path (pretty easy in this case...) */
	set_cheapest(rel);
271 272
}

273 274 275 276 277 278 279 280 281 282 283 284
/*
 * set_subquery_pathlist
 *		Build the (single) access path for a subquery RTE
 */
static void
set_subquery_pathlist(Query *root, RelOptInfo *rel,
					  Index rti, RangeTblEntry *rte)
{
	Query	   *subquery = rte->subquery;

	/*
	 * If there are any restriction clauses that have been attached to the
285 286 287 288 289 290 291
	 * subquery relation, consider pushing them down to become HAVING
	 * quals of the subquery itself.  (Not WHERE clauses, since they may
	 * refer to subquery outputs that are aggregate results.  But
	 * planner.c will transfer them into the subquery's WHERE if they do
	 * not.)  This transformation is useful because it may allow us to
	 * generate a better plan for the subquery than evaluating all the
	 * subquery output rows and then filtering them.
292 293 294 295
	 *
	 * There are several cases where we cannot push down clauses:
	 *
	 * 1. If the subquery contains set ops (UNION/INTERSECT/EXCEPT) we do not
296 297 298 299
	 * push down any qual clauses, since the planner doesn't support quals
	 * at the top level of a setop.  (With suitable analysis we could try
	 * to push the quals down into the component queries of the setop, but
	 * getting it right seems nontrivial.  Work on this later.)
300
	 *
301 302 303
	 * 2. If the subquery has a LIMIT clause or a DISTINCT ON clause, we must
	 * not push down any quals, since that could change the set of rows
	 * returned.  (Actually, we could push down quals into a DISTINCT ON
304 305 306
	 * subquery if they refer only to DISTINCT-ed output columns, but
	 * checking that seems more work than it's worth.  In any case, a
	 * plain DISTINCT is safe to push down past.)
307
	 *
308 309 310 311 312 313 314 315
	 * 3. If the subquery has any ITER nodes (ie, functions returning sets)
	 * in its target list, we do not push down any quals, since the quals
	 * might refer to those tlist items, which would mean we'd introduce
	 * functions-returning-sets into the subquery's WHERE/HAVING quals.
	 * (It'd be sufficient to not push down quals that refer to those
	 * particular tlist items, but that's much clumsier to check.)
	 *
	 * 4. We do not push down clauses that contain subselects, mainly because
316 317
	 * I'm not sure it will work correctly (the subplan hasn't yet
	 * transformed sublinks to subselects).
318 319 320 321 322 323 324 325 326
	 *
	 * Non-pushed-down clauses will get evaluated as qpquals of the
	 * SubqueryScan node.
	 *
	 * XXX Are there any cases where we want to make a policy decision not to
	 * push down, because it'd result in a worse plan?
	 */
	if (subquery->setOperations == NULL &&
		subquery->limitOffset == NULL &&
327
		subquery->limitCount == NULL &&
328 329
		!has_distinct_on_clause(subquery) &&
		!contain_iter_clause((Node *) subquery->targetList))
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
	{
		/* OK to consider pushing down individual quals */
		List	   *upperrestrictlist = NIL;
		List	   *lst;

		foreach(lst, rel->baserestrictinfo)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lst);
			Node	   *clause = (Node *) rinfo->clause;

			if (contain_subplans(clause))
			{
				/* Keep it in the upper query */
				upperrestrictlist = lappend(upperrestrictlist, rinfo);
			}
			else
			{
				/*
348 349 350 351 352
				 * We need to replace Vars in the clause (which must refer
				 * to outputs of the subquery) with copies of the
				 * subquery's targetlist expressions.  Note that at this
				 * point, any uplevel Vars in the clause should have been
				 * replaced with Params, so they need no work.
353 354 355 356 357 358
				 */
				clause = ResolveNew(clause, rti, 0,
									subquery->targetList,
									CMD_SELECT, 0);
				subquery->havingQual = make_and_qual(subquery->havingQual,
													 clause);
359

360 361
				/*
				 * We need not change the subquery's hasAggs or
362 363 364
				 * hasSublinks flags, since we can't be pushing down any
				 * aggregates that weren't there before, and we don't push
				 * down subselects at all.
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
				 */
			}
		}
		rel->baserestrictinfo = upperrestrictlist;
	}

	/* Generate the plan for the subquery */
	rel->subplan = subquery_planner(subquery,
									-1.0 /* default case */ );

	/* Copy number of output rows from subplan */
	rel->tuples = rel->subplan->plan_rows;

	/* Mark rel with estimated output rows, width, etc */
	set_baserel_size_estimates(root, rel);

	/* Generate appropriate path */
	add_path(rel, create_subqueryscan_path(rel));

	/* Select cheapest path (pretty easy in this case...) */
	set_cheapest(rel);
}
387

388
/*
389 390
 * make_fromexpr_rel
 *	  Build access paths for a FromExpr jointree node.
391
 */
392 393
RelOptInfo *
make_fromexpr_rel(Query *root, FromExpr *from)
394
{
395 396
	int			levels_needed;
	List	   *initial_rels = NIL;
397 398
	List	   *jt;

399
	/*
400 401 402
	 * Count the number of child jointree nodes.  This is the depth of the
	 * dynamic-programming algorithm we must employ to consider all ways
	 * of joining the child nodes.
403 404 405 406 407 408 409 410 411 412 413 414
	 */
	levels_needed = length(from->fromlist);

	if (levels_needed <= 0)
		return NULL;			/* nothing to do? */

	/*
	 * Construct a list of rels corresponding to the child jointree nodes.
	 * This may contain both base rels and rels constructed according to
	 * explicit JOIN directives.
	 */
	foreach(jt, from->fromlist)
415 416 417
	{
		Node	   *jtnode = (Node *) lfirst(jt);

418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
		initial_rels = lappend(initial_rels,
							   make_jointree_rel(root, jtnode));
	}

	if (levels_needed == 1)
	{
		/*
		 * Single jointree node, so we're done.
		 */
		return (RelOptInfo *) lfirst(initial_rels);
	}
	else
	{
		/*
		 * Consider the different orders in which we could join the rels,
		 * using either GEQO or regular optimizer.
		 */
		if (enable_geqo && levels_needed >= geqo_rels)
			return geqo(root, levels_needed, initial_rels);
		else
			return make_one_rel_by_joins(root, levels_needed, initial_rels);
439 440 441
	}
}

442
/*
443
 * make_one_rel_by_joins
444
 *	  Find all possible joinpaths for a query by successively finding ways
445
 *	  to join component relations into join relations.
446
 *
447
 * 'levels_needed' is the number of iterations needed, ie, the number of
448 449 450
 *		independent jointree items in the query.  This is > 1.
 *
 * 'initial_rels' is a list of RelOptInfo nodes for each independent
451
 *		jointree item.	These are the components to be joined together.
452
 *
453
 * Returns the final level of join relations, i.e., the relation that is
454
 * the result of joining all the original relations together.
455
 */
456
static RelOptInfo *
457
make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels)
458
{
459
	List	  **joinitems;
460
	int			lev;
Bruce Momjian's avatar
Bruce Momjian committed
461
	RelOptInfo *rel;
462

463
	/*
464
	 * We employ a simple "dynamic programming" algorithm: we first find
465 466 467 468 469 470 471
	 * all ways to build joins of two jointree items, then all ways to
	 * build joins of three items (from two-item joins and single items),
	 * then four-item joins, and so on until we have considered all ways
	 * to join all the items into one rel.
	 *
	 * joinitems[j] is a list of all the j-item rels.  Initially we set
	 * joinitems[1] to represent all the single-jointree-item relations.
472
	 */
473 474
	joinitems = (List **) palloc((levels_needed + 1) * sizeof(List *));
	MemSet(joinitems, 0, (levels_needed + 1) * sizeof(List *));
475 476

	joinitems[1] = initial_rels;
477

478
	for (lev = 2; lev <= levels_needed; lev++)
479
	{
480
		List	   *x;
Bruce Momjian's avatar
Bruce Momjian committed
481

482 483
		/*
		 * Determine all possible pairs of relations to be joined at this
484
		 * level, and build paths for making each one from every available
485
		 * pair of lower-level relations.
486
		 */
487
		joinitems[lev] = make_rels_by_joins(root, lev, joinitems);
488

489
		/*
490
		 * Do cleanup work on each just-processed rel.
491
		 */
492
		foreach(x, joinitems[lev])
493 494
		{
			rel = (RelOptInfo *) lfirst(x);
Bruce Momjian's avatar
Bruce Momjian committed
495

496
#ifdef NOT_USED
497

498
			/*
499 500
			 * * for each expensive predicate in each path in each
			 * distinct rel, * consider doing pullup  -- JMH
501 502 503
			 */
			if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF)
				xfunc_trypullup(rel);
504
#endif
505

506 507
			/* Find and save the cheapest paths for this rel */
			set_cheapest(rel);
Bruce Momjian's avatar
Bruce Momjian committed
508

509
#ifdef OPTIMIZER_DEBUG
510
			debug_print_rel(root, rel);
511
#endif
512
		}
513
	}
514

515
	/*
516
	 * We should have a single rel at the final level.
517
	 */
518
	Assert(length(joinitems[levels_needed]) == 1);
519

520
	rel = (RelOptInfo *) lfirst(joinitems[levels_needed]);
521 522

	return rel;
523 524 525 526 527 528
}

/*****************************************************************************
 *
 *****************************************************************************/

529
#ifdef OPTIMIZER_DEBUG
530

531
static void
532 533 534 535 536 537 538 539 540 541 542 543 544 545
print_relids(Relids relids)
{
	List	   *l;

	foreach(l, relids)
	{
		printf("%d", lfirsti(l));
		if (lnext(l))
			printf(" ");
	}
}

static void
print_restrictclauses(Query *root, List *clauses)
546
{
547
	List	   *l;
548

549 550
	foreach(l, clauses)
	{
551
		RestrictInfo *c = lfirst(l);
552

553 554
		print_expr((Node *) c->clause, root->rtable);
		if (lnext(l))
555
			printf(", ");
556
	}
557 558
}

559
static void
560
print_path(Query *root, Path *path, int indent)
561
{
562
	const char *ptype;
563
	bool		join;
564
	int			i;
565 566 567

	switch (nodeTag(path))
	{
568 569 570 571 572 573 574 575
		case T_Path:
			ptype = "SeqScan";
			join = false;
			break;
		case T_IndexPath:
			ptype = "IdxScan";
			join = false;
			break;
576 577 578 579
		case T_TidPath:
			ptype = "TidScan";
			join = false;
			break;
580
		case T_NestPath:
581 582 583 584 585 586 587 588 589 590 591 592
			ptype = "Nestloop";
			join = true;
			break;
		case T_MergePath:
			ptype = "MergeJoin";
			join = true;
			break;
		case T_HashPath:
			ptype = "HashJoin";
			join = true;
			break;
		default:
593 594
			ptype = "???Path";
			join = false;
595
			break;
596
	}
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612

	for (i = 0; i < indent; i++)
		printf("\t");
	printf("%s(", ptype);
	print_relids(path->parent->relids);
	printf(") rows=%.0f cost=%.2f..%.2f\n",
		   path->parent->rows, path->startup_cost, path->total_cost);

	if (path->pathkeys)
	{
		for (i = 0; i < indent; i++)
			printf("\t");
		printf("  pathkeys: ");
		print_pathkeys(path->pathkeys, root->rtable);
	}

613 614
	if (join)
	{
615
		JoinPath   *jp = (JoinPath *) path;
616

617 618 619 620 621
		for (i = 0; i < indent; i++)
			printf("\t");
		printf("  clauses: ");
		print_restrictclauses(root, jp->joinrestrictinfo);
		printf("\n");
622

623
		if (nodeTag(path) == T_MergePath)
624
		{
625
			MergePath  *mp = (MergePath *) path;
626

627 628
			if (mp->outersortkeys || mp->innersortkeys)
			{
629
				for (i = 0; i < indent; i++)
630
					printf("\t");
631 632 633 634
				printf("  sortouter=%d sortinner=%d\n",
					   ((mp->outersortkeys) ? 1 : 0),
					   ((mp->innersortkeys) ? 1 : 0));
			}
635
		}
636

637 638 639
		print_path(root, jp->outerjoinpath, indent + 1);
		print_path(root, jp->innerjoinpath, indent + 1);
	}
640 641
}

642
void
643
debug_print_rel(Query *root, RelOptInfo *rel)
644
{
645
	List	   *l;
646

647 648
	printf("RELOPTINFO (");
	print_relids(rel->relids);
649
	printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
650

651 652 653 654 655 656 657 658 659
	if (rel->baserestrictinfo)
	{
		printf("\tbaserestrictinfo: ");
		print_restrictclauses(root, rel->baserestrictinfo);
		printf("\n");
	}

	foreach(l, rel->joininfo)
	{
660
		JoinInfo   *j = (JoinInfo *) lfirst(l);
661 662 663 664 665 666 667 668

		printf("\tjoininfo (");
		print_relids(j->unjoined_relids);
		printf("): ");
		print_restrictclauses(root, j->jinfo_restrictinfo);
		printf("\n");
	}

669 670 671
	printf("\tpath list:\n");
	foreach(l, rel->pathlist)
		print_path(root, lfirst(l), 1);
672
	printf("\n\tcheapest startup path:\n");
673
	print_path(root, rel->cheapest_startup_path, 1);
674
	printf("\n\tcheapest total path:\n");
675
	print_path(root, rel->cheapest_total_path, 1);
676 677
	printf("\n");
	fflush(stdout);
678
}
679

680
#endif   /* OPTIMIZER_DEBUG */