pathnode.c 17.8 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * pathnode.c
4
 *	  Routines to manipulate pathlists and create path nodes
5
 *
Bruce Momjian's avatar
Bruce Momjian committed
6
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
Bruce Momjian's avatar
Add:  
Bruce Momjian committed
7
 * Portions Copyright (c) 1994, Regents of the University of California
8 9 10
 *
 *
 * IDENTIFICATION
11
 *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.80 2002/11/24 21:52:14 tgl Exp $
12 13 14 15 16
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

17 18
#include <math.h>

19
#include "nodes/plannodes.h"
20
#include "optimizer/cost.h"
Bruce Momjian's avatar
Bruce Momjian committed
21
#include "optimizer/pathnode.h"
22
#include "optimizer/paths.h"
Bruce Momjian's avatar
Bruce Momjian committed
23
#include "optimizer/restrictinfo.h"
24 25 26


/*****************************************************************************
27
 *		MISC. PATH UTILITIES
28 29
 *****************************************************************************/

30
/*
31 32 33 34 35 36 37 38 39 40 41 42 43
 * compare_path_costs
 *	  Return -1, 0, or +1 according as path1 is cheaper, the same cost,
 *	  or more expensive than path2 for the specified criterion.
 */
int
compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
	if (criterion == STARTUP_COST)
	{
		if (path1->startup_cost < path2->startup_cost)
			return -1;
		if (path1->startup_cost > path2->startup_cost)
			return +1;
44

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
		/*
		 * If paths have the same startup cost (not at all unlikely),
		 * order them by total cost.
		 */
		if (path1->total_cost < path2->total_cost)
			return -1;
		if (path1->total_cost > path2->total_cost)
			return +1;
	}
	else
	{
		if (path1->total_cost < path2->total_cost)
			return -1;
		if (path1->total_cost > path2->total_cost)
			return +1;
60

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
		/*
		 * If paths have the same total cost, order them by startup cost.
		 */
		if (path1->startup_cost < path2->startup_cost)
			return -1;
		if (path1->startup_cost > path2->startup_cost)
			return +1;
	}
	return 0;
}

/*
 * compare_path_fractional_costs
 *	  Return -1, 0, or +1 according as path1 is cheaper, the same cost,
 *	  or more expensive than path2 for fetching the specified fraction
 *	  of the total tuples.
77
 *
78 79
 * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
 * path with the cheaper total_cost.
80
 */
81 82 83
int
compare_fractional_path_costs(Path *path1, Path *path2,
							  double fraction)
84
{
85 86 87 88 89 90 91 92 93 94 95 96 97 98
	Cost		cost1,
				cost2;

	if (fraction <= 0.0 || fraction >= 1.0)
		return compare_path_costs(path1, path2, TOTAL_COST);
	cost1 = path1->startup_cost +
		fraction * (path1->total_cost - path1->startup_cost);
	cost2 = path2->startup_cost +
		fraction * (path2->total_cost - path2->startup_cost);
	if (cost1 < cost2)
		return -1;
	if (cost1 > cost2)
		return +1;
	return 0;
99 100
}

101
/*
102
 * set_cheapest
103 104
 *	  Find the minimum-cost paths from among a relation's paths,
 *	  and save them in the rel's cheapest-path fields.
105
 *
106 107
 * This is normally called only after we've finished constructing the path
 * list for the rel node.
108
 *
109 110 111 112
 * If we find two paths of identical costs, try to keep the better-sorted one.
 * The paths might have unrelated sort orderings, in which case we can only
 * guess which might be better to keep, but if one is superior then we
 * definitely should keep it.
113
 */
114 115
void
set_cheapest(RelOptInfo *parent_rel)
116
{
117
	List	   *pathlist = parent_rel->pathlist;
118
	List	   *p;
119 120
	Path	   *cheapest_startup_path;
	Path	   *cheapest_total_path;
121

Bruce Momjian's avatar
Bruce Momjian committed
122
	Assert(IsA(parent_rel, RelOptInfo));
123 124 125

	if (pathlist == NIL)
		elog(ERROR, "Unable to devise a query plan for the given query");
126

127
	cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
128

129 130
	foreach(p, lnext(pathlist))
	{
131
		Path	   *path = (Path *) lfirst(p);
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
		int			cmp;

		cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
		if (cmp > 0 ||
			(cmp == 0 &&
			 compare_pathkeys(cheapest_startup_path->pathkeys,
							  path->pathkeys) == PATHKEYS_BETTER2))
			cheapest_startup_path = path;

		cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
		if (cmp > 0 ||
			(cmp == 0 &&
			 compare_pathkeys(cheapest_total_path->pathkeys,
							  path->pathkeys) == PATHKEYS_BETTER2))
			cheapest_total_path = path;
147 148
	}

149 150
	parent_rel->cheapest_startup_path = cheapest_startup_path;
	parent_rel->cheapest_total_path = cheapest_total_path;
151 152 153 154 155 156 157
}

/*
 * add_path
 *	  Consider a potential implementation path for the specified parent rel,
 *	  and add it to the rel's pathlist if it is worthy of consideration.
 *	  A path is worthy if it has either a better sort order (better pathkeys)
158
 *	  or cheaper cost (on either dimension) than any of the existing old paths.
159
 *
160 161 162
 *	  Unless parent_rel->pruneable is false, we also remove from the rel's
 *	  pathlist any old paths that are dominated by new_path --- that is,
 *	  new_path is both cheaper and at least as well ordered.
163
 *
164 165 166 167 168 169
 *	  The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
 *	  at the front.  No code depends on that for correctness; it's simply
 *	  a speed hack within this routine.  Doing it that way makes it more
 *	  likely that we will reject an inferior path after a few comparisons,
 *	  rather than many comparisons.
 *
170 171 172 173
 *	  NOTE: discarded Path objects are immediately pfree'd to reduce planner
 *	  memory consumption.  We dare not try to free the substructure of a Path,
 *	  since much of it may be shared with other Paths or the query tree itself;
 *	  but just recycling discarded Path nodes is a very useful savings in
174
 *	  a large join tree.  We can recycle the List nodes of pathlist, too.
175
 *
176 177
 * 'parent_rel' is the relation entry to which the path corresponds.
 * 'new_path' is a potential path for parent_rel.
178
 *
179
 * Returns nothing, but modifies parent_rel->pathlist.
180
 */
181 182
void
add_path(RelOptInfo *parent_rel, Path *new_path)
183
{
184 185
	bool		accept_new = true;		/* unless we find a superior old
										 * path */
186
	List	   *insert_after = NIL;		/* where to insert new item */
187
	List	   *p1_prev = NIL;
Bruce Momjian's avatar
Bruce Momjian committed
188
	List	   *p1;
189

190 191 192 193 194
	/*
	 * Loop to check proposed new path against old paths.  Note it is
	 * possible for more than one old path to be tossed out because
	 * new_path dominates it.
	 */
195
	p1 = parent_rel->pathlist;	/* cannot use foreach here */
196
	while (p1 != NIL)
197
	{
198
		Path	   *old_path = (Path *) lfirst(p1);
199
		bool		remove_old = false; /* unless new proves superior */
200
		int			costcmp;
201

202
		costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
203

204
		/*
205 206 207
		 * If the two paths compare differently for startup and total
		 * cost, then we want to keep both, and we can skip the (much
		 * slower) comparison of pathkeys.	If they compare the same,
208 209
		 * proceed with the pathkeys comparison.  Note: this test relies
		 * on the fact that compare_path_costs will only return 0 if both
210 211
		 * costs are equal (and, therefore, there's no need to call it
		 * twice in that case).
212 213
		 */
		if (costcmp == 0 ||
214 215
			costcmp == compare_path_costs(new_path, old_path,
										  STARTUP_COST))
216
		{
217 218 219 220
			switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
			{
				case PATHKEYS_EQUAL:
					if (costcmp < 0)
221
						remove_old = true;		/* new dominates old */
222
					else
223 224
						accept_new = false;		/* old equals or dominates
												 * new */
225 226 227
					break;
				case PATHKEYS_BETTER1:
					if (costcmp <= 0)
228
						remove_old = true;		/* new dominates old */
229 230 231
					break;
				case PATHKEYS_BETTER2:
					if (costcmp >= 0)
232
						accept_new = false;		/* old dominates new */
233 234 235 236 237
					break;
				case PATHKEYS_DIFFERENT:
					/* keep both paths, since they have different ordering */
					break;
			}
238
		}
Bruce Momjian's avatar
Bruce Momjian committed
239

240 241 242 243 244
		/*
		 * Remove current element from pathlist if dominated by new,
		 * unless xfunc told us not to remove any paths.
		 */
		if (remove_old && parent_rel->pruneable)
245
		{
246
			List	   *p1_next = lnext(p1);
247

248
			if (p1_prev)
249
				lnext(p1_prev) = p1_next;
250
			else
251
				parent_rel->pathlist = p1_next;
252
			pfree(old_path);
253 254
			pfree(p1);			/* this is why we can't use foreach */
			p1 = p1_next;
255
		}
256
		else
257 258 259 260
		{
			/* new belongs after this old path if it has cost >= old's */
			if (costcmp >= 0)
				insert_after = p1;
261
			p1_prev = p1;
262 263
			p1 = lnext(p1);
		}
264 265 266 267 268 269

		/*
		 * If we found an old path that dominates new_path, we can quit
		 * scanning the pathlist; we will not add new_path, and we assume
		 * new_path cannot dominate any other elements of the pathlist.
		 */
270
		if (!accept_new)
271
			break;
272
	}
273

274 275
	if (accept_new)
	{
276 277 278 279 280
		/* Accept the new path: insert it at proper place in pathlist */
		if (insert_after)
			lnext(insert_after) = lcons(new_path, lnext(insert_after));
		else
			parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
281
	}
282 283
	else
	{
284
		/* Reject and recycle the new path */
285 286
		pfree(new_path);
	}
287 288 289 290
}


/*****************************************************************************
291
 *		PATH NODE CREATION ROUTINES
292 293
 *****************************************************************************/

294
/*
295
 * create_seqscan_path
296 297
 *	  Creates a path corresponding to a sequential scan, returning the
 *	  pathnode.
298
 */
299
Path *
300
create_seqscan_path(Query *root, RelOptInfo *rel)
301
{
302
	Path	   *pathnode = makeNode(Path);
303 304 305

	pathnode->pathtype = T_SeqScan;
	pathnode->parent = rel;
306
	pathnode->pathkeys = NIL;	/* seqscan has unordered result */
307

308
	cost_seqscan(pathnode, root, rel);
309

310
	return pathnode;
311 312
}

313
/*
314
 * create_index_path
315
 *	  Creates a path node for an index scan.
316
 *
317
 * 'rel' is the parent rel
318 319 320
 * 'index' is an index on 'rel'
 * 'restriction_clauses' is a list of RestrictInfo nodes
 *			to be used as index qual conditions in the scan.
321
 * 'pathkeys' describes the ordering of the path.
322
 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
323
 *			for an ordered index, or NoMovementScanDirection for
324
 *			an unordered index.
325
 *
326 327
 * Returns the new path node.
 */
328
IndexPath *
329
create_index_path(Query *root,
330
				  RelOptInfo *rel,
331
				  IndexOptInfo *index,
332
				  List *restriction_clauses,
333
				  List *pathkeys,
334
				  ScanDirection indexscandir)
335
{
336
	IndexPath  *pathnode = makeNode(IndexPath);
337
	List	   *indexquals;
338 339 340

	pathnode->path.pathtype = T_IndexScan;
	pathnode->path.parent = rel;
341
	pathnode->path.pathkeys = pathkeys;
342

343 344 345 346
	indexquals = get_actual_clauses(restriction_clauses);
	/* expand special operators to indexquals the executor can handle */
	indexquals = expand_indexqual_conditions(indexquals);

347
	/*
348
	 * We are making a pathnode for a single-scan indexscan; therefore,
349
	 * both indexinfo and indexqual should be single-element lists.
350
	 */
351
	pathnode->indexinfo = makeList1(index);
352
	pathnode->indexqual = makeList1(indexquals);
353

354
	pathnode->indexscandir = indexscandir;
355 356

	/*
357 358
	 * The number of rows is the same as the parent rel's estimate, since
	 * this isn't a join inner indexscan.
359 360
	 */
	pathnode->rows = rel->rows;
361

362
	/*
363 364
	 * Not sure if this is necessary, but it should help if the statistics
	 * are too far off
365 366 367 368
	 */
	if (index->indpred && index->tuples < pathnode->rows)
		pathnode->rows = index->tuples;

369
	cost_index(&pathnode->path, root, rel, index, indexquals, false);
370

371
	return pathnode;
372 373
}

374 375 376 377 378
/*
 * create_tidscan_path
 *	  Creates a path corresponding to a tid_direct scan, returning the
 *	  pathnode.
 */
379 380
TidPath *
create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
381
{
382
	TidPath    *pathnode = makeNode(TidPath);
383 384 385 386

	pathnode->path.pathtype = T_TidScan;
	pathnode->path.parent = rel;
	pathnode->path.pathkeys = NIL;
387 388
	pathnode->tideval = copyObject(tideval);	/* is copy really
												 * necessary? */
389 390
	pathnode->unjoined_relids = NIL;

391
	cost_tidscan(&pathnode->path, root, rel, tideval);
392 393 394 395

	/*
	 * divide selectivity for each clause to get an equal selectivity as
	 * IndexScan does OK ?
396 397
	 */

398 399 400
	return pathnode;
}

401 402 403 404 405 406 407 408 409 410 411 412 413
/*
 * create_append_path
 *	  Creates a path corresponding to an Append plan, returning the
 *	  pathnode.
 */
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths)
{
	AppendPath *pathnode = makeNode(AppendPath);
	List	   *l;

	pathnode->path.pathtype = T_Append;
	pathnode->path.parent = rel;
414 415
	pathnode->path.pathkeys = NIL;		/* result is always considered
										 * unsorted */
416 417 418 419 420 421
	pathnode->subpaths = subpaths;

	pathnode->path.startup_cost = 0;
	pathnode->path.total_cost = 0;
	foreach(l, subpaths)
	{
422
		Path	   *subpath = (Path *) lfirst(l);
423 424 425 426 427 428 429 430 431

		if (l == subpaths)		/* first node? */
			pathnode->path.startup_cost = subpath->startup_cost;
		pathnode->path.total_cost += subpath->total_cost;
	}

	return pathnode;
}

432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
/*
 * create_result_path
 *	  Creates a path corresponding to a Result plan, returning the
 *	  pathnode.
 */
ResultPath *
create_result_path(RelOptInfo *rel, Path *subpath, List *constantqual)
{
	ResultPath *pathnode = makeNode(ResultPath);

	pathnode->path.pathtype = T_Result;
	pathnode->path.parent = rel; /* may be NULL */

	if (subpath)
		pathnode->path.pathkeys = subpath->pathkeys;
	else
		pathnode->path.pathkeys = NIL;

	pathnode->subpath = subpath;
	pathnode->constantqual = constantqual;

	if (subpath)
	{
		pathnode->path.startup_cost = subpath->startup_cost;
		pathnode->path.total_cost = subpath->total_cost;
	}
	else
	{
		pathnode->path.startup_cost = 0;
		pathnode->path.total_cost = cpu_tuple_cost;
	}

	return pathnode;
}

467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487
/*
 * create_subqueryscan_path
 *	  Creates a path corresponding to a sequential scan of a subquery,
 *	  returning the pathnode.
 */
Path *
create_subqueryscan_path(RelOptInfo *rel)
{
	Path	   *pathnode = makeNode(Path);

	pathnode->pathtype = T_SubqueryScan;
	pathnode->parent = rel;
	pathnode->pathkeys = NIL;	/* for now, assume unordered result */

	/* just copy the subplan's cost estimates */
	pathnode->startup_cost = rel->subplan->startup_cost;
	pathnode->total_cost = rel->subplan->total_cost;

	return pathnode;
}

488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506
/*
 * create_functionscan_path
 *	  Creates a path corresponding to a sequential scan of a function,
 *	  returning the pathnode.
 */
Path *
create_functionscan_path(Query *root, RelOptInfo *rel)
{
	Path	   *pathnode = makeNode(Path);

	pathnode->pathtype = T_FunctionScan;
	pathnode->parent = rel;
	pathnode->pathkeys = NIL;	/* for now, assume unordered result */

	cost_functionscan(pathnode, root, rel);

	return pathnode;
}

507
/*
508
 * create_nestloop_path
509 510 511
 *	  Creates a pathnode corresponding to a nestloop join between two
 *	  relations.
 *
512
 * 'joinrel' is the join relation.
513
 * 'jointype' is the type of join required
514 515
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
516
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
517
 * 'pathkeys' are the path keys of the new join path
518
 *
519 520
 * Returns the resulting path node.
 */
521 522 523
NestPath *
create_nestloop_path(Query *root,
					 RelOptInfo *joinrel,
524
					 JoinType jointype,
525 526
					 Path *outer_path,
					 Path *inner_path,
527
					 List *restrict_clauses,
528
					 List *pathkeys)
529
{
530
	NestPath   *pathnode = makeNode(NestPath);
531 532 533

	pathnode->path.pathtype = T_NestLoop;
	pathnode->path.parent = joinrel;
534
	pathnode->jointype = jointype;
535 536
	pathnode->outerjoinpath = outer_path;
	pathnode->innerjoinpath = inner_path;
537
	pathnode->joinrestrictinfo = restrict_clauses;
538
	pathnode->path.pathkeys = pathkeys;
539

540 541
	cost_nestloop(&pathnode->path, root, outer_path, inner_path,
				  restrict_clauses);
542

543
	return pathnode;
544 545
}

546
/*
547
 * create_mergejoin_path
548
 *	  Creates a pathnode corresponding to a mergejoin join between
549 550
 *	  two relations
 *
551
 * 'joinrel' is the join relation
552
 * 'jointype' is the type of join required
553 554
 * 'outer_path' is the outer path
 * 'inner_path' is the inner path
555
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
556
 * 'pathkeys' are the path keys of the new join path
557 558
 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
 *		(this should be a subset of the restrict_clauses list)
559 560 561
 * 'outersortkeys' are the sort varkeys for the outer relation
 * 'innersortkeys' are the sort varkeys for the inner relation
 */
562 563 564
MergePath *
create_mergejoin_path(Query *root,
					  RelOptInfo *joinrel,
565
					  JoinType jointype,
566 567
					  Path *outer_path,
					  Path *inner_path,
568
					  List *restrict_clauses,
569
					  List *pathkeys,
570 571 572
					  List *mergeclauses,
					  List *outersortkeys,
					  List *innersortkeys)
573
{
574
	MergePath  *pathnode = makeNode(MergePath);
575

576 577 578 579 580 581 582 583 584 585 586
	/*
	 * If the given paths are already well enough ordered, we can skip
	 * doing an explicit sort.
	 */
	if (outersortkeys &&
		pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
		outersortkeys = NIL;
	if (innersortkeys &&
		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
		innersortkeys = NIL;

587 588
	pathnode->jpath.path.pathtype = T_MergeJoin;
	pathnode->jpath.path.parent = joinrel;
589
	pathnode->jpath.jointype = jointype;
590 591
	pathnode->jpath.outerjoinpath = outer_path;
	pathnode->jpath.innerjoinpath = inner_path;
592
	pathnode->jpath.joinrestrictinfo = restrict_clauses;
593
	pathnode->jpath.path.pathkeys = pathkeys;
594 595 596
	pathnode->path_mergeclauses = mergeclauses;
	pathnode->outersortkeys = outersortkeys;
	pathnode->innersortkeys = innersortkeys;
597 598

	cost_mergejoin(&pathnode->jpath.path,
599
				   root,
600 601 602
				   outer_path,
				   inner_path,
				   restrict_clauses,
603
				   mergeclauses,
604 605
				   outersortkeys,
				   innersortkeys);
606

607
	return pathnode;
608 609
}

610
/*
611
 * create_hashjoin_path
612 613
 *	  Creates a pathnode corresponding to a hash join between two relations.
 *
614
 * 'joinrel' is the join relation
615
 * 'jointype' is the type of join required
616 617
 * 'outer_path' is the cheapest outer path
 * 'inner_path' is the cheapest inner path
618
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
619
 * 'hashclauses' is a list of the hash join clause (always a 1-element list)
620
 *		(this should be a subset of the restrict_clauses list)
621
 */
622 623 624
HashPath *
create_hashjoin_path(Query *root,
					 RelOptInfo *joinrel,
625
					 JoinType jointype,
626 627
					 Path *outer_path,
					 Path *inner_path,
628
					 List *restrict_clauses,
629
					 List *hashclauses)
630
{
631
	HashPath   *pathnode = makeNode(HashPath);
632 633 634

	pathnode->jpath.path.pathtype = T_HashJoin;
	pathnode->jpath.path.parent = joinrel;
635
	pathnode->jpath.jointype = jointype;
636 637
	pathnode->jpath.outerjoinpath = outer_path;
	pathnode->jpath.innerjoinpath = inner_path;
638
	pathnode->jpath.joinrestrictinfo = restrict_clauses;
639 640
	/* A hashjoin never has pathkeys, since its ordering is unpredictable */
	pathnode->jpath.path.pathkeys = NIL;
641
	pathnode->path_hashclauses = hashclauses;
642 643

	cost_hashjoin(&pathnode->jpath.path,
644
				  root,
645 646 647
				  outer_path,
				  inner_path,
				  restrict_clauses,
648
				  hashclauses);
649

650
	return pathnode;
651
}