/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * INTERFACE ROUTINES
 *	ExecutorStart()
 *	ExecutorRun()
 *	ExecutorEnd()
 *
 *	The old ExecutorMain() has been replaced by ExecutorStart(),
 *	ExecutorRun() and ExecutorEnd()
 *
 *	These three procedures are the external interfaces to the executor.
 *	In each case, the query descriptor is required as an argument.
 *
 *	ExecutorStart() must be called at the beginning of execution of any
 *	query plan and ExecutorEnd() should always be called at the end of
 *	execution of a plan.
 *
 *	ExecutorRun accepts direction and count arguments that specify whether
 *	the plan is to be executed forwards, backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execMain.c
 *
 *-------------------------------------------------------------------------
 */
33 34
#include "postgres.h"

35
#include "access/reloptions.h"
36
#include "access/sysattr.h"
37 38
#include "access/transam.h"
#include "access/xact.h"
39
#include "catalog/heap.h"
40
#include "catalog/namespace.h"
41
#include "catalog/toasting.h"
42
#include "commands/tablespace.h"
43
#include "commands/trigger.h"
Bruce Momjian's avatar
Bruce Momjian committed
44
#include "executor/execdebug.h"
45
#include "executor/instrument.h"
Bruce Momjian's avatar
Bruce Momjian committed
46
#include "miscadmin.h"
47
#include "optimizer/clauses.h"
48
#include "parser/parse_clause.h"
49
#include "parser/parsetree.h"
50 51
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
52
#include "storage/smgr.h"
53
#include "tcop/utility.h"
Bruce Momjian's avatar
Bruce Momjian committed
54
#include "utils/acl.h"
55
#include "utils/lsyscache.h"
56
#include "utils/memutils.h"
57
#include "utils/snapmgr.h"
58
#include "utils/tqual.h"
59

60

61
/*
 * Hooks for plugins to get control in ExecutorStart/Run/End().  A plugin
 * that installs one of these is expected to call the corresponding
 * standard_Executor*() function from inside its hook (see the dispatchers
 * below).  NULL means "no plugin installed; use the standard behavior".
 */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
ExecutorRun_hook_type ExecutorRun_hook = NULL;
ExecutorEnd_hook_type ExecutorEnd_hook = NULL;

/* Hook for plugin to get control in ExecCheckRTPerms() */
ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;

69
/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static void ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			bool sendTuples,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static bool ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
				  Plan *planTree);
/* SELECT INTO support: OpenIntoRel/CloseIntoRel plus the intorel_*
 * DestReceiver callbacks that feed tuples into the created relation */
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */

91

92
/* ----------------------------------------------------------------
93 94 95 96 97
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan
 *
98
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
Bruce Momjian's avatar
Bruce Momjian committed
99
 * clear why we bother to separate the two functions, but...).	The tupDesc
100 101
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
102
 *
103
 * eflags contains flag bits as described in executor.h.
104
 *
105 106
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
107 108 109 110 111
 *
 * We provide a function hook variable that lets loadable plugins
 * get control when ExecutorStart is called.  Such a plugin would
 * normally call standard_ExecutorStart().
 *
112 113
 * ----------------------------------------------------------------
 */
114
void
115
ExecutorStart(QueryDesc *queryDesc, int eflags)
116 117 118 119 120 121 122 123 124
{
	if (ExecutorStart_hook)
		(*ExecutorStart_hook) (queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);
}

/*
 * standard_ExecutorStart
 *
 * Default implementation of ExecutorStart: builds the EState, performs the
 * read-only-transaction check, sets the output command ID, registers the
 * snapshots, and initializes the plan state tree via InitPlan().
 */
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:
			/* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */
			if (queryDesc->plannedstmt->intoClause != NULL ||
				queryDesc->plannedstmt->rowMarks != NIL)
				estate->es_output_cid = GetCurrentCommandId(true);
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState.  The snapshots are
	 * registered here and unregistered in standard_ExecutorEnd.
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
198 199 200 201 202 203 204
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
205
 *
206 207 208
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
209
 *
210
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
211
 *		completion.
212
 *
213 214 215 216 217
 *		There is no return value, but output tuples (if any) are sent to
 *		the destination receiver specified in the QueryDesc; and the number
 *		of tuples processed at the top level can be found in
 *		estate->es_processed.
 *
218 219 220 221
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorRun is called.  Such a plugin would
 *		normally call standard_ExecutorRun().
 *
222 223
 * ----------------------------------------------------------------
 */
224
void
225
ExecutorRun(QueryDesc *queryDesc,
226
			ScanDirection direction, long count)
227 228
{
	if (ExecutorRun_hook)
229
		(*ExecutorRun_hook) (queryDesc, direction, count);
230
	else
231
		standard_ExecutorRun(queryDesc, direction, count);
232 233
}

234
/*
 * standard_ExecutorRun
 *
 * Default implementation of ExecutorRun: starts up the destination receiver
 * (if tuples will be emitted), runs the plan via ExecutePlan, then shuts the
 * receiver down again.  All work happens in the per-query memory context.
 */
void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of ExecutorRun overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	/* tuples are sent for SELECT, or for INSERT/UPDATE/DELETE ... RETURNING */
	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->hasReturning);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (!ScanDirectionIsNoMovement(direction))
		ExecutePlan(estate,
					queryDesc->planstate,
					operation,
					sendTuples,
					count,
					direction,
					dest);

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, estate->es_processed);

	MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
303 304
 *		ExecutorEnd
 *
305
 *		This routine must be called at the end of execution of any
306
 *		query plan
307 308 309 310 311
 *
 *		We provide a function hook variable that lets loadable plugins
 *		get control when ExecutorEnd is called.  Such a plugin would
 *		normally call standard_ExecutorEnd().
 *
312 313 314
 * ----------------------------------------------------------------
 */
void
315
ExecutorEnd(QueryDesc *queryDesc)
316 317 318 319 320 321 322 323 324
{
	if (ExecutorEnd_hook)
		(*ExecutorEnd_hook) (queryDesc);
	else
		standard_ExecutorEnd(queryDesc);
}

/*
 * standard_ExecutorEnd
 *
 * Default implementation of ExecutorEnd: shuts down the plan tree, closes
 * any SELECT INTO target relation, releases the snapshots registered in
 * standard_ExecutorStart, and frees the EState with its per-query memory
 * context.  The teardown order here matters: the context must be exited
 * before it is destroyed.
 */
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/* do away with our snapshots */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}
370

371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan: resets the whole plan state tree to its start position
	 */
	ExecReScan(queryDesc->planstate);

	MemoryContextSwitchTo(oldcontext);
}

407

408 409 410
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
411 412 413
 *
 * Returns true if permissions are adequate.  Otherwise, throws an appropriate
 * error if ereport_on_violation is true, or simply returns false otherwise.
414
 */
415 416
bool
ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
417
{
418
	ListCell   *l;
419
	bool		result = true;
420

421
	foreach(l, rangeTable)
422
	{
423 424 425 426 427 428 429 430 431 432 433
		RangeTblEntry  *rte = (RangeTblEntry *) lfirst(l);

		result = ExecCheckRTEPerms(rte);
		if (!result)
		{
			Assert(rte->rtekind == RTE_RELATION);
			if (ereport_on_violation)
				aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
							   get_rel_name(rte->relid));
			return false;
		}
434
	}
435 436

	if (ExecutorCheckPerms_hook)
437 438 439
		result = (*ExecutorCheckPerms_hook)(rangeTable,
											ereport_on_violation);
	return result;
440 441 442 443 444 445
}

/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 *
 * Returns true if the checking user holds all permissions required by the
 * RTE (rte->requiredPerms), counting both relation-level and column-level
 * grants; returns false on any missing privilege.
 */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;
	Bitmapset  *tmpset;
	int			col;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join, subquery, and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.	If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		/*
		 * If we lack any permissions that exist only as relation permissions,
		 * we can fail straight away.  Only SELECT/INSERT/UPDATE have
		 * column-level equivalents.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			/* iterate a copy: bms_first_member destructively consumes it */
			tmpset = bms_copy(rte->selectedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}

		/*
		 * Basically the same for the mod columns, with either INSERT or
		 * UPDATE privilege as specified by remainingPerms.
		 */
		remainingPerms &= ~ACL_SELECT;
		if (remainingPerms != 0)
		{
			/*
			 * When the query doesn't explicitly change any columns, allow the
			 * query if we have permission on any column of the rel.  This is
			 * to handle SELECT FOR UPDATE as well as possible corner cases in
			 * INSERT and UPDATE.
			 */
			if (bms_is_empty(rte->modifiedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, remainingPerms,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			tmpset = bms_copy(rte->modifiedCols);
			while ((col = bms_first_member(tmpset)) >= 0)
			{
				/* remove the column number offset */
				col += FirstLowInvalidHeapAttributeNumber;
				if (col == InvalidAttrNumber)
				{
					/* whole-row reference can't happen here */
					elog(ERROR, "whole-row update is not implemented");
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, col, userid,
											  remainingPerms) != ACLCHECK_OK)
						return false;
				}
			}
			bms_free(tmpset);
		}
	}
	return true;
}

586 587
/*
 * Check that the query does not imply any writes to non-temp tables.
588 589 590 591
 *
 * Note: in a Hot Standby slave this would need to reject writes to temp
 * tables as well; but an HS slave can't have created any temp tables
 * in the first place, so no need to check that.
592
 */
593
static void
594
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
595
{
596 597
	ListCell   *l;

598 599 600
	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
Bruce Momjian's avatar
Bruce Momjian committed
601 602
	 * XXX should we allow this if the destination is temp?  Considering that
	 * it would still require catalog changes, probably not.
603
	 */
604
	if (plannedstmt->intoClause != NULL)
605
		PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
606

607
	/* Fail if write permissions are requested on any non-temp table */
608
	foreach(l, plannedstmt->rtable)
609
	{
610
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
611

612 613
		if (rte->rtekind != RTE_RELATION)
			continue;
614

615 616
		if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
			continue;
617

618 619
		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;
620

621
		PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
622 623 624 625
	}
}


626
/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 *
 * Called from standard_ExecutorStart with the EState already created and
 * the per-query memory context current.  Ordering is significant: result
 * relations and FOR UPDATE/SHARE relations must be locked before the plan
 * tree is initialized, to avoid lock upgrades.
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks (ereports on failure)
	 */
	ExecCheckRTPerms(rangeTable, true);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;
	estate->es_plannedstmt = plannedstmt;

	/*
	 * initialize result relation stuff, and open/lock the result rels.
	 *
	 * We must do this before initializing the plan tree, else we might try to
	 * do a lock upgrade if a result rel is also a source rel.
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			Index		resultRelationIndex = lfirst_int(l);
			Oid			resultRelationOid;
			Relation	resultRelation;

			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
			InitResultRelInfo(resultRelInfo,
							  resultRelation,
							  resultRelationIndex,
							  operation,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* es_result_relation_info is NULL except when within ModifyTable */
		estate->es_result_relation_info = NULL;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}

	/*
	 * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
	 * before we initialize the plan tree, else we'd be risking lock upgrades.
	 * While we are at it, build the ExecRowMark list.
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
		Oid			relid;
		Relation	relation;
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* lock strength depends on the kind of row mark */
		switch (rc->markType)
		{
			case ROW_MARK_EXCLUSIVE:
			case ROW_MARK_SHARE:
				relid = getrelid(rc->rti, rangeTable);
				relation = heap_open(relid, RowShareLock);
				break;
			case ROW_MARK_REFERENCE:
				relid = getrelid(rc->rti, rangeTable);
				relation = heap_open(relid, AccessShareLock);
				break;
			case ROW_MARK_COPY:
				/* there's no real table here ... */
				relation = NULL;
				break;
			default:
				elog(ERROR, "unrecognized markType: %d", rc->markType);
				relation = NULL;	/* keep compiler quiet */
				break;
		}

		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->rti = rc->rti;
		erm->prti = rc->prti;
		erm->markType = rc->markType;
		erm->noWait = rc->noWait;
		erm->ctidAttNo = rc->ctidAttNo;
		erm->toidAttNo = rc->toidAttNo;
		erm->wholeAttNo = rc->wholeAttNo;
		ItemPointerSetInvalid(&(erm->curCtid));
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
	 */
	estate->es_select_into = false;
	if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
	{
		estate->es_select_into = true;
		estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
	}

	/*
	 * Initialize the executor's tuple table to empty.
	 */
	estate->es_tupleTable = NIL;
	estate->es_trig_tuple_slot = NULL;
	estate->es_trig_oldtup_slot = NULL;

	/* mark EvalPlanQual not active */
	estate->es_epqTuple = NULL;
	estate->es_epqTupleSet = NULL;
	estate->es_epqScanDone = NULL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;						/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;
		int			sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
		 * it is a parameterless subplan (not initplan), we suggest that it be
		 * prepared to handle REWIND efficiently; otherwise there is no need.
		 */
		sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT queries need a filter if
	 * there are any junk attrs in the top-level tlist.
	 */
	if (operation == CMD_SELECT)
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		foreach(tlist, plan->targetlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tlist);

			if (tle->resjunk)
			{
				junk_filter_needed = true;
				break;
			}
		}

		if (junk_filter_needed)
		{
			JunkFilter *j;

			j = ExecInitJunkFilter(planstate->plan->targetlist,
								   tupType->tdhasoid,
								   ExecInitExtraTupleSlot(estate));
			estate->es_junkFilter = j;

			/* Want to return the cleaned tuple type */
			tupType = j->jf_cleanTupType;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		OpenIntoRel(queryDesc);
}

870 871 872
/*
 * Initialize ResultRelInfo data for one result relation.
 *
 * resultRelationDesc must already be opened and locked by the caller;
 * this routine validates the relkind, zero-fills the node, copies the
 * trigger descriptor, and (except for DELETE) opens the relation's indexes.
 */
void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
				  Relation resultRelationDesc,
				  Index resultRelationIndex,
				  CmdType operation,
				  int instrument_options)
{
	/*
	 * Check valid relkind ... parser and/or planner should have noticed this
	 * already, but let's make sure.
	 */
	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_RELATION:
			/* OK */
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	/* OK, fill in the node */
	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		/* per-trigger lookup caches, lazily filled during execution */
		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		resultRelInfo->ri_TrigWhenExprs = (List **)
			palloc0(n * sizeof(List *));
		if (instrument_options)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigWhenExprs = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}

957 958 959 960 961 962 963 964 965 966
/*
 *		ExecGetTriggerResultRel
 *
 * Fetch the ResultRelInfo to use for firing triggers on the relation with
 * the given OID.  Usually the relation is one of the query's result
 * relations, so we can simply hand back the matching member of the
 * es_result_relations array.  (With self-joins there may be several entries
 * sharing the same OID; any of them will do.)  Triggers can also fire on
 * relations that are not result relations of the query -- notably when an
 * RI update trigger queues triggers on other tables -- and for those we
 * lazily build extra ResultRelInfo entries, remembered in
 * es_trig_target_relations so repeated firings need not re-open the
 * relation.  (This also lets EXPLAIN ANALYZE report such trigger runtimes.)
 */
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
{
	ResultRelInfo *resultRelInfo;
	int			ndx;
	ListCell   *lc;
	Relation	targetRel;
	MemoryContext oldcontext;

	/* First, search through the query result relations */
	resultRelInfo = estate->es_result_relations;
	for (ndx = 0; ndx < estate->es_num_result_relations; ndx++)
	{
		if (RelationGetRelid(resultRelInfo->ri_RelationDesc) == relid)
			return resultRelInfo;
		resultRelInfo++;
	}
	/* Nope, but maybe we already made an extra ResultRelInfo for it */
	foreach(lc, estate->es_trig_target_relations)
	{
		resultRelInfo = (ResultRelInfo *) lfirst(lc);
		if (RelationGetRelid(resultRelInfo->ri_RelationDesc) == relid)
			return resultRelInfo;
	}
	/* Nope, so we need a new one */

	/*
	 * Open the target relation's relcache entry.  We assume that an
	 * appropriate lock is still held by the backend from whenever the
	 * trigger event got queued, so we need take no new lock here.
	 */
	targetRel = heap_open(relid, NoLock);

	/*
	 * Make the new entry in the right context.  Currently, we don't need any
	 * index information in ResultRelInfos used only for triggers, so tell
	 * InitResultRelInfo it's a DELETE.
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
	resultRelInfo = makeNode(ResultRelInfo);
	InitResultRelInfo(resultRelInfo,
					  targetRel,
					  0,		/* dummy rangetable index */
					  CMD_DELETE,
					  estate->es_instrument);
	estate->es_trig_target_relations =
		lappend(estate->es_trig_target_relations, resultRelInfo);
	MemoryContextSwitchTo(oldcontext);

	return resultRelInfo;
}

1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
/*
 *		ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if
 * the choice is forced, FALSE if the choice is not forced.  In the TRUE
 * case, *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that if we are generating tuples for INSERT or UPDATE,
 * estate->es_result_relation_info is already set up to describe the target
 * relation.  Note that in an UPDATE that spans an inheritance tree, some of
 * the target relations may have OIDs and some not.  We have to make the
 * decisions on a per-relation basis as we initialize each of the subplans
 * of the ModifyTable node, so ModifyTable has to set
 * es_result_relation_info while initializing each subplan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	EState	   *estate = planstate->state;
	ResultRelInfo *relInfo = estate->es_result_relation_info;

	/* An INSERT/UPDATE target relation dictates the choice directly */
	if (relInfo != NULL && relInfo->ri_RelationDesc != NULL)
	{
		*hasoids = relInfo->ri_RelationDesc->rd_rel->relhasoids;
		return true;
	}

	/* For SELECT INTO, consult the flag InitPlan() left for us */
	if (estate->es_select_into)
	{
		*hasoids = estate->es_into_oids;
		return true;
	}

	/* Otherwise the choice is not forced */
	return false;
}

1084
/* ----------------------------------------------------------------
1085
 *		ExecEndPlan
1086
 *
1087
 *		Cleans up the query plan -- closes files and frees up storage
1088 1089 1090 1091 1092 1093
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
1094 1095
 * ----------------------------------------------------------------
 */
1096
static void
1097
ExecEndPlan(PlanState *planstate, EState *estate)
1098
{
1099 1100
	ResultRelInfo *resultRelInfo;
	int			i;
1101
	ListCell   *l;
1102

Bruce Momjian's avatar
Bruce Momjian committed
1103
	/*
1104
	 * shut down the node-type-specific query processing
1105
	 */
1106
	ExecEndNode(planstate);
1107

1108 1109 1110 1111 1112
	/*
	 * for subplans too
	 */
	foreach(l, estate->es_subplanstates)
	{
Bruce Momjian's avatar
Bruce Momjian committed
1113
		PlanState  *subplanstate = (PlanState *) lfirst(l);
1114 1115 1116 1117

		ExecEndNode(subplanstate);
	}

Bruce Momjian's avatar
Bruce Momjian committed
1118
	/*
1119
	 * destroy the executor's tuple table.  Actually we only care about
Bruce Momjian's avatar
Bruce Momjian committed
1120 1121 1122
	 * releasing buffer pins and tupdesc refcounts; there's no need to pfree
	 * the TupleTableSlots, since the containing memory context is about to go
	 * away anyway.
1123
	 */
1124
	ExecResetTupleTable(estate->es_tupleTable, false);
1125

Bruce Momjian's avatar
Bruce Momjian committed
1126
	/*
1127
	 * close the result relation(s) if any, but hold locks until xact commit.
1128
	 */
1129 1130
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
1131
	{
1132 1133 1134 1135
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
1136 1137
	}

1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
	/*
	 * likewise close any trigger target relations
	 */
	foreach(l, estate->es_trig_target_relations)
	{
		resultRelInfo = (ResultRelInfo *) lfirst(l);
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

1149
	/*
1150
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1151
	 */
1152
	foreach(l, estate->es_rowMarks)
1153
	{
1154
		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
1155

1156 1157
		if (erm->relation)
			heap_close(erm->relation, NoLock);
1158
	}
1159 1160 1161
}

/* ----------------------------------------------------------------
1162 1163
 *		ExecutePlan
 *
1164 1165
 *		Processes the query plan until we have processed 'numberTuples' tuples,
 *		moving in the specified direction.
1166
 *
1167
 *		Runs to completion if numberTuples is 0
1168
 *
1169 1170
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
1171 1172
 * ----------------------------------------------------------------
 */
1173
static void
1174
ExecutePlan(EState *estate,
1175
			PlanState *planstate,
1176
			CmdType operation,
1177
			bool sendTuples,
1178
			long numberTuples,
1179
			ScanDirection direction,
1180
			DestReceiver *dest)
1181
{
Bruce Momjian's avatar
Bruce Momjian committed
1182 1183
	TupleTableSlot *slot;
	long		current_tuple_count;
1184

Bruce Momjian's avatar
Bruce Momjian committed
1185
	/*
Bruce Momjian's avatar
Bruce Momjian committed
1186
	 * initialize local variables
1187
	 */
1188 1189
	current_tuple_count = 0;

Bruce Momjian's avatar
Bruce Momjian committed
1190 1191
	/*
	 * Set the direction.
1192
	 */
1193 1194
	estate->es_direction = direction;

Bruce Momjian's avatar
Bruce Momjian committed
1195
	/*
1196
	 * Loop until we've processed the proper number of tuples from the plan.
1197 1198 1199
	 */
	for (;;)
	{
1200 1201
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);
Bruce Momjian's avatar
Bruce Momjian committed
1202

Bruce Momjian's avatar
Bruce Momjian committed
1203
		/*
Bruce Momjian's avatar
Bruce Momjian committed
1204
		 * Execute the plan and obtain a tuple
1205
		 */
1206
		slot = ExecProcNode(planstate);
1207

Bruce Momjian's avatar
Bruce Momjian committed
1208
		/*
Bruce Momjian's avatar
Bruce Momjian committed
1209
		 * if the tuple is null, then we assume there is nothing more to
1210
		 * process so we just end the loop...
1211
		 */
1212
		if (TupIsNull(slot))
1213
			break;
1214

Bruce Momjian's avatar
Bruce Momjian committed
1215
		/*
1216
		 * If we have a junk filter, then project a new tuple with the junk
1217
		 * removed.
1218
		 *
1219
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
1220 1221
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
1222
		 */
1223 1224
		if (estate->es_junkFilter != NULL)
			slot = ExecFilterJunk(estate->es_junkFilter, slot);
1225

Bruce Momjian's avatar
Bruce Momjian committed
1226
		/*
Bruce Momjian's avatar
Bruce Momjian committed
1227 1228
		 * If we are supposed to send the tuple somewhere, do so. (In
		 * practice, this is probably always the case at this point.)
1229
		 */
1230 1231
		if (sendTuples)
			(*dest->receiveSlot) (slot, dest);
1232

1233 1234 1235 1236 1237 1238 1239
		/*
		 * Count tuples processed, if this is a SELECT.  (For other operation
		 * types, the ModifyTable plan node must count the appropriate
		 * events.)
		 */
		if (operation == CMD_SELECT)
			(estate->es_processed)++;
Bruce Momjian's avatar
Bruce Momjian committed
1240

Bruce Momjian's avatar
Bruce Momjian committed
1241
		/*
1242 1243 1244
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
1245
		 */
1246
		current_tuple_count++;
1247
		if (numberTuples && numberTuples == current_tuple_count)
1248
			break;
1249 1250 1251
	}
}

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1252

1253 1254 1255
/*
 * ExecRelCheck --- check that tuple meets constraints for result relation
 */
1256
static const char *
1257 1258
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1259
{
1260
	Relation	rel = resultRelInfo->ri_RelationDesc;
1261 1262
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
1263
	ExprContext *econtext;
1264
	MemoryContext oldContext;
1265 1266
	List	   *qual;
	int			i;
1267

1268 1269
	/*
	 * If first time through for this result relation, build expression
1270 1271
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
1272 1273 1274 1275 1276 1277 1278 1279
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
1280 1281
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
1282
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
1283
				ExecPrepareExpr((Expr *) qual, estate);
1284 1285 1286 1287
		}
		MemoryContextSwitchTo(oldContext);
	}

1288
	/*
1289 1290
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
1291
	 */
1292
	econtext = GetPerTupleExprContext(estate);
1293

1294 1295 1296 1297
	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
1298 1299
	for (i = 0; i < ncheck; i++)
	{
1300
		qual = resultRelInfo->ri_ConstraintExprs[i];
1301

1302 1303
		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
1304 1305
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
1306
		 */
1307
		if (!ExecQual(qual, econtext, true))
1308
			return check[i].ccname;
1309 1310
	}

1311
	/* NULL result means no error */
1312
	return NULL;
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1313 1314
}

1315
void
1316
ExecConstraints(ResultRelInfo *resultRelInfo,
1317
				TupleTableSlot *slot, EState *estate)
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1318
{
1319
	Relation	rel = resultRelInfo->ri_RelationDesc;
1320 1321 1322
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);
1323

1324
	if (constr->has_not_null)
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1325
	{
1326
		int			natts = rel->rd_att->natts;
1327
		int			attrChk;
1328

1329
		for (attrChk = 1; attrChk <= natts; attrChk++)
1330
		{
1331
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1332
				slot_attisnull(slot, attrChk))
1333 1334
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
1335
						 errmsg("null value in column \"%s\" violates not-null constraint",
1336
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1337 1338 1339
		}
	}

1340
	if (constr->num_check > 0)
1341
	{
Bruce Momjian's avatar
Bruce Momjian committed
1342
		const char *failed;
1343

1344
		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1345 1346
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
1347
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1348
							RelationGetRelationName(rel), failed)));
1349
	}
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
1350
}
1351

1352

1353
/*
1354 1355
 * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
 * process the updated version under READ COMMITTED rules.
1356 1357
 *
 * See backend/executor/README for some info about how this works.
1358 1359 1360 1361 1362 1363
 */


/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 *	estate - outer executor state data
 *	epqstate - state for EvalPlanQual rechecking
 *	relation - table containing tuple
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 *
 * The sequence of steps below is deliberate: fetch-and-lock the newest
 * version, (re)initialize the EPQ child query, install the test tuple,
 * pull in any non-locked source rows, run the recheck query once, then
 * detach the result from EPQ-local state before returning.
 */
TupleTableSlot *
EvalPlanQual(EState *estate, EPQState *epqstate,
			 Relation relation, Index rti,
			 ItemPointer tid, TransactionId priorXmax)
{
	TupleTableSlot *slot;
	HeapTuple	copyTuple;

	Assert(rti > 0);

	/*
	 * Get and lock the updated version of the row; if fail, return NULL.
	 */
	copyTuple = EvalPlanQualFetch(estate, relation, LockTupleExclusive,
								  tid, priorXmax);

	if (copyTuple == NULL)
		return NULL;

	/*
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
	 */
	*tid = copyTuple->t_self;

	/*
	 * Need to run a recheck subquery.	Initialize or reinitialize EPQ state.
	 */
	EvalPlanQualBegin(epqstate, estate);

	/*
	 * Free old test tuple, if any, and store new tuple where relation's scan
	 * node will see it
	 */
	EvalPlanQualSetTuple(epqstate, rti, copyTuple);

	/*
	 * Fetch any non-locked source rows
	 */
	EvalPlanQualFetchRowMarks(epqstate);

	/*
	 * Run the EPQ query.  We assume it will return at most one tuple.
	 */
	slot = EvalPlanQualNext(epqstate);

	/*
	 * If we got a tuple, force the slot to materialize the tuple so that it
	 * is not dependent on any local state in the EPQ query (in particular,
	 * it's highly likely that the slot contains references to any pass-by-ref
	 * datums that may be present in copyTuple).  As with the next step, this
	 * is to guard against early re-use of the EPQ query.
	 */
	if (!TupIsNull(slot))
		(void) ExecMaterializeSlot(slot);

	/*
	 * Clear out the test tuple.  This is needed in case the EPQ query is
	 * re-used to test a tuple for a different relation.  (Not clear that can
	 * really happen, but let's be safe.)
	 */
	EvalPlanQualSetTuple(epqstate, rti, NULL);

	return slot;
}

/*
 * Fetch a copy of the newest version of an outdated tuple
 *
 *	estate - executor state data
 *	relation - table containing tuple
 *	lockmode - requested tuple lock mode
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * Returns a palloc'd copy of the newest tuple version, or NULL if we find
 * that there is no newest version (ie, the row was deleted not updated).
 * If successful, we have locked the newest tuple version, so caller does not
 * need to worry about it changing anymore.
 *
 * Note: properly, lockmode should be declared as enum LockTupleMode,
 * but we use "int" to avoid having to include heapam.h in executor.h.
 */
HeapTuple
EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
				  ItemPointer tid, TransactionId priorXmax)
{
	HeapTuple	copyTuple = NULL;
	HeapTupleData tuple;
	SnapshotData SnapshotDirty;

	/*
	 * fetch target tuple
	 *
	 * Loop here to deal with updated or busy tuples
	 */
	InitDirtySnapshot(SnapshotDirty);
	tuple.t_self = *tid;
	for (;;)
	{
		Buffer		buffer;

		if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
		{
			/* heap_fetch succeeded: buffer is pinned from here on */
			HTSU_Result test;
			ItemPointerData update_ctid;
			TransactionId update_xmax;

			/*
			 * If xmin isn't what we're expecting, the slot must have been
			 * recycled and reused for an unrelated tuple.	This implies that
			 * the latest version of the row was deleted, so we need do
			 * nothing.  (Should be safe to examine xmin without getting
			 * buffer's content lock, since xmin never changes in an existing
			 * tuple.)
			 */
			if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
									 priorXmax))
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/* otherwise xmin should not be dirty... */
			if (TransactionIdIsValid(SnapshotDirty.xmin))
				elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

			/*
			 * If tuple is being updated by other transaction then we have to
			 * wait for its commit/abort.
			 */
			if (TransactionIdIsValid(SnapshotDirty.xmax))
			{
				ReleaseBuffer(buffer);
				XactLockTableWait(SnapshotDirty.xmax);
				continue;		/* loop back to repeat heap_fetch */
			}

			/*
			 * If tuple was inserted by our own transaction, we have to check
			 * cmin against es_output_cid: cmin >= current CID means our
			 * command cannot see the tuple, so we should ignore it.  Without
			 * this we are open to the "Halloween problem" of indefinitely
			 * re-updating the same tuple. (We need not check cmax because
			 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
			 * transaction dead, regardless of cmax.)  We just checked that
			 * priorXmax == xmin, so we can test that variable instead of
			 * doing HeapTupleHeaderGetXmin again.
			 */
			if (TransactionIdIsCurrentTransactionId(priorXmax) &&
				HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
			{
				ReleaseBuffer(buffer);
				return NULL;
			}

			/*
			 * This is a live tuple, so now try to lock it.
			 */
			test = heap_lock_tuple(relation, &tuple, &buffer,
								   &update_ctid, &update_xmax,
								   estate->es_output_cid,
								   lockmode, false);
			/* We now have two pins on the buffer, get rid of one */
			ReleaseBuffer(buffer);

			/*
			 * One pin remains; every exit path from the switch below must
			 * release it exactly once.
			 */
			switch (test)
			{
				case HeapTupleSelfUpdated:
					/* treat it as deleted; do not process */
					ReleaseBuffer(buffer);
					return NULL;

				case HeapTupleMayBeUpdated:
					/* successfully locked */
					break;

				case HeapTupleUpdated:
					ReleaseBuffer(buffer);
					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));
					if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
					{
						/* it was updated, so look at the updated version */
						tuple.t_self = update_ctid;
						/* updated row should have xmin matching this xmax */
						priorXmax = update_xmax;
						continue;
					}
					/* tuple was deleted, so give up */
					return NULL;

				default:
					ReleaseBuffer(buffer);
					elog(ERROR, "unrecognized heap_lock_tuple status: %u",
						 test);
					return NULL;	/* keep compiler quiet */
			}

			/*
			 * We got tuple - now copy it for use by recheck query.
			 */
			copyTuple = heap_copytuple(&tuple);
			ReleaseBuffer(buffer);
			break;
		}

		/*
		 * If the referenced slot was actually empty, the latest version of
		 * the row must have been deleted, so we need do nothing.
		 */
		if (tuple.t_data == NULL)
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * As above, if xmin isn't what we're expecting, do nothing.
		 */
		if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
								 priorXmax))
		{
			ReleaseBuffer(buffer);
			return NULL;
		}

		/*
		 * If we get here, the tuple was found but failed SnapshotDirty.
		 * Assuming the xmin is either a committed xact or our own xact (as it
		 * certainly should be if we're trying to modify the tuple), this must
		 * mean that the row was updated or deleted by either a committed xact
		 * or our own xact.  If it was deleted, we can ignore it; if it was
		 * updated then chain up to the next version and repeat the whole
		 * process.
		 *
		 * As above, it should be safe to examine xmax and t_ctid without the
		 * buffer content lock, because they can't be changing.
		 */
		if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
		{
			/* deleted, so forget about it */
			ReleaseBuffer(buffer);
			return NULL;
		}

		/* updated, so look at the updated row */
		tuple.t_self = tuple.t_data->t_ctid;
		/* updated row should have xmin matching this xmax */
		priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
		ReleaseBuffer(buffer);
		/* loop back to fetch next in chain */
	}

	/*
	 * Return the copied tuple
	 */
	return copyTuple;
}

/*
1641 1642 1643
 * EvalPlanQualInit -- initialize during creation of a plan state node
 * that might need to invoke EPQ processing.
 * Note: subplan can be NULL if it will be set later with EvalPlanQualSetPlan.
1644 1645
 */
void
1646 1647
EvalPlanQualInit(EPQState *epqstate, EState *estate,
				 Plan *subplan, int epqParam)
1648
{
1649 1650 1651 1652 1653 1654 1655 1656 1657
	/* Mark the EPQ state inactive */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
	/* ... and remember data that EvalPlanQualBegin will need */
	epqstate->plan = subplan;
	epqstate->rowMarks = NIL;
	epqstate->epqParam = epqParam;
}
1658

1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671
/*
 * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
 *
 * We need this so that ModifyTuple can deal with multiple subplans.
 */
void
EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan)
{
	/* Shut down any live EPQ execution tree before switching plans */
	EvalPlanQualEnd(epqstate);
	/* Install the new (or NULL) subplan pointer */
	epqstate->plan = subplan;
}
1672

1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684
/*
 * EvalPlanQualAddRowMark -- add an ExecRowMark that EPQ needs to handle.
 *
 * Currently, only non-locking RowMarks are supported.
 */
void
EvalPlanQualAddRowMark(EPQState *epqstate, ExecRowMark *erm)
{
	/* Reject locking rowmarks; EPQ cannot recheck those */
	if (RowMarkRequiresRowShareLock(erm->markType))
		elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");

	epqstate->rowMarks = lappend(epqstate->rowMarks, erm);
}
1685

1686 1687 1688 1689 1690 1691 1692 1693 1694
/*
 * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
 *
 * NB: passed tuple must be palloc'd; it may get freed later
 */
void
EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
{
	EState	   *estate = epqstate->estate;
	HeapTuple  *slotPtr;

	Assert(rti > 0);

	/*
	 * free old test tuple, if any, and store new tuple where relation's
	 * scan node will see it
	 */
	slotPtr = &estate->es_epqTuple[rti - 1];
	if (*slotPtr != NULL)
		heap_freetuple(*slotPtr);
	*slotPtr = tuple;
	estate->es_epqTupleSet[rti - 1] = true;
}
1707

1708 1709 1710 1711 1712 1713 1714
/*
 * Fetch back the current test tuple (if any) for the specified RTI
 */
HeapTuple
EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
{
	EState	   *recheckestate = epqstate->estate;

	Assert(rti > 0);

	return recheckestate->es_epqTuple[rti - 1];
}

/*
 * Fetch the current row values for any non-locked relations that need
 * to be scanned by an EvalPlanQual operation.	origslot must have been set
 * to contain the current result row (top-level row) that we need to recheck.
 *
 * For each registered rowmark we locate the row either by ctid junk
 * attribute (ROW_MARK_REFERENCE) or by whole-row junk attribute
 * (ROW_MARK_COPY), and install a copy as that relation's EPQ test tuple.
 */
void
EvalPlanQualFetchRowMarks(EPQState *epqstate)
{
	ListCell   *l;

	Assert(epqstate->origslot != NULL);

	foreach(l, epqstate->rowMarks)
	{
		ExecRowMark *erm = (ExecRowMark *) lfirst(l);
		Datum		datum;
		bool		isNull;
		HeapTupleData tuple;

		/* clear any leftover test tuple for this rel */
		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);

		if (erm->relation)
		{
			Buffer		buffer;

			Assert(erm->markType == ROW_MARK_REFERENCE);

			/* if child rel, must check whether it produced this row */
			if (erm->rti != erm->prti)
			{
				Oid			tableoid;

				datum = ExecGetJunkAttribute(epqstate->origslot,
											 erm->toidAttNo,
											 &isNull);
				/* non-locked rels could be on the inside of outer joins */
				if (isNull)
					continue;
				tableoid = DatumGetObjectId(datum);

				if (tableoid != RelationGetRelid(erm->relation))
				{
					/* this child is inactive right now */
					continue;
				}
			}

			/* fetch the tuple's ctid */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 erm->ctidAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

			/* okay, fetch the tuple */
			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
							false, NULL))
				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");

			/* successful, copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
			ReleaseBuffer(buffer);
		}
		else
		{
			HeapTupleHeader td;

			Assert(erm->markType == ROW_MARK_COPY);

			/* fetch the whole-row Var for the relation */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 erm->wholeAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			td = DatumGetHeapTupleHeader(datum);

			/* build a temporary HeapTuple control structure */
			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
			ItemPointerSetInvalid(&(tuple.t_self));
			tuple.t_tableOid = InvalidOid;
			tuple.t_data = td;

			/* copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
		}
	}
}

1816 1817
/*
 * Fetch the next row (if any) from EvalPlanQual testing
1818 1819
 *
 * (In practice, there should never be more than one row...)
1820 1821
 */
TupleTableSlot *
1822
EvalPlanQualNext(EPQState *epqstate)
1823
{
1824
	MemoryContext oldcontext;
Bruce Momjian's avatar
Bruce Momjian committed
1825
	TupleTableSlot *slot;
1826

1827 1828
	oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
	slot = ExecProcNode(epqstate->planstate);
1829
	MemoryContextSwitchTo(oldcontext);
1830

1831 1832 1833 1834
	return slot;
}

/*
1835
 * Initialize or reset an EvalPlanQual state tree
1836 1837
 */
void
1838
EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
1839
{
1840
	EState	   *estate = epqstate->estate;
1841

1842
	if (estate == NULL)
1843
	{
1844 1845
		/* First time through, so create a child EState */
		EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
1846
	}
1847
	else
1848
	{
1849 1850 1851 1852 1853
		/*
		 * We already have a suitable child EPQ tree, so just reset it.
		 */
		int			rtsize = list_length(parentestate->es_range_table);
		PlanState  *planstate = epqstate->planstate;
1854

1855 1856 1857 1858
		MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));

		/* Recopy current values of parent parameters */
		if (parentestate->es_plannedstmt->nParamExec > 0)
1859
		{
Bruce Momjian's avatar
Bruce Momjian committed
1860
			int			i = parentestate->es_plannedstmt->nParamExec;
1861 1862 1863 1864 1865 1866 1867 1868 1869

			while (--i >= 0)
			{
				/* copy value if any, but not execPlan link */
				estate->es_param_exec_vals[i].value =
					parentestate->es_param_exec_vals[i].value;
				estate->es_param_exec_vals[i].isnull =
					parentestate->es_param_exec_vals[i].isnull;
			}
1870
		}
1871 1872 1873 1874 1875 1876 1877

		/*
		 * Mark child plan tree as needing rescan at all scan nodes.  The
		 * first ExecProcNode will take care of actually doing the rescan.
		 */
		planstate->chgParam = bms_add_member(planstate->chgParam,
											 epqstate->epqParam);
1878 1879 1880 1881
	}
}

/*
 * Start execution of an EvalPlanQual plan tree.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 *
 * The child EState shares immutable state (snapshot, range table, result
 * relations, external params) with the parent, but owns its own mutable
 * state (tuple table, es_param_exec_vals, es_epqScanDone).
 */
static void
EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
{
	EState	   *estate;
	int			rtsize;
	MemoryContext oldcontext;
	ListCell   *l;

	rtsize = list_length(parentestate->es_range_table);

	epqstate->estate = estate = CreateExecutorState();

	/* all allocations below go into the child EState's query context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Child EPQ EStates share the parent's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	estate->es_direction = ForwardScanDirection;
	estate->es_snapshot = parentestate->es_snapshot;
	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
	estate->es_range_table = parentestate->es_range_table;
	estate->es_plannedstmt = parentestate->es_plannedstmt;
	estate->es_junkFilter = parentestate->es_junkFilter;
	estate->es_output_cid = parentestate->es_output_cid;
	estate->es_result_relations = parentestate->es_result_relations;
	estate->es_num_result_relations = parentestate->es_num_result_relations;
	estate->es_result_relation_info = parentestate->es_result_relation_info;
	/* es_trig_target_relations must NOT be copied */
	estate->es_rowMarks = parentestate->es_rowMarks;
	estate->es_instrument = parentestate->es_instrument;
	estate->es_select_into = parentestate->es_select_into;
	estate->es_into_oids = parentestate->es_into_oids;

	/*
	 * The external param list is simply shared from parent.  The internal
	 * param workspace has to be local state, but we copy the initial values
	 * from the parent, so as to have access to any param values that were
	 * already set from other parts of the parent's plan tree.
	 */
	estate->es_param_list_info = parentestate->es_param_list_info;
	if (parentestate->es_plannedstmt->nParamExec > 0)
	{
		int			i = parentestate->es_plannedstmt->nParamExec;

		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(i * sizeof(ParamExecData));
		while (--i >= 0)
		{
			/* copy value if any, but not execPlan link */
			estate->es_param_exec_vals[i].value =
				parentestate->es_param_exec_vals[i].value;
			estate->es_param_exec_vals[i].isnull =
				parentestate->es_param_exec_vals[i].isnull;
		}
	}

	/*
	 * Each EState must have its own es_epqScanDone state, but if we have
	 * nested EPQ checks they should share es_epqTuple arrays.	This allows
	 * sub-rechecks to inherit the values being examined by an outer recheck.
	 */
	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
	if (parentestate->es_epqTuple != NULL)
	{
		estate->es_epqTuple = parentestate->es_epqTuple;
		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
	}
	else
	{
		estate->es_epqTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
		estate->es_epqTupleSet = (bool *)
			palloc0(rtsize * sizeof(bool));
	}

	/*
	 * Each estate also has its own tuple table.
	 */
	estate->es_tupleTable = NIL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries. Some of the
	 * SubPlans might not be used in the part of the plan tree we intend to
	 * run, but since it's not easy to tell which, we just initialize them
	 * all.
	 */
	Assert(estate->es_subplanstates == NIL);
	foreach(l, parentestate->es_plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;

		subplanstate = ExecInitNode(subplan, estate, 0);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);
	}

	/*
	 * Initialize the private state information for all the nodes in the part
	 * of the plan tree we need to run.  This opens files, allocates storage
	 * and leaves us ready to start processing tuples.
	 */
	epqstate->planstate = ExecInitNode(planTree, estate, 0);

	MemoryContextSwitchTo(oldcontext);
}

/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).	We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 *
 * Safe to call on an idle EPQState (estate == NULL); it does nothing then.
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* shut down the main plan tree first, then the subplans */
	ExecEndNode(epqstate->planstate);

	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/* close any trigger target relations attached to this EState */
	foreach(l, estate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	MemoryContextSwitchTo(oldcontext);

	/* this frees everything allocated in the child query context */
	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}

/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 */

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState	   *estate;			/* EState we are working with */
	Relation	rel;			/* Relation to write to */
	int			hi_options;		/* heap_insert performance options */
	BulkInsertState bistate;	/* bulk insert state */
} DR_intorel;

/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->intoClause;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	AclResult	aclresult;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	Assert(into);

	/*
	 * XXX This code needs to be kept in sync with DefineRelation(). Maybe we
	 * should try to use that function instead.
	 */

	/*
	 * Check consistency of arguments
	 */
	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Security check: disallow creating temp tables from security-restricted
	 * code.  This is needed because calling code might not expect untrusted
	 * tables to appear in pg_temp at the front of its search path.
	 */
	if (into->rel->istemp && InSecurityRestrictedOperation())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("cannot create temporary table within security-restricted operation")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetCreationNamespace(into->rel);

	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
									  ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
					   get_namespace_name(namespaceId));

	/*
	 * Select tablespace to use.  If not specified, use default tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(into->tableSpaceName, false);
	}
	else
	{
		tablespaceId = GetDefaultTablespace(into->rel->istemp);
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 NULL,
									 validnsps,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* Copy the tupdesc because heap_create_with_catalog modifies it */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  InvalidOid,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  NIL,
											  RELKIND_RELATION,
											  false,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  true,
											  allowSystemTableMods,
											  false);
	Assert(intoRelationId != InvalidOid);

	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 "toast",
									 validnsps,
									 true,
									 false);

	(void) heap_reloptions(RELKIND_TOASTVALUE, reloptions, true);

	AlterTableCreateToastTable(intoRelationId, reloptions);

	/*
	 * And open the constructed table for writing.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
	myState->rel = intoRelationDesc;

	/*
	 * We can skip WAL-logging the insertions, unless PITR or streaming
	 * replication is in use. We can skip the FSM in any case.
	 */
	myState->hi_options = HEAP_INSERT_SKIP_FSM |
		(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
	myState->bistate = GetBulkInsertState();

	/* Not using WAL requires smgr_targblock be initially invalid */
	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}

/*
 * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
 */
static void
CloseIntoRel(QueryDesc *queryDesc)
{
	DR_intorel *receiver = (DR_intorel *) queryDesc->dest;

	/* OpenIntoRel might never have gotten called */
	if (receiver == NULL || receiver->pub.mydest != DestIntoRel ||
		receiver->rel == NULL)
		return;

	FreeBulkInsertState(receiver->bistate);

	/* If we skipped using WAL, must heap_sync before commit */
	if (receiver->hi_options & HEAP_INSERT_SKIP_WAL)
		heap_sync(receiver->rel);

	/* close rel, but keep lock until commit */
	heap_close(receiver->rel, NoLock);

	receiver->rel = NULL;
}

/*
 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
 */
DestReceiver *
CreateIntoRelDestReceiver(void)
{
2272
	DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
2273 2274 2275 2276 2277 2278 2279

	self->pub.receiveSlot = intorel_receive;
	self->pub.rStartup = intorel_startup;
	self->pub.rShutdown = intorel_shutdown;
	self->pub.rDestroy = intorel_destroy;
	self->pub.mydest = DestIntoRel;

2280 2281
	/* private fields will be set by OpenIntoRel */

2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302
	return (DestReceiver *) self;
}

/*
 * intorel_startup --- executor startup
 *
 * Nothing to do here; all setup happens in OpenIntoRel.
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}

/*
 * intorel_receive --- receive one tuple
 *
 * Materializes the slot's tuple and inserts it into the target relation
 * using the bulk-insert machinery set up by OpenIntoRel.
 */
static void
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
	DR_intorel *myState = (DR_intorel *) self;
	HeapTuple	tuple;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * force assignment of new OID (see comments in ExecInsert)
	 */
	if (myState->rel->rd_rel->relhasoids)
		HeapTupleSetOid(tuple, InvalidOid);

	heap_insert(myState->rel,
				tuple,
				myState->estate->es_output_cid,
				myState->hi_options,
				myState->bistate);

	/* We know this is a newly created relation, so there are no indexes */
}

/*
 * intorel_shutdown --- executor end
 *
 * Nothing to do here; cleanup happens in CloseIntoRel.
 */
static void
intorel_shutdown(DestReceiver *self)
{
	/* no-op */
}

/*
 * intorel_destroy --- release DestReceiver object
 */
static void
intorel_destroy(DestReceiver *self)
{
	pfree(self);
}
}