ipc.c 18.3 KB
Newer Older
1
/*-------------------------------------------------------------------------
2
 *
3
 * ipc.c
4
 *	  POSTGRES inter-process communication definitions.
5
 *
Bruce Momjian's avatar
Add:  
Bruce Momjian committed
6 7
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
8 9 10
 *
 *
 * IDENTIFICATION
11
 *	  $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.47 2000/05/16 20:48:48 momjian Exp $
12 13 14
 *
 * NOTES
 *
15 16 17 18 19 20 21 22 23 24
 *	  Currently, semaphores are used (my understanding anyway) in two
 *	  different ways:
 *		1. as mutexes on machines that don't have test-and-set (eg.
 *		   mips R3000).
 *		2. for putting processes to sleep when waiting on a lock
 *		   and waking them up when the lock is free.
 *	  The number of semaphores in (1) is fixed and those are shared
 *	  among all backends. In (2), there is 1 semaphore per process and those
 *	  are not shared with anyone else.
 *														  -ay 4/95
25 26 27 28 29 30 31
 *
 *-------------------------------------------------------------------------
 */
#include <sys/types.h>
#include <sys/file.h>
#include <errno.h>

Marc G. Fournier's avatar
Marc G. Fournier committed
32
#include "postgres.h"
33

34
#include "storage/ipc.h"
35
#include "storage/s_lock.h"
36 37 38
/* In Ultrix, sem.h and shm.h must be included AFTER ipc.h */
#include <sys/sem.h>
#include <sys/shm.h>
39
#include "utils/memutils.h"
40
#include "libpq/libpq.h"
41
#include "utils/trace.h"
42

Bruce Momjian's avatar
Bruce Momjian committed
43
#if defined(solaris_sparc)
44 45 46
#include <sys/ipc.h>
#endif

47 48 49 50 51
/*
 * This flag is set during proc_exit() to change elog()'s behavior,
 * so that an elog() from an on_proc_exit routine cannot get us out
 * of the exit procedure.  We do NOT want to go back to the idle loop...
 */
52
bool		proc_exit_inprogress = false;

/*
 * Nonzero once PrivateMemoryCreate() has run.  From then on "shared"
 * memory is plain malloc'ed memory local to this process, and the
 * attach/kill routines in this file skip the System V shared memory calls.
 */
static int	UsePrivateMemory = 0;

/* Forward declarations of file-local helpers defined further down. */
static void IpcMemoryDetach(int status, char *shmaddr);
static void IpcConfigTip(void);
58

59
/* ----------------------------------------------------------------
60
 *						exit() handling stuff
61 62 63 64 65
 * ----------------------------------------------------------------
 */

#define MAX_ON_EXITS 20

66 67
static struct ONEXIT
{
68 69
	void		(*function) ();
	caddr_t		arg;
70
}			on_proc_exit_list[MAX_ON_EXITS], on_shmem_exit_list[MAX_ON_EXITS];
71

72 73
static int	on_proc_exit_index,
			on_shmem_exit_index;
74

75 76
/*
 * Bookkeeping for one "private memory" segment, i.e. a malloc'ed stand-in
 * for a shared memory segment.
 */
typedef struct _PrivateMemStruct
{
	int			id;				/* index of this entry in IpcPrivateMem */
	char	   *memptr;			/* start of the malloc'ed area */
} PrivateMem;

/* Fixed-size table of private segments, filled by PrivateMemoryCreate(). */
static PrivateMem IpcPrivateMem[16];
82

83 84 85

static int
PrivateMemoryCreate(IpcMemoryKey memKey,
86
					uint32 size)
87
{
88
	static int	memid = 0;
89 90 91 92 93 94

	UsePrivateMemory = 1;

	IpcPrivateMem[memid].id = memid;
	IpcPrivateMem[memid].memptr = malloc(size);
	if (IpcPrivateMem[memid].memptr == NULL)
95
		elog(ERROR, "PrivateMemoryCreate: not enough memory to malloc");
Bruce Momjian's avatar
Bruce Momjian committed
96
	MemSet(IpcPrivateMem[memid].memptr, 0, size);		/* XXX PURIFY */
97

98
	return memid++;
99 100
}

101
static char *
102 103
PrivateMemoryAttach(IpcMemoryId memid)
{
104
	return IpcPrivateMem[memid].memptr;
105 106 107 108
}


/* ----------------------------------------------------------------
109
 *		proc_exit
110
 *
111 112 113 114
 *		this function calls all the callbacks registered
 *		for it (to free resources) and then calls exit.
 *		This should be the only function to call exit().
 *		-cim 2/6/90
115 116 117
 * ----------------------------------------------------------------
 */
void
118
proc_exit(int code)
119
{
120

121
	/*
122 123
	 * Once we set this flag, we are committed to exit.  Any elog() will
	 * NOT send control back to the main loop, but right back here.
124
	 */
125
	proc_exit_inprogress = true;
126

127
	TPRINTF(TRACE_VERBOSE, "proc_exit(%d)", code);
128

129 130
	/* do our shared memory exits first */
	shmem_exit(code);
131

132 133
	/* ----------------
	 *	call all the callbacks registered before calling exit().
134 135 136 137 138 139
	 *
	 *	Note that since we decrement on_proc_exit_index each time,
	 *	if a callback calls elog(ERROR) or elog(FATAL) then it won't
	 *	be invoked again when control comes back here (nor will the
	 *	previously-completed callbacks).  So, an infinite loop
	 *	should not be possible.
140 141
	 * ----------------
	 */
142 143
	while (--on_proc_exit_index >= 0)
		(*on_proc_exit_list[on_proc_exit_index].function) (code,
144
							  on_proc_exit_list[on_proc_exit_index].arg);
145

146
	TPRINTF(TRACE_VERBOSE, "exit(%d)", code);
147
	exit(code);
148 149 150
}

/* ------------------
151
 * Run all of the on_shmem_exit routines but don't exit in the end.
152 153 154 155 156
 * This is used by the postmaster to re-initialize shared memory and
 * semaphores after a backend dies horribly
 * ------------------
 */
void
157
shmem_exit(int code)
158
{
159
	TPRINTF(TRACE_VERBOSE, "shmem_exit(%d)", code);
160

161
	/* ----------------
162 163 164 165
	 *	call all the registered callbacks.
	 *
	 *	As with proc_exit(), we remove each callback from the list
	 *	before calling it, to avoid infinite loop in case of error.
166 167
	 * ----------------
	 */
168 169
	while (--on_shmem_exit_index >= 0)
		(*on_shmem_exit_list[on_shmem_exit_index].function) (code,
170
							on_shmem_exit_list[on_shmem_exit_index].arg);
171 172 173

	on_shmem_exit_index = 0;
}
174

175 176 177 178 179 180 181 182
/* ----------------------------------------------------------------
 *		on_proc_exit
 *
 *		this function adds a callback function to the list of
 *		functions invoked by proc_exit().	-cim 2/6/90
 * ----------------------------------------------------------------
 */
int
183
			on_proc_exit(void (*function) (), caddr_t arg)
184 185
{
	if (on_proc_exit_index >= MAX_ON_EXITS)
186
		return -1;
187 188 189 190 191 192

	on_proc_exit_list[on_proc_exit_index].function = function;
	on_proc_exit_list[on_proc_exit_index].arg = arg;

	++on_proc_exit_index;

193
	return 0;
194 195 196
}

/* ----------------------------------------------------------------
197
 *		on_shmem_exit
198
 *
199
 *		this function adds a callback function to the list of
200
 *		functions invoked by shmem_exit().	-cim 2/6/90
201 202 203
 * ----------------------------------------------------------------
 */
int
204
			on_shmem_exit(void (*function) (), caddr_t arg)
205
{
206
	if (on_shmem_exit_index >= MAX_ON_EXITS)
207
		return -1;
208

209 210
	on_shmem_exit_list[on_shmem_exit_index].function = function;
	on_shmem_exit_list[on_shmem_exit_index].arg = arg;
211

212
	++on_shmem_exit_index;
213

214
	return 0;
215 216
}

217
/* ----------------------------------------------------------------
 *		on_exit_reset
 *
 *		this function clears all proc_exit() and shmem_exit()
 *		registered functions.
 * ----------------------------------------------------------------
 */
void
224
on_exit_reset(void)
225
{
226
	on_shmem_exit_index = 0;
227
	on_proc_exit_index = 0;
228 229
}

230
/****************************************************************************/
231 232
/*	 IPCPrivateSemaphoreKill(status, semId)									*/
/*																			*/
233 234 235
/****************************************************************************/
static void
IPCPrivateSemaphoreKill(int status,
236
						int semId)		/* caddr_t */
237
{
238
	union semun semun;
239
	semun.val = 0;		/* unused */
240 241

	semctl(semId, 0, IPC_RMID, semun);
242 243 244 245
}


/****************************************************************************/
246 247
/*	 IPCPrivateMemoryKill(status, shmId)									*/
/*																			*/
248 249 250
/****************************************************************************/
static void
IPCPrivateMemoryKill(int status,
251
					 int shmId) /* caddr_t */
252
{
253 254 255 256 257 258 259 260 261 262 263
	if (UsePrivateMemory)
	{
		/* free ( IpcPrivateMem[shmId].memptr ); */
	}
	else
	{
		if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0)
		{
			elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m",
				 shmId, IPC_RMID);
		}
264 265 266 267 268
	}
}

/*
 * Note:
 * XXX	This should be split into two different calls.	One should
 * XXX	be used to create a semaphore set.	The other to "attach" an
 * XXX	existing set.  It should be an error for the semaphore set
 * XXX	to already exist or for it not to, respectively.
 *
 *		Currently, the semaphore sets are "attached" and an error
 *		is detected only when a later shared memory attach fails.
 */

/*
 * IpcSemaphoreCreate
 *		Return the id of the semaphore set identified by semKey, creating
 *		and initializing the set if it does not exist yet.
 *
 * semKey			key of the semaphore set
 * semNum			number of semaphores in the set; must be in
 *					1 .. IPC_NMAXSEM
 * permission		permission bits, OR'ed with IPC_CREAT on creation
 * semStartValue	initial value given to every semaphore of a new set
 * removeOnExit		if nonzero, a newly created set is registered for
 *					removal by shmem_exit()
 *
 * If a set with this key already exists its id is returned as is: it is
 * neither re-initialized nor registered for removal.
 *
 * Returns the semaphore set id, or -1 on bad arguments or if semget() or
 * semctl() fails.
 */
IpcSemaphoreId
IpcSemaphoreCreate(IpcSemaphoreKey semKey,
				   int semNum,
				   int permission,
				   int semStartValue,
				   int removeOnExit)
{
	int			i;
	int			semId;
	u_short		array[IPC_NMAXSEM];
	union semun semun;

	/* check arguments: the initial values must fit in array[] */
	if (semNum > IPC_NMAXSEM || semNum <= 0)
		return (-1);

	/* Probe for an existing set first (size 0, no flags: never creates). */
	semId = semget(semKey, 0, 0);

	if (semId == -1)
	{
#ifdef DEBUG_IPC
		EPRINTF("calling semget with %d, %d , %d\n",
				semKey,
				semNum,
				IPC_CREAT | permission);
#endif
		semId = semget(semKey, semNum, IPC_CREAT | permission);

		if (semId < 0)
		{
			EPRINTF("IpcSemaphoreCreate: semget failed (%s) "
					"key=%d, num=%d, permission=%o",
					strerror(errno), semKey, semNum, permission);
			IpcConfigTip();
			return (-1);
		}

		/* Give every semaphore in the new set its starting value. */
		for (i = 0; i < semNum; i++)
			array[i] = semStartValue;
		semun.array = array;
		if (semctl(semId, 0, SETALL, semun) == -1)
		{
			EPRINTF("IpcSemaphoreCreate: semctl failed (%s) id=%d",
					strerror(errno), semId);
			/* Do not leave a half-initialized set behind. */
			semctl(semId, 0, IPC_RMID, semun);
			IpcConfigTip();
			return (-1);
		}

		/*
		 * Arrange for removal at exit.  on_shmem_exit() fails when its
		 * callback list is full; say so rather than silently leaving the
		 * set behind.
		 */
		if (removeOnExit &&
			on_shmem_exit(IPCPrivateSemaphoreKill, (caddr_t) semId) < 0)
		{
			EPRINTF("IpcSemaphoreCreate: could not register exit callback, "
					"semaphore set id=%d will not be removed on exit",
					semId);
		}
	}

#ifdef DEBUG_IPC
	EPRINTF("\nIpcSemaphoreCreate, returns %d\n", semId);
	fflush(stdout);
	fflush(stderr);
#endif
	return semId;
}


/****************************************************************************/
342 343 344
/*	 IpcSemaphoreSet()			- sets the initial value of the semaphore	*/
/*																			*/
/*		note: the xxx_return variables are only used for debugging.			*/
345
/****************************************************************************/
346
#ifdef NOT_USED
/* Result of the most recent semctl() call made by IpcSemaphoreSet(). */
static int	IpcSemaphoreSet_return;

/*
 * Set semaphore number semno of set semId to value.  A failure is logged
 * with EPRINTF; nothing is returned to the caller.
 */
void
IpcSemaphoreSet(int semId, int semno, int value)
{
	union semun semun;

	semun.val = value;
	IpcSemaphoreSet_return = semctl(semId, semno, SETVAL, semun);

	if (IpcSemaphoreSet_return != -1)
		return;

	EPRINTF("IpcSemaphoreSet: semctl failed (%s) id=%d",
			strerror(errno), semId);
}

#endif
367 368

/****************************************************************************/
369 370
/*	 IpcSemaphoreKill(key)		- removes a semaphore						*/
/*																			*/
371 372 373 374
/****************************************************************************/
void
IpcSemaphoreKill(IpcSemaphoreKey key)
{
	union semun unused;
	int			existingId;

	unused.val = 0;				/* semctl wants the argument anyway */

	/* Look the set up without creating it; nothing to do if it is absent. */
	existingId = semget(key, 0, 0);
	if (existingId == -1)
		return;

	semctl(existingId, 0, IPC_RMID, unused);
}

/****************************************************************************/
387 388 389
/*	 IpcSemaphoreLock(semId, sem, lock) - locks a semaphore					*/
/*																			*/
/*		note: the xxx_return variables are only used for debugging.			*/
390
/****************************************************************************/
391
static int	IpcSemaphoreLock_return;
392 393 394 395

void
IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock)
{
396 397 398
	extern int	errno;
	int			errStatus;
	struct sembuf sops;
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423

	sops.sem_op = lock;
	sops.sem_flg = 0;
	sops.sem_num = sem;

	/* ----------------
	 *	Note: if errStatus is -1 and errno == EINTR then it means we
	 *		  returned from the operation prematurely because we were
	 *		  sent a signal.  So we try and lock the semaphore again.
	 *		  I am not certain this is correct, but the semantics aren't
	 *		  clear it fixes problems with parallel abort synchronization,
	 *		  namely that after processing an abort signal, the semaphore
	 *		  call returns with -1 (and errno == EINTR) before it should.
	 *		  -cim 3/28/90
	 * ----------------
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	IpcSemaphoreLock_return = errStatus;

	if (errStatus == -1)
	{
424 425
		EPRINTF("IpcSemaphoreLock: semop failed (%s) id=%d",
				strerror(errno), semId);
426
		proc_exit(255);
427
	}
428 429 430
}

/****************************************************************************/
431 432 433
/*	 IpcSemaphoreUnlock(semId, sem, lock)		- unlocks a semaphore		*/
/*																			*/
/*		note: the xxx_return variables are only used for debugging.			*/
434
/****************************************************************************/
435
static int	IpcSemaphoreUnlock_return;
436 437 438 439

void
IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock)
{
440 441 442
	extern int	errno;
	int			errStatus;
	struct sembuf sops;
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468

	sops.sem_op = -lock;
	sops.sem_flg = 0;
	sops.sem_num = sem;


	/* ----------------
	 *	Note: if errStatus is -1 and errno == EINTR then it means we
	 *		  returned from the operation prematurely because we were
	 *		  sent a signal.  So we try and lock the semaphore again.
	 *		  I am not certain this is correct, but the semantics aren't
	 *		  clear it fixes problems with parallel abort synchronization,
	 *		  namely that after processing an abort signal, the semaphore
	 *		  call returns with -1 (and errno == EINTR) before it should.
	 *		  -cim 3/28/90
	 * ----------------
	 */
	do
	{
		errStatus = semop(semId, &sops, 1);
	} while (errStatus == -1 && errno == EINTR);

	IpcSemaphoreUnlock_return = errStatus;

	if (errStatus == -1)
	{
469 470
		EPRINTF("IpcSemaphoreUnlock: semop failed (%s) id=%d",
				strerror(errno), semId);
471
		proc_exit(255);
472
	}
473 474 475
}

int
476
IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem)
477
{
478 479
	int			semncnt;
	union semun dummy;			/* for Solaris */
480
	dummy.val = 0;		/* unused */
481 482 483

	semncnt = semctl(semId, sem, GETNCNT, dummy);
	return semncnt;
484 485 486
}

int
487
IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem)
488
{
489 490
	int			semval;
	union semun dummy;			/* for Solaris */
491
	dummy.val = 0;		/* unused */
492 493 494

	semval = semctl(semId, sem, GETVAL, dummy);
	return semval;
495 496 497
}

/****************************************************************************/
498 499 500 501
/*	 IpcMemoryCreate(memKey)												*/
/*																			*/
/*	  - returns the memory identifier, if creation succeeds					*/
/*		returns IpcMemCreationFailed, if failure							*/
502 503 504 505 506
/****************************************************************************/

/*
 * Create a memory segment of size bytes for memKey.  When memKey is
 * PrivateIPCKey the segment is process-local memory obtained through
 * PrivateMemoryCreate(); otherwise it is a System V segment created with
 * shmget(IPC_CREAT | permission).
 *
 * On success the segment is registered for removal by shmem_exit() and its
 * id is returned.  On failure the error is logged and IpcMemCreationFailed
 * is returned.
 */
IpcMemoryId
IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission)
{
	IpcMemoryId shmid;

	shmid = (memKey == PrivateIPCKey)
		? PrivateMemoryCreate(memKey, size)		/* private */
		: shmget(memKey, size, IPC_CREAT | permission);

	if (shmid >= 0)
	{
		/* registered for both private and shared segments */
		on_shmem_exit(IPCPrivateMemoryKill, (caddr_t) shmid);
		return shmid;
	}

	EPRINTF("IpcMemoryCreate: shmget failed (%s) "
			"key=%d, size=%d, permission=%o",
			strerror(errno), memKey, size, permission);
	IpcConfigTip();
	return IpcMemCreationFailed;
}

/****************************************************************************/
533 534
/*	IpcMemoryIdGet(memKey, size)	returns the shared memory Id			*/
/*									or IpcMemIdGetFailed					*/
535 536 537 538
/****************************************************************************/
/*
 * Look up the id of the shared memory segment for memKey.  shmget() is
 * called with no flags, so nothing is created here.  On failure the error
 * is logged and IpcMemIdGetFailed is returned.
 */
IpcMemoryId
IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size)
{
	IpcMemoryId shmid = shmget(memKey, size, 0);

	if (shmid >= 0)
		return shmid;

	EPRINTF("IpcMemoryIdGet: shmget failed (%s) "
			"key=%d, size=%d, permission=%o",
			strerror(errno), memKey, size, 0);
	return IpcMemIdGetFailed;
}

/****************************************************************************/
555 556 557
/*	IpcMemoryDetach(status, shmaddr)	removes a shared memory segment		*/
/*										from a backend address space		*/
/*	(only called by backends running under the postmaster)					*/
558
/****************************************************************************/
559
static void
560 561
IpcMemoryDetach(int status, char *shmaddr)
{
562
	if (shmdt(shmaddr) < 0)
563
		elog(NOTICE, "IpcMemoryDetach: shmdt(0x%p): %m", shmaddr);
564 565 566
}

/****************************************************************************/
/*	IpcMemoryAttach(memId)	  returns the address of shared memory			*/
/*							  or IpcMemAttachFailed							*/
/*																			*/
/* CALL IT:  addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId);	*/
/*																			*/
/****************************************************************************/
573
char *
574 575
IpcMemoryAttach(IpcMemoryId memId)
{
576
	char	   *memAddress;
577 578 579 580 581 582 583 584 585

	if (UsePrivateMemory)
		memAddress = (char *) PrivateMemoryAttach(memId);
	else
		memAddress = (char *) shmat(memId, 0, 0);

	/* if ( *memAddress == -1) { XXX ??? */
	if (memAddress == (char *) -1)
	{
586 587
		EPRINTF("IpcMemoryAttach: shmat failed (%s) id=%d",
				strerror(errno), memId);
588
		return IpcMemAttachFailed;
589 590 591
	}

	if (!UsePrivateMemory)
592
		on_shmem_exit(IpcMemoryDetach, (caddr_t) memAddress);
593

594
	return (char *) memAddress;
595 596 597 598
}


/****************************************************************************/
599 600
/*	IpcMemoryKill(memKey)				removes a shared memory segment		*/
/*	(only called by the postmaster and standalone backends)					*/
601 602 603
/****************************************************************************/
void
IpcMemoryKill(IpcMemoryKey memKey)
{
	IpcMemoryId shmid;

	/* Private memory has no kernel segment to remove. */
	if (UsePrivateMemory)
		return;

	/* Look the segment up without creating it; done if it is absent. */
	shmid = shmget(memKey, 0, 0);
	if (shmid < 0)
		return;

	if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0)
		elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m",
			 shmid, IPC_RMID);
}
616 617 618

#ifdef HAS_TEST_AND_SET
/* ------------------
619 620 621
 *	use hardware locks to replace semaphores for sequent machines
 *	to avoid costs of swapping processes and to provide unlimited
 *	supply of locks.
622 623
 * ------------------
 */
624 625

/* used in spin.c */
626
SLock	   *SLockArray = NULL;
627

628 629
static SLock **FreeSLockPP;
static int *UnusedSLockIP;
630 631 632
static slock_t *SLockMemoryLock;
static IpcMemoryId SLockMemoryId = -1;

633 634
struct ipcdummy
{								/* to get alignment/size right */
635 636 637
	SLock	   *free;
	int			unused;
	slock_t		memlock;
638
	SLock		slocks[MAX_SPINS + 1];
639
};
640

Bruce Momjian's avatar
Bruce Momjian committed
641
#define SLOCKMEMORYSIZE		sizeof(struct ipcdummy)
642 643 644 645

/*
 * Create the spinlock memory segment for key, attach to it, and initialize
 * its header fields and the first FIRSTFREELOCKID SLock entries.
 */
void
CreateAndInitSLockMemory(IPCKey key)
{
	int			lockId;
	SLock	   *lock;

	SLockMemoryId = IpcMemoryCreate(key, SLOCKMEMORYSIZE, 0700);
	AttachSLockMemory(key);

	*FreeSLockPP = NULL;
	*UnusedSLockIP = (int) FIRSTFREELOCKID;

	for (lockId = 0; lockId < (int) FIRSTFREELOCKID; lockId++)
	{
		lock = &SLockArray[lockId];

		S_INIT_LOCK(&(lock->locklock));
		lock->flag = NOLOCK;
		lock->nshlocks = 0;
		S_INIT_LOCK(&(lock->shlock));
		S_INIT_LOCK(&(lock->exlock));
		S_INIT_LOCK(&(lock->comlock));
		lock->next = NULL;
	}
}

/*
 * Attach to the spinlock memory segment for key and point the file-level
 * pointers (FreeSLockPP, UnusedSLockIP, SLockMemoryLock, SLockArray) at
 * its fields.  The segment id is looked up first if it is not known yet.
 * Failure to find or attach the segment is reported with elog(FATAL).
 */
void
AttachSLockMemory(IPCKey key)
{
	struct ipcdummy *slockM;

	if (SLockMemoryId == -1)
	{
		SLockMemoryId = IpcMemoryIdGet(key, SLOCKMEMORYSIZE);
		if (SLockMemoryId == -1)
			elog(FATAL, "SLockMemory not in shared memory");
	}

	slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId);
	if (slockM == IpcMemAttachFailed)
		elog(FATAL, "AttachSLockMemory: could not attach segment");

	FreeSLockPP = &slockM->free;
	UnusedSLockIP = &slockM->unused;
	SLockMemoryLock = &slockM->memlock;
	/* NOTE(review): the lock is (re)initialized on every attach -- confirm */
	S_INIT_LOCK(SLockMemoryLock);
	SLockArray = slockM->slocks;
}

689
#ifdef NOT_USED
/* Report whether the SLock with index lockid currently has flag NOLOCK. */
bool
LockIsFree(int lockid)
{
	SLock	   *lock = &SLockArray[lockid];

	return lock->flag == NOLOCK;
}

#endif
697

698
#endif	 /* HAS_TEST_AND_SET */
699 700

/*
 * Print a fixed hint on stderr that points the user at the IPC
 * configuration and the FAQs.  Called after a semaphore or shared memory
 * creation failure has been logged.
 */
static void
IpcConfigTip(void)
{
	static const char *const tipLines[] = {
		"This type of error is usually caused by an improper\n",
		"shared memory or System V IPC semaphore configuration.\n",
		"For more information, see the FAQ and platform-specific\n",
		"FAQ's in the source directory pgsql/doc or on our\n",
		"web site at http://www.postgresql.org.\n"
	};
	int			i;

	for (i = 0; i < (int) (sizeof(tipLines) / sizeof(tipLines[0])); i++)
		fputs(tipLines[i], stderr);
}