buf_init.c 7.91 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * buf_init.c
4
 *	  buffer manager initialization routines
5
 *
Bruce Momjian's avatar
Add:  
Bruce Momjian committed
6 7
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
8 9 10
 *
 *
 * IDENTIFICATION
11
 *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.33 2000/04/09 04:43:18 tgl Exp $
12 13 14
 *
 *-------------------------------------------------------------------------
 */
15
#include <sys/types.h>
16 17 18 19
#include <sys/file.h>
#include <math.h>
#include <signal.h>

Marc G. Fournier's avatar
Marc G. Fournier committed
20 21
#include "postgres.h"

Bruce Momjian's avatar
Bruce Momjian committed
22 23 24
#include "catalog/catalog.h"
#include "executor/execdebug.h"
#include "miscadmin.h"
25 26 27 28 29
#include "storage/buf.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
Bruce Momjian's avatar
Bruce Momjian committed
30
#include "storage/lmgr.h"
31
#include "storage/s_lock.h"
32 33
#include "storage/shmem.h"
#include "storage/smgr.h"
Bruce Momjian's avatar
Bruce Momjian committed
34
#include "storage/spin.h"
35 36 37 38 39
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"

/*
40 41
 *	if BMTRACE is defined, we trace the last 200 buffer allocations and
 *	deallocations in a circular buffer in shared memory.
42 43
 */
#ifdef	BMTRACE
44 45
/*
 * Both pointers refer to the "Buffer trace" shared-memory area obtained
 * by InitBufferPool(): CurTraceBuf addresses the long at the start of the
 * area, and TraceBuf addresses the bmtrace entries that immediately
 * follow it (room for BMT_LIMIT entries is requested).
 */
bmtrace    *TraceBuf;
long	   *CurTraceBuf;
46 47

#define BMT_LIMIT		200
48
#endif	 /* BMTRACE */
49
int			ShowPinTrace = 0;
50

51
int			NBuffers = DEF_NBUFFERS;	/* default is set in config.h */
52 53 54 55
/*
 * Descriptor-array layout, assigned by InitBufferPool():
 *	Data_Descriptors		= NBuffers
 *	Free_List_Descriptor	= Data_Descriptors
 *	Lookup_List_Descriptor	= Data_Descriptors + 1
 *	Num_Descriptors			= Data_Descriptors + 1
 */
int			Data_Descriptors;
int			Free_List_Descriptor;
int			Lookup_List_Descriptor;
int			Num_Descriptors;
56

57 58
/*
 * Set by InitBufferPool() from ShmemInitStruct(): BufferDescriptors is
 * the "Buffer Descriptors" area (Num_Descriptors entries) and
 * BufferBlocks is the "Buffer Blocks" area (NBuffers * BLCKSZ bytes).
 */
BufferDesc *BufferDescriptors;
BufferBlock BufferBlocks;
59

60
#ifndef HAS_TEST_AND_SET
61
long	   *NWaitIOBackendP;
Bruce Momjian's avatar
Bruce Momjian committed
62

63 64
#endif

65 66
extern IpcSemaphoreId WaitIOSemId;

67
/*
 * Arrays with NBuffers entries each, allocated zero-filled with calloc()
 * at the end of InitBufferPool().
 */
long	   *PrivateRefCount;	/* also used in freelist.c */
bits8	   *BufferLocks;		/* flag bits showing locks I have set */
BufferTag  *BufferTagLastDirtied; /* tag buffer had when last dirtied by me */
BufferBlindId *BufferBlindLastDirtied; /* and its BlindId too */
bool	   *BufferDirtiedByMe;	/* T if buf has been dirtied in cur xact */

73 74 75

/*
 * Data Structures:
76 77
 *		buffers live in a freelist and a lookup data structure.
 *
78 79
 *
 * Buffer Lookup:
80 81 82 83 84
 *		Two important notes.  First, the buffer has to be
 *		available for lookup BEFORE an IO begins.  Otherwise
 *		a second process trying to read the buffer will
 *		allocate its own copy and the buffer pool will
 *		become inconsistent.
85 86
 *
 * Buffer Replacement:
87 88
 *		see freelist.c.  A buffer cannot be replaced while in
 *		use either by data manager or during IO.
89 90
 *
 * WriteBufferBack:
91 92 93 94
 *		currently, a buffer is only written back at the time
 *		it is selected for replacement.  It should
 *		be done sooner if possible to reduce latency of
 *		BufferAlloc().	Maybe there should be a daemon process.
95 96 97
 *
 * Synchronization/Locking:
 *
98 99 100
 * BufMgrLock lock -- must be acquired before manipulating the
 *		buffer queues (lookup/freelist).  Must be released
 *		before exit and before doing any IO.
101 102
 *
 * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
103 104 105 106
 *		It must be set when an IO is initiated and cleared at
 *		the end of the IO.  It is there to make sure that one
 *		process doesn't start to use a buffer while another is
 *		faulting it in.  see IOWait/IOSignal.
107
 *
108 109 110 111 112
 * refcount --	A buffer is pinned during IO and immediately
 *		after a BufferAlloc().	A buffer is always either pinned
 *		or on the freelist but never both.	The buffer must be
 *		released, written, or flushed before the end of
 *		transaction.
113 114
 *
 * PrivateRefCount -- Each buffer also has a private refcount that keeps
115 116 117 118 119 120 121
 *		track of the number of times the buffer is pinned in the current
 *		process.  This is used for two purposes.  First, if we pin
 *		a buffer more than once, we only need to change the shared refcount
 *		once, and thus only lock the buffer pool once.  Second, when a
 *		transaction aborts, it should unpin each buffer exactly the number of
 *		times it has pinned it, so that it will not blow away buffers of
 *		another backend.
122 123 124
 *
 */

125
SPINLOCK	BufMgrLock;
126

127 128 129 130 131 132
/*
 * NOTE(review): these look like buffer usage statistics counters
 * (reads, hits, flushes; shared vs. local buffers).  They are only
 * defined here; nothing in this file's visible code updates or reads
 * them -- confirm against their users elsewhere.
 */
long int	ReadBufferCount;
long int	ReadLocalBufferCount;
long int	BufferHitCount;
long int	LocalBufferHitCount;
long int	BufferFlushCount;
long int	LocalBufferFlushCount;
133 134 135 136 137 138 139 140 141 142 143


/*
 * InitBufferPool
 *
 * Initialize the buffer manager module.
 *
 * While holding BufMgrLock, obtains the "Buffer Descriptors" and
 * "Buffer Blocks" areas through ShmemInitStruct().  If neither was
 * reported as already present, every data descriptor is initialized and
 * linked into a circular, doubly-linked free list.  InitBufTable() and
 * InitFreeList() are then called and the lock is released.
 *
 * When HAS_TEST_AND_SET is not defined, the shared "#Backends Waiting IO"
 * counter is also set up and the WaitIO and WaitCL semaphores are created
 * from keys derived from 'key'.
 *
 * Finally, the per-backend arrays (PrivateRefCount, BufferLocks,
 * BufferTagLastDirtied, BufferBlindLastDirtied, BufferDirtiedByMe) are
 * allocated zero-filled with NBuffers entries each.
 *
 * key: IPC key used to derive the semaphore keys; only referenced when
 *		HAS_TEST_AND_SET is not defined.
 *
 * Reports elog(FATAL) if a semaphore cannot be created or if any of the
 * per-backend arrays cannot be allocated.
 *
 * XXX should calculate size of pool dynamically based on the
 * amount of available memory.
 */
void
InitBufferPool(IPCKey key)
{
	bool		foundBufs,
				foundDescs;
	int			i;

	Data_Descriptors = NBuffers;
	Free_List_Descriptor = Data_Descriptors;
	Lookup_List_Descriptor = Data_Descriptors + 1;
	Num_Descriptors = Data_Descriptors + 1;

	SpinAcquire(BufMgrLock);

#ifdef BMTRACE
	/* foundDescs is only borrowed here; it is overwritten again below */
	CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
							(BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
										   &foundDescs);
	if (!foundDescs)
		MemSet(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));

	/* the trace entries start right after the leading long */
	TraceBuf = (bmtrace *) & (CurTraceBuf[1]);
#endif

	BufferDescriptors = (BufferDesc *)
		ShmemInitStruct("Buffer Descriptors",
					  Num_Descriptors * sizeof(BufferDesc), &foundDescs);

	BufferBlocks = (BufferBlock)
		ShmemInitStruct("Buffer Blocks",
						NBuffers * BLCKSZ, &foundBufs);

#ifndef HAS_TEST_AND_SET
	{
		bool		foundNWaitIO;

		NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO",
												   sizeof(long),
												   &foundNWaitIO);
		if (!foundNWaitIO)
			*NWaitIOBackendP = 0;
	}
#endif

	if (foundDescs || foundBufs)
	{
		/* both should be present or neither */
		Assert(foundDescs && foundBufs);
	}
	else
	{
		BufferDesc *buf;
		unsigned long block;

		buf = BufferDescriptors;
		block = (unsigned long) BufferBlocks;

		/*
		 * link the buffers into a circular, doubly-linked list to
		 * initialize free list.  Still don't know anything about
		 * replacement strategy in this file.
		 */
		for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
		{
			Assert(ShmemIsValid((unsigned long) block));

			buf->freeNext = i + 1;
			buf->freePrev = i - 1;

			CLEAR_BUFFERTAG(&(buf->tag));
			buf->data = MAKE_OFFSET(block);
			buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
			buf->refcount = 0;
			buf->buf_id = i;
#ifdef HAS_TEST_AND_SET
			S_INIT_LOCK(&(buf->io_in_progress_lock));
			S_INIT_LOCK(&(buf->cntx_lock));
#endif
		}

		/* close the circular queue */
		BufferDescriptors[0].freePrev = Data_Descriptors - 1;
		BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
	}

	/* Init the rest of the module */
	InitBufTable();
	InitFreeList(!foundDescs);

	SpinRelease(BufMgrLock);

#ifndef HAS_TEST_AND_SET
	{
		extern IpcSemaphoreId WaitIOSemId;
		extern IpcSemaphoreId WaitCLSemId;

		WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
										 1, IPCProtection, 0, 1);
		if (WaitIOSemId < 0)
			elog(FATAL, "InitBufferPool: IpcSemaphoreCreate(WaitIOSemId) failed");
		WaitCLSemId = IpcSemaphoreCreate(IPCKeyGetWaitCLSemaphoreKey(key),
										 1, IPCProtection,
										 IpcSemaphoreDefaultStartValue, 1);
		if (WaitCLSemId < 0)
			elog(FATAL, "InitBufferPool: IpcSemaphoreCreate(WaitCLSemId) failed");
	}
#endif

	/* per-backend bookkeeping arrays, one zeroed entry per buffer */
	PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
	BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8));
	BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag));
	BufferBlindLastDirtied = (BufferBlindId *) calloc(NBuffers, sizeof(BufferBlindId));
	BufferDirtiedByMe = (bool *) calloc(NBuffers, sizeof(bool));

	/*
	 * calloc() returns NULL on failure.  Fail now, with the spinlock
	 * already released, rather than dereferencing a null array later.
	 */
	if (PrivateRefCount == NULL ||
		BufferLocks == NULL ||
		BufferTagLastDirtied == NULL ||
		BufferBlindLastDirtied == NULL ||
		BufferDirtiedByMe == NULL)
		elog(FATAL, "InitBufferPool: out of memory");
}

/* -----------------------------------------------------
 * BufferShmemSize
 *
 * compute the size of shared memory for the buffer pool including
 * data pages, buffer descriptors, hash tables, etc.
 * ----------------------------------------------------
 */
int
BufferShmemSize()
{
267
	int			size = 0;
268 269 270 271 272

	/* size of shmem index hash table */
	size += hash_estimate_size(SHMEM_INDEX_SIZE,
							   SHMEM_INDEX_KEYSIZE,
							   SHMEM_INDEX_DATASIZE);
273 274 275 276 277 278 279 280

	/* size of buffer descriptors */
	size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));

	/* size of data pages */
	size += NBuffers * MAXALIGN(BLCKSZ);

	/* size of buffer hash table */
281 282 283
	size += hash_estimate_size(NBuffers,
							   sizeof(BufferTag),
							   sizeof(Buffer));
284

285
#ifdef BMTRACE
286
	size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
287
#endif
288

289
	return size;
290
}