nbtree.h 20.7 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * nbtree.h
4
 *	  header file for postgres btree access method implementation.
5 6
 *
 *
7
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
Bruce Momjian's avatar
Add:  
Bruce Momjian committed
8
 * Portions Copyright (c) 1994, Regents of the University of California
9
 *
10
 * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.101 2006/07/11 21:05:57 tgl Exp $
11 12 13
 *
 *-------------------------------------------------------------------------
 */
14 15
#ifndef NBTREE_H
#define NBTREE_H
16

17
#include "access/itup.h"
Bruce Momjian's avatar
Bruce Momjian committed
18 19
#include "access/relscan.h"
#include "access/sdir.h"
20
#include "access/xlogutils.h"
21

22 23 24 25

/* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
typedef uint16 BTCycleId;

26
/*
27
 *	BTPageOpaqueData -- At the end of every page, we store a pointer
28
 *	to both siblings in the tree.  This is used to do forward/backward
29 30 31
 *	index scans.  The next-page link is also critical for recovery when
 *	a search has navigated to the wrong page due to concurrent page splits
 *	or deletions; see src/backend/access/nbtree/README for more info.
32
 *
Bruce Momjian's avatar
Bruce Momjian committed
33
 *	In addition, we store the page's btree level (counting upwards from
34 35 36 37
 *	zero at a leaf page) as well as some flag bits indicating the page type
 *	and status.  If the page is deleted, we replace the level with the
 *	next-transaction-ID value indicating when it is safe to reclaim the page.
 *
38 39 40 41 42 43 44 45 46 47
 *	We also store a "vacuum cycle ID".  When a page is split while VACUUM is
 *	processing the index, a nonzero value associated with the VACUUM run is
 *	stored into both halves of the split page.  (If VACUUM is not running,
 *	both pages receive zero cycleids.)  This allows VACUUM to detect whether
 *	a page was split since it started, with a small probability of false match
 *	if the page was last split some exact multiple of 65536 VACUUMs ago.
 *	Also, during a split, the BTP_SPLIT_END flag is cleared in the left
 *	(original) page, and set in the right page, but only if the next page
 *	to its right has a different cycleid.
 *
48 49
 *	NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
 *	instead.
50 51
 */

52 53
typedef struct BTPageOpaqueData
{
54 55 56 57
	BlockNumber btpo_prev;		/* left sibling, or P_NONE if leftmost */
	BlockNumber btpo_next;		/* right sibling, or P_NONE if rightmost */
	union
	{
Bruce Momjian's avatar
Bruce Momjian committed
58
		uint32		level;		/* tree level --- zero for leaf pages */
59
		TransactionId xact;		/* next transaction ID, if deleted */
Bruce Momjian's avatar
Bruce Momjian committed
60
	}			btpo;
61
	uint16		btpo_flags;		/* flag bits, see below */
62
	BTCycleId	btpo_cycleid;	/* vacuum cycle ID of latest split */
Bruce Momjian's avatar
Bruce Momjian committed
63 64 65 66
} BTPageOpaqueData;

typedef BTPageOpaqueData *BTPageOpaque;

67
/* Bits defined in btpo_flags */
68
#define BTP_LEAF		(1 << 0)	/* leaf page, i.e. not internal page */
69
#define BTP_ROOT		(1 << 1)	/* root page (has no parent) */
70
#define BTP_DELETED		(1 << 2)	/* page has been deleted from tree */
71
#define BTP_META		(1 << 3)	/* meta-page */
72
#define BTP_HALF_DEAD	(1 << 4)	/* empty, but still in tree */
73
#define BTP_SPLIT_END	(1 << 5)	/* rightmost page of split group */
74 75


76 77 78
/*
 * The Meta page is always the first page in the btree index.
 * Its primary purpose is to point to the location of the btree root page.
79 80
 * We also point to the "fast" root, which is the current effective root;
 * see README for discussion.
81
 */
Vadim B. Mikheev's avatar
WAL  
Vadim B. Mikheev committed
82 83 84

typedef struct BTMetaPageData
{
85 86 87 88 89 90
	uint32		btm_magic;		/* should contain BTREE_MAGIC */
	uint32		btm_version;	/* should contain BTREE_VERSION */
	BlockNumber btm_root;		/* current root location */
	uint32		btm_level;		/* tree level of the root page */
	BlockNumber btm_fastroot;	/* current "fast" root location */
	uint32		btm_fastlevel;	/* tree level of the "fast" root page */
Vadim B. Mikheev's avatar
WAL  
Vadim B. Mikheev committed
91 92 93
} BTMetaPageData;

#define BTPageGetMeta(p) \
94
	((BTMetaPageData *) PageGetContents(p))
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
95

96
#define BTREE_METAPAGE	0		/* first page is meta */
97
#define BTREE_MAGIC		0x053162	/* magic number of btree pages */
98
#define BTREE_VERSION	2		/* current version number */
99

100 101 102 103 104 105 106 107
/*
 * We actually need to be able to fit three items on every page,
 * so restrict any one item to 1/3 the per-page available space.
 */
#define BTMaxItemSize(page) \
	((PageGetPageSize(page) - \
	  sizeof(PageHeaderData) - \
	  MAXALIGN(sizeof(BTPageOpaqueData))) / 3 - sizeof(ItemIdData))
108

109
/*
110 111 112 113 114
 * The leaf-page fillfactor defaults to 90% but is user-adjustable.
 * For pages above the leaf level, we use a fixed 70% fillfactor.
 * The fillfactor is applied during index build and when splitting
 * a rightmost page; when splitting non-rightmost pages we try to
 * divide the data equally.
115
 */
116
#define BTREE_MIN_FILLFACTOR		10
117
#define BTREE_DEFAULT_FILLFACTOR	90
118
#define BTREE_NONLEAF_FILLFACTOR	70
119

120
/*
121
 *	Test whether two btree entries are "the same".
122 123 124 125 126 127 128 129 130
 *
 *	Old comments:
 *	In addition, we must guarantee that all tuples in the index are unique,
 *	in order to satisfy some assumptions in Lehman and Yao.  The way that we
 *	do this is by generating a new OID for every insertion that we do in the
 *	tree.  This adds eight bytes to the size of btree index tuples.  Note
 *	that we do not use the OID as part of a composite key; the OID only
 *	serves as a unique identifier for a given index tuple (logical position
 *	within a page).
131
 *
132 133
 *	New comments:
 *	actually, we must guarantee that all tuples in A LEVEL
134
 *	are unique, not in ALL INDEX. So, we can use the t_tid
135
 *	as unique identifier for a given index tuple (logical position
136
 *	within a level). - vadim 04/09/97
137
 */
138 139 140 141
#define BTTidSame(i1, i2)	\
	( (i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && \
	  (i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \
	  (i1).ip_posid == (i2).ip_posid )
142 143
#define BTEntrySame(i1, i2)	\
	BTTidSame((i1)->t_tid, (i2)->t_tid)
144

145

146
/*
147 148 149
 *	In general, the btree code tries to localize its knowledge about
 *	page layout to a couple of routines.  However, we need a special
 *	value to indicate "no page number" in those places where we expect
150 151
 *	page numbers.  We can use zero for this because we never need to
 *	make a pointer to the metadata page.
152 153
 */

154
#define P_NONE			0
155 156 157 158 159

/*
 * Macros to test whether a page is leftmost or rightmost on its tree level,
 * as well as other state info kept in the opaque data.
 */
160 161
#define P_LEFTMOST(opaque)		((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque)		((opaque)->btpo_next == P_NONE)
162 163
#define P_ISLEAF(opaque)		((opaque)->btpo_flags & BTP_LEAF)
#define P_ISROOT(opaque)		((opaque)->btpo_flags & BTP_ROOT)
164
#define P_ISDELETED(opaque)		((opaque)->btpo_flags & BTP_DELETED)
165
#define P_IGNORE(opaque)		((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183

/*
 *	Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
 *	page.  The high key is not a data key, but gives info about what range of
 *	keys is supposed to be on this page.  The high key on a page is required
 *	to be greater than or equal to any data key that appears on the page.
 *	If we find ourselves trying to insert a key > high key, we know we need
 *	to move right (this should only happen if the page was split since we
 *	examined the parent page).
 *
 *	Our insertion algorithm guarantees that we can use the initial least key
 *	on our right sibling as the high key.  Once a page is created, its high
 *	key changes only if the page is split.
 *
 *	On a non-rightmost page, the high key lives in item 1 and data items
 *	start in item 2.  Rightmost pages have no high key, so we store data
 *	items beginning in item 1.
 */
184

Bruce Momjian's avatar
Bruce Momjian committed
185 186
#define P_HIKEY				((OffsetNumber) 1)
#define P_FIRSTKEY			((OffsetNumber) 2)
187
#define P_FIRSTDATAKEY(opaque)	(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
188

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
189
/*
190 191
 * XLOG records for btree operations
 *
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
192 193 194
 * XLOG allows to store some information in high 4 bits of log
 * record xl_info field
 */
195
#define XLOG_BTREE_INSERT_LEAF	0x00	/* add index tuple without split */
Bruce Momjian's avatar
Bruce Momjian committed
196
#define XLOG_BTREE_INSERT_UPPER 0x10	/* same, on a non-leaf page */
197
#define XLOG_BTREE_INSERT_META	0x20	/* same, plus update metapage */
198
#define XLOG_BTREE_SPLIT_L		0x30	/* add index tuple with split */
199
#define XLOG_BTREE_SPLIT_R		0x40	/* as above, new item on right */
200
#define XLOG_BTREE_SPLIT_L_ROOT 0x50	/* add tuple with split of root */
Bruce Momjian's avatar
Bruce Momjian committed
201
#define XLOG_BTREE_SPLIT_R_ROOT 0x60	/* as above, new item on right */
202
#define XLOG_BTREE_DELETE		0x70	/* delete leaf index tuple */
203
#define XLOG_BTREE_DELETE_PAGE	0x80	/* delete an entire page */
204
#define XLOG_BTREE_DELETE_PAGE_META 0x90	/* same, plus update metapage */
205
#define XLOG_BTREE_NEWROOT		0xA0	/* new root page */
206

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
207
/*
208
 * All that we need to find changed index tuple
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
209 210 211
 */
typedef struct xl_btreetid
{
212 213
	RelFileNode node;
	ItemPointerData tid;		/* changed tuple id */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
214 215
} xl_btreetid;

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
216
/*
217
 * All that we need to regenerate the meta-data page
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
218
 */
219
typedef struct xl_btree_metadata
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
220
{
221 222 223 224
	BlockNumber root;
	uint32		level;
	BlockNumber fastroot;
	uint32		fastlevel;
225
} xl_btree_metadata;
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
226

227
/*
228 229 230 231
 * This is what we need to know about simple (without split) insert.
 *
 * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
 * Note that INSERT_META implies it's not a leaf page.
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
232
 */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
233 234
typedef struct xl_btree_insert
{
235
	xl_btreetid target;			/* inserted tuple id */
236
	/* BlockNumber downlink field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
237
	/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
238
	/* INDEX TUPLE FOLLOWS AT END OF STRUCT */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
239 240
} xl_btree_insert;

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
241
#define SizeOfBtreeInsert	(offsetof(xl_btreetid, tid) + SizeOfIptrData)
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
242

243
/*
244
 * On insert with split we save items of both left and right siblings
245 246 247 248 249 250
 * and restore content of both pages from log record.  This way takes less
 * xlog space than the normal approach, because if we did it standardly,
 * XLogInsert would almost always think the right page is new and store its
 * whole page image.
 *
 * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
251
 * The _L and _R variants indicate whether the inserted tuple went into the
252 253 254
 * left or right split page (and thus, whether otherblk is the right or left
 * page of the split pair).  The _ROOT variants indicate that we are splitting
 * the root page, and thus that a newroot record rather than an insert or
Bruce Momjian's avatar
Bruce Momjian committed
255
 * split record should follow.	Note that a split record never carries a
256
 * metapage update --- we'll do that in the parent-level update.
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
257
 */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
258 259
typedef struct xl_btree_split
{
260
	xl_btreetid target;			/* inserted tuple id */
261
	BlockNumber otherblk;		/* second block participated in split: */
262
	/* first one is stored in target' tid */
263 264 265
	BlockNumber leftblk;		/* prev/left block */
	BlockNumber rightblk;		/* next/right block */
	uint32		level;			/* tree level of page being split */
266
	uint16		leftlen;		/* len of left page items below */
267
	/* LEFT AND RIGHT PAGES TUPLES FOLLOW AT THE END */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
268 269
} xl_btree_split;

270
#define SizeOfBtreeSplit	(offsetof(xl_btree_split, leftlen) + sizeof(uint16))
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
271

272
/*
273 274
 * This is what we need to know about delete of individual leaf index tuples.
 * The WAL record can represent deletion of any number of index tuples on a
275
 * single index page.
276 277 278
 */
typedef struct xl_btree_delete
{
279 280 281
	RelFileNode node;
	BlockNumber block;
	/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
282 283
} xl_btree_delete;

284
#define SizeOfBtreeDelete	(offsetof(xl_btree_delete, block) + sizeof(BlockNumber))
285 286 287 288

/*
 * This is what we need to know about deletion of a btree page.  The target
 * identifies the tuple removed from the parent page (note that we remove
Bruce Momjian's avatar
Bruce Momjian committed
289
 * this tuple's downlink and the *following* tuple's key).	Note we do not
290 291 292 293 294 295 296 297 298 299
 * store any content for the deleted page --- it is just rewritten as empty
 * during recovery.
 */
typedef struct xl_btree_delete_page
{
	xl_btreetid target;			/* deleted tuple id in parent page */
	BlockNumber deadblk;		/* child block being deleted */
	BlockNumber leftblk;		/* child block's left sibling, if any */
	BlockNumber rightblk;		/* child block's right sibling */
	/* xl_btree_metadata FOLLOWS IF XLOG_BTREE_DELETE_PAGE_META */
300
} xl_btree_delete_page;
301 302 303 304

#define SizeOfBtreeDeletePage	(offsetof(xl_btree_delete_page, rightblk) + sizeof(BlockNumber))

/*
305
 * New root log record.  There are zero tuples if this is to establish an
306 307 308 309
 * empty root, or two if it is the result of splitting an old root.
 *
 * Note that although this implies rewriting the metadata page, we don't need
 * an xl_btree_metadata record --- the rootblk and level are sufficient.
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
310 311 312
 */
typedef struct xl_btree_newroot
{
313
	RelFileNode node;
314 315
	BlockNumber rootblk;		/* location of new root */
	uint32		level;			/* its tree level */
316
	/* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
317 318
} xl_btree_newroot;

319 320
#define SizeOfBtreeNewroot	(offsetof(xl_btree_newroot, level) + sizeof(uint32))

Vadim B. Mikheev's avatar
Vadim B. Mikheev committed
321

322
/*
323
 *	Operator strategy numbers for B-tree have been moved to access/skey.h,
324
 *	because many places need to use them in ScanKeyInit() calls.
325 326 327
 */

/*
328 329 330 331 332
 *	When a new operator class is declared, we require that the user
 *	supply us with an amproc procedure for determining whether, for
 *	two keys a and b, a < b, a = b, or a > b.  This routine must
 *	return < 0, 0, > 0, respectively, in these three cases.  Since we
 *	only have one such proc in amproc, it's number 1.
333 334 335 336
 */

#define BTORDER_PROC	1

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 *	We need to be able to tell the difference between read and write
 *	requests for pages, in order to do locking correctly.
 */

#define BT_READ			BUFFER_LOCK_SHARE
#define BT_WRITE		BUFFER_LOCK_EXCLUSIVE

/*
 *	BTStackData -- As we descend a tree, we push the (location, downlink)
 *	pairs from internal pages onto a private stack.  If we split a
 *	leaf, we use this stack to walk back up the tree and insert data
 *	into parent pages (and possibly to split them, too).  Lehman and
 *	Yao's update algorithm guarantees that under no circumstances can
 *	our private stack give us an irredeemably bad picture up the tree.
 *	Again, see the paper for details.
 */

typedef struct BTStackData
{
	BlockNumber bts_blkno;
	OffsetNumber bts_offset;
359
	IndexTupleData bts_btentry;
360 361 362 363 364 365
	struct BTStackData *bts_parent;
} BTStackData;

typedef BTStackData *BTStack;

/*
366 367 368 369 370 371 372
 * BTScanOpaqueData is the btree-private state needed for an indexscan.
 * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
 * details of the preprocessing), information about the current location
 * of the scan, and information about the marked location, if any.  (We use
 * BTScanPosData to represent the data needed for each of current and marked
 * locations.)  In addition we can remember some known-killed index entries
 * that must be marked before we can move off the current page.
373
 *
374 375 376 377 378 379 380 381 382 383
 * Index scans work a page at a time: we pin and read-lock the page, identify
 * all the matching items on the page and save them in BTScanPosData, then
 * release the read-lock while returning the items to the caller for
 * processing.  This approach minimizes lock/unlock traffic.  Note that we
 * keep the pin on the index page until the caller is done with all the items
 * (this is needed for VACUUM synchronization, see nbtree/README).  When we
 * are ready to step to the next page, if the caller has told us any of the
 * items were killed, we re-lock the page to mark them killed, then unlock.
 * Finally we drop the pin and step to the next page in the appropriate
 * direction.
384
 *
385 386
 * NOTE: in this implementation, btree does not use or set the
 * currentItemData and currentMarkData fields of IndexScanDesc at all.
387 388
 */

389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
typedef struct BTScanPosItem	/* what we remember about each match */
{
	ItemPointerData heapTid;	/* TID of referenced heap item */
	OffsetNumber indexOffset;	/* index item's location within page */
} BTScanPosItem;

typedef struct BTScanPosData
{
	Buffer		buf;			/* if valid, the buffer is pinned */

	BlockNumber nextPage;		/* page's right link when we scanned it */

	/*
	 * moreLeft and moreRight track whether we think there may be matching
	 * index entries to the left and right of the current page, respectively.
	 * We can clear the appropriate one of these flags when _bt_checkkeys()
	 * returns continuescan = false.
	 */
	bool		moreLeft;
	bool		moreRight;

	/*
	 * The items array is always ordered in index order (ie, increasing
	 * indexoffset).  When scanning backwards it is convenient to fill the
	 * array back-to-front, so we start at the last slot and fill downwards.
	 * Hence we need both a first-valid-entry and a last-valid-entry counter.
	 * itemIndex is a cursor showing which entry was last returned to caller.
	 */
	int			firstItem;		/* first valid index in items[] */
	int			lastItem;		/* last valid index in items[] */
	int			itemIndex;		/* current index in items[] */

	BTScanPosItem items[MaxIndexTuplesPerPage];		/* MUST BE LAST */
} BTScanPosData;

typedef BTScanPosData *BTScanPos;

#define BTScanPosIsValid(scanpos) BufferIsValid((scanpos).buf)

428 429
typedef struct BTScanOpaqueData
{
430
	/* these fields are set by _bt_preprocess_keys(): */
431
	bool		qual_ok;		/* false if qual can never be satisfied */
432 433
	int			numberOfKeys;	/* number of preprocessed scan keys */
	ScanKey		keyData;		/* array of preprocessed scan keys */
434 435 436 437 438 439 440 441

	/* info about killed items if any (killedItems is NULL if never used) */
	int		   *killedItems;	/* currPos.items indexes of killed items */
	int			numKilled;		/* number of currently stored items */

	/* keep these last in struct for efficiency */
	BTScanPosData currPos;		/* current position data */
	BTScanPosData markPos;		/* marked position, if any */
442 443 444 445
} BTScanOpaqueData;

typedef BTScanOpaqueData *BTScanOpaque;

446 447 448 449 450 451 452
/*
 * We use these private sk_flags bits in preprocessed scan keys
 */
#define SK_BT_REQFWD	0x00010000	/* required to continue forward scan */
#define SK_BT_REQBKWD	0x00020000	/* required to continue backward scan */


453 454 455 456 457 458
/*
 * prototypes for functions in nbtree.c (external entry points for btree)
 */
extern Datum btbuild(PG_FUNCTION_ARGS);
extern Datum btinsert(PG_FUNCTION_ARGS);
extern Datum btbeginscan(PG_FUNCTION_ARGS);
459 460
extern Datum btgettuple(PG_FUNCTION_ARGS);
extern Datum btgetmulti(PG_FUNCTION_ARGS);
461 462 463 464
extern Datum btrescan(PG_FUNCTION_ARGS);
extern Datum btendscan(PG_FUNCTION_ARGS);
extern Datum btmarkpos(PG_FUNCTION_ARGS);
extern Datum btrestrpos(PG_FUNCTION_ARGS);
465
extern Datum btbulkdelete(PG_FUNCTION_ARGS);
466
extern Datum btvacuumcleanup(PG_FUNCTION_ARGS);
467
extern Datum btoptions(PG_FUNCTION_ARGS);
468

469 470 471
/*
 * prototypes for functions in nbtinsert.c
 */
472
extern void _bt_doinsert(Relation rel, IndexTuple itup,
473
			 bool index_is_unique, Relation heapRel);
474
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
475
extern void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
Bruce Momjian's avatar
Bruce Momjian committed
476
				  BTStack stack, bool is_root, bool is_only);
477 478 479 480

/*
 * prototypes for functions in nbtpage.c
 */
481
extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
482
extern Buffer _bt_getroot(Relation rel, int access);
483
extern Buffer _bt_gettrueroot(Relation rel);
484
extern void _bt_checkpage(Relation rel, Buffer buf);
485
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
486
extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
Bruce Momjian's avatar
Bruce Momjian committed
487
				 BlockNumber blkno, int access);
488
extern void _bt_relbuf(Relation rel, Buffer buf);
489
extern void _bt_pageinit(Page page, Size size);
490
extern bool _bt_page_recyclable(Page page);
491
extern void _bt_delitems(Relation rel, Buffer buf,
Bruce Momjian's avatar
Bruce Momjian committed
492
			 OffsetNumber *itemnos, int nitems);
493
extern int	_bt_pagedel(Relation rel, Buffer buf, bool vacuum_full);
494 495 496 497

/*
 * prototypes for functions in nbtsearch.c
 */
498
extern BTStack _bt_search(Relation rel,
Bruce Momjian's avatar
Bruce Momjian committed
499 500
		   int keysz, ScanKey scankey, bool nextkey,
		   Buffer *bufP, int access);
501
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
502
			  ScanKey scankey, bool nextkey, int access);
503
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
504
			ScanKey scankey, bool nextkey);
505
extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
506
			Page page, OffsetNumber offnum);
507
extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
508
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
509
extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
510 511 512 513

/*
 * prototypes for functions in nbtutils.c
 */
514
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
515
extern ScanKey _bt_mkscankey_nodata(Relation rel);
516 517
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
518
extern void _bt_preprocess_keys(IndexScanDesc scan);
519 520 521
extern bool _bt_checkkeys(IndexScanDesc scan,
						  Page page, OffsetNumber offnum,
						  ScanDirection dir, bool *continuescan);
522
extern void _bt_killitems(IndexScanDesc scan, bool haveLock);
523 524 525 526 527
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);
extern void _bt_end_vacuum(Relation rel);
extern Size BTreeShmemSize(void);
extern void BTreeShmemInit(void);
528 529 530 531

/*
 * prototypes for functions in nbtsort.c
 */
532
typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
533

534
extern BTSpool *_bt_spoolinit(Relation index, bool isunique, bool isdead);
535
extern void _bt_spooldestroy(BTSpool *btspool);
536
extern void _bt_spool(IndexTuple itup, BTSpool *btspool);
537
extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
538

539 540 541 542
/*
 * prototypes for functions in nbtxlog.c
 */
extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
543
extern void btree_desc(StringInfo buf, uint8 xl_info, char *rec);
544 545 546
extern void btree_xlog_startup(void);
extern void btree_xlog_cleanup(void);

547
#endif   /* NBTREE_H */