Commit 88dc31e3 authored by Tom Lane's avatar Tom Lane

First cut at recycling space in btree indexes. Still some rough edges

to fix, but it seems to basically work...
parent 27854915
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.63 2002/11/13 00:39:46 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.64 2003/02/23 06:17:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -394,17 +394,16 @@ nocache_index_getattr(IndexTuple tup,
}
/*
* Copies source into target. If *target == NULL, we palloc space; otherwise
* we assume we have space that is already palloc'ed.
* Create a palloc'd copy of an index tuple.
*/
void
CopyIndexTuple(IndexTuple source, IndexTuple *target)
IndexTuple
CopyIndexTuple(IndexTuple source)
{
IndexTuple result;
Size size;
size = IndexTupleSize(source);
if (*target == NULL)
*target = (IndexTuple) palloc(size);
memmove((char *) *target, (char *) source, size);
result = (IndexTuple) palloc(size);
memcpy(result, source, size);
return result;
}
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.98 2003/02/22 00:45:03 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.99 2003/02/23 06:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -58,7 +58,6 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page,
static void _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
int leftfree, int rightfree,
bool newitemonleft, Size firstrightitemsz);
static Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
static void _bt_pgaddtup(Relation rel, Page page,
Size itemsize, BTItem btitem,
OffsetNumber itup_off, const char *where);
......@@ -666,7 +665,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
rightoff;
OffsetNumber maxoff;
OffsetNumber i;
BTItem lhikey;
rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
origpage = BufferGetPage(buf);
......@@ -730,7 +728,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
itemsz = ItemIdGetLength(itemid);
item = (BTItem) PageGetItem(origpage, itemid);
}
lhikey = item;
if (PageAddItem(leftpage, (Item) item, itemsz, leftoff,
LP_USED) == InvalidOffsetNumber)
elog(PANIC, "btree: failed to add hikey to the left sibling");
......@@ -1262,7 +1259,7 @@ _bt_insert_parent(Relation rel,
*
* Returns InvalidBuffer if item not found (should not happen).
*/
static Buffer
Buffer
_bt_getstackbuf(Relation rel, BTStack stack, int access)
{
BlockNumber blkno;
......
This diff is collapsed.
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.96 2003/02/22 00:45:04 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.97 2003/02/23 06:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -710,15 +710,16 @@ Datum
btvacuumcleanup(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
#ifdef NOT_USED
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
#endif
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
BlockNumber num_pages;
BlockNumber blkno;
PageFreeSpaceInfo *pageSpaces;
int nFreePages,
maxFreePages;
BlockNumber pages_deleted = 0;
MemoryContext mycontext;
MemoryContext oldcontext;
Assert(stats != NULL);
......@@ -731,6 +732,13 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo));
nFreePages = 0;
/* Create a temporary memory context to run _bt_pagedel in */
mycontext = AllocSetContextCreate(CurrentMemoryContext,
"_bt_pagedel",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/*
* Scan through all pages of index, except metapage. (Any pages added
* after we start the scan will not be examined; this should be fine,
......@@ -745,17 +753,53 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (P_ISDELETED(opaque))
if (_bt_page_recyclable(page))
{
/* XXX if safe-to-reclaim... */
/* Okay to recycle this page */
if (nFreePages < maxFreePages)
{
pageSpaces[nFreePages].blkno = blkno;
/* The avail-space value is bogus, but must be < BLCKSZ */
/* claimed avail-space must be < BLCKSZ */
pageSpaces[nFreePages].avail = BLCKSZ-1;
nFreePages++;
}
}
else if ((opaque->btpo_flags & BTP_HALF_DEAD) ||
P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page))
{
/* Empty, try to delete */
int ndel;
/* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset(mycontext);
oldcontext = MemoryContextSwitchTo(mycontext);
ndel = _bt_pagedel(rel, buf, info->vacuum_full);
pages_deleted += ndel;
/*
* During VACUUM FULL it's okay to recycle deleted pages
* immediately, since there can be no other transactions
* scanning the index. Note that we will only recycle the
* current page and not any parent pages that _bt_pagedel
* might have recursed to; this seems reasonable in the name
* of simplicity. (Trying to do otherwise would mean we'd
* have to sort the list of recyclable pages we're building.)
*/
if (ndel && info->vacuum_full)
{
if (nFreePages < maxFreePages)
{
pageSpaces[nFreePages].blkno = blkno;
/* claimed avail-space must be < BLCKSZ */
pageSpaces[nFreePages].avail = BLCKSZ-1;
nFreePages++;
}
}
MemoryContextSwitchTo(oldcontext);
continue; /* pagedel released buffer */
}
_bt_relbuf(rel, buf);
}
......@@ -768,6 +812,13 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
pfree(pageSpaces);
MemoryContextDelete(mycontext);
if (pages_deleted > 0)
elog(info->message_level, "Index %s: %u pages, deleted %u; %u now free",
RelationGetRelationName(rel),
num_pages, pages_deleted, nFreePages);
/* update statistics */
stats->num_pages = num_pages;
stats->pages_free = nFreePages;
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.1 2003/02/21 00:06:21 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.2 2003/02/23 06:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -403,6 +403,171 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
UnlockAndWriteBuffer(buffer);
}
static void
btree_xlog_delete_page(bool redo, bool ismeta,
XLogRecPtr lsn, XLogRecord *record)
{
xl_btree_delete_page *xlrec = (xl_btree_delete_page *) XLogRecGetData(record);
Relation reln;
BlockNumber parent;
BlockNumber target;
BlockNumber leftsib;
BlockNumber rightsib;
Buffer buffer;
Page page;
BTPageOpaque pageop;
char *op = (redo) ? "redo" : "undo";
reln = XLogOpenRelation(redo, RM_BTREE_ID, xlrec->target.node);
if (!RelationIsValid(reln))
return;
parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
target = xlrec->deadblk;
leftsib = xlrec->leftblk;
rightsib = xlrec->rightblk;
/* parent page */
if (redo && !(record->xl_info & XLR_BKP_BLOCK_1))
{
buffer = XLogReadBuffer(false, reln, parent);
if (!BufferIsValid(buffer))
elog(PANIC, "btree_delete_page_redo: parent block unfound");
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
if (PageIsNew((PageHeader) page))
elog(PANIC, "btree_delete_page_redo: uninitialized parent page");
if (XLByteLE(lsn, PageGetLSN(page)))
{
UnlockAndReleaseBuffer(buffer);
}
else
{
OffsetNumber poffset;
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
if (poffset >= PageGetMaxOffsetNumber(page))
{
Assert(poffset == P_FIRSTDATAKEY(pageop));
PageIndexTupleDelete(page, poffset);
pageop->btpo_flags |= BTP_HALF_DEAD;
}
else
{
ItemId itemid;
BTItem btitem;
OffsetNumber nextoffset;
itemid = PageGetItemId(page, poffset);
btitem = (BTItem) PageGetItem(page, itemid);
ItemPointerSet(&(btitem->bti_itup.t_tid), rightsib, P_HIKEY);
nextoffset = OffsetNumberNext(poffset);
PageIndexTupleDelete(page, nextoffset);
}
PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID);
UnlockAndWriteBuffer(buffer);
}
}
/* Fix left-link of right sibling */
if (redo && !(record->xl_info & XLR_BKP_BLOCK_2))
{
buffer = XLogReadBuffer(false, reln, rightsib);
if (!BufferIsValid(buffer))
elog(PANIC, "btree_delete_page_redo: lost right sibling");
page = (Page) BufferGetPage(buffer);
if (PageIsNew((PageHeader) page))
elog(PANIC, "btree_delete_page_redo: uninitialized right sibling");
if (XLByteLE(lsn, PageGetLSN(page)))
{
UnlockAndReleaseBuffer(buffer);
}
else
{
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_prev = leftsib;
PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID);
UnlockAndWriteBuffer(buffer);
}
}
/* Fix right-link of left sibling, if any */
if (redo && !(record->xl_info & XLR_BKP_BLOCK_3))
{
if (leftsib != P_NONE)
{
buffer = XLogReadBuffer(false, reln, leftsib);
if (!BufferIsValid(buffer))
elog(PANIC, "btree_delete_page_redo: lost left sibling");
page = (Page) BufferGetPage(buffer);
if (PageIsNew((PageHeader) page))
elog(PANIC, "btree_delete_page_redo: uninitialized left sibling");
if (XLByteLE(lsn, PageGetLSN(page)))
{
UnlockAndReleaseBuffer(buffer);
}
else
{
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
pageop->btpo_next = rightsib;
PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID);
UnlockAndWriteBuffer(buffer);
}
}
}
/* Rewrite target page as empty deleted page */
buffer = XLogReadBuffer(false, reln, target);
if (!BufferIsValid(buffer))
elog(PANIC, "btree_delete_page_%s: lost target page", op);
page = (Page) BufferGetPage(buffer);
if (redo)
_bt_pageinit(page, BufferGetPageSize(buffer));
else if (PageIsNew((PageHeader) page))
elog(PANIC, "btree_delete_page_undo: uninitialized target page");
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
if (redo)
{
pageop->btpo_prev = leftsib;
pageop->btpo_next = rightsib;
pageop->btpo.xact = FrozenTransactionId;
pageop->btpo_flags = BTP_DELETED;
PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID);
UnlockAndWriteBuffer(buffer);
}
else
{
/* undo */
if (XLByteLT(PageGetLSN(page), lsn))
elog(PANIC, "btree_delete_page_undo: bad left sibling LSN");
elog(PANIC, "btree_delete_page_undo: unimplemented");
}
/* Update metapage if needed */
if (redo) /* metapage changes not undoable */
{
if (ismeta)
{
xl_btree_metadata md;
memcpy(&md, (char *) xlrec + SizeOfBtreeDeletePage,
sizeof(xl_btree_metadata));
_bt_restore_meta(reln, lsn,
md.root, md.level,
md.fastroot, md.fastlevel);
}
}
}
static void
btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
{
......@@ -534,8 +699,10 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_delete(true, lsn, record);
break;
case XLOG_BTREE_DELETE_PAGE:
btree_xlog_delete_page(true, false, lsn, record);
break;
case XLOG_BTREE_DELETE_PAGE_META:
// ???
btree_xlog_delete_page(true, true, lsn, record);
break;
case XLOG_BTREE_NEWROOT:
btree_xlog_newroot(true, lsn, record);
......@@ -583,8 +750,10 @@ btree_undo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_delete(false, lsn, record);
break;
case XLOG_BTREE_DELETE_PAGE:
btree_xlog_delete_page(false, false, lsn, record);
break;
case XLOG_BTREE_DELETE_PAGE_META:
// ???
btree_xlog_delete_page(false, true, lsn, record);
break;
case XLOG_BTREE_NEWROOT:
btree_xlog_newroot(false, lsn, record);
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.14 2002/09/20 19:56:01 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.15 2003/02/23 06:17:13 tgl Exp $
*
*
* NOTES:
......@@ -681,7 +681,9 @@ free_chunk_chain(FSMChunk *fchunk)
* Look to see if a page with at least the specified amount of space is
* available in the given FSMRelation. If so, return its page number,
* and advance the nextPage counter so that the next inquiry will return
* a different page if possible. Return InvalidBlockNumber if no success.
* a different page if possible; also update the entry to show that the
* requested space is not available anymore. Return InvalidBlockNumber
* if no success.
*/
static BlockNumber
find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
......@@ -713,6 +715,12 @@ find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
/* Check the next page */
if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
{
/*
* Found what we want --- adjust the entry. In theory we could
* delete the entry immediately if it drops below threshold,
* but it seems better to wait till we next need space.
*/
curChunk->bytes[chunkRelIndex] -= (ItemLength) spaceNeeded;
fsmrel->nextPage = pageIndex + 1;
return curChunk->pages[chunkRelIndex];
}
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: itup.h,v 1.36 2002/08/25 17:20:01 tgl Exp $
* $Id: itup.h,v 1.37 2003/02/23 06:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -133,11 +133,11 @@ typedef InsertIndexResultData *InsertIndexResult;
)
/* indextuple.h */
/* routines in indextuple.c */
extern IndexTuple index_formtuple(TupleDesc tupleDescriptor,
Datum *value, char *null);
extern Datum nocache_index_getattr(IndexTuple tup, int attnum,
TupleDesc tupleDesc, bool *isnull);
extern void CopyIndexTuple(IndexTuple source, IndexTuple *target);
extern IndexTuple CopyIndexTuple(IndexTuple source);
#endif /* ITUP_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.65 2003/02/22 00:45:05 tgl Exp $
* $Id: nbtree.h,v 1.66 2003/02/23 06:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -118,6 +118,8 @@ typedef struct BTItemData
typedef BTItemData *BTItem;
#define CopyBTItem(btitem) ((BTItem) CopyIndexTuple((IndexTuple) (btitem)))
/*
* For XLOG: size without alignment. Sizeof works as long as
* IndexTupleData has exactly 8 bytes.
......@@ -434,6 +436,7 @@ extern Datum btvacuumcleanup(PG_FUNCTION_ARGS);
*/
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
bool index_is_unique, Relation heapRel);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
BTStack stack, bool is_root, bool is_only);
......@@ -448,8 +451,10 @@ extern void _bt_relbuf(Relation rel, Buffer buf);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern bool _bt_page_recyclable(Page page);
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level);
extern void _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid);
extern int _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full);
/*
* prototypes for functions in nbtsearch.c
......@@ -488,7 +493,6 @@ extern BTItem _bt_formitem(IndexTuple itup);
/*
* prototypes for functions in nbtsort.c
*/
typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
extern BTSpool *_bt_spoolinit(Relation index, bool isunique);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment