Commit 94e03330 authored by Tom Lane

Create a routine PageIndexMultiDelete() that replaces a loop around
PageIndexTupleDelete() with a single pass of compactification ---
logic mostly lifted from PageRepairFragmentation.  I noticed while
profiling that a VACUUM that's cleaning up a whole lot of deleted
tuples would spend as much as a third of its CPU time in
PageIndexTupleDelete; not too surprising considering the loop method
was roughly O(N^2) in the number of tuples involved.
parent 775d2830
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
......@@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
OffsetNumber *itemnos, int nitems)
{
Page page = BufferGetPage(buf);
int i;
/* No ereport(ERROR) until changes are logged */
START_CRIT_SECTION();
/*
* Delete the items in reverse order so we don't have to think about
* adjusting item numbers for previous deletions.
*/
for (i = nitems - 1; i >= 0; i--)
PageIndexTupleDelete(page, itemnos[i]);
/* Fix the page */
PageIndexMultiDelete(page, itemnos, nitems);
/* XLOG stuff */
if (!rel->rd_istemp)
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
/* be careful to delete from back to front */
while (unused < unend)
{
unend--;
PageIndexTupleDelete(page, *unend);
}
PageIndexMultiDelete(page, unused, unend - unused);
}
PageSetLSN(page, lsn);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
}
/*
* sorting support for PageRepairFragmentation
* sorting support for PageRepairFragmentation and PageIndexMultiDelete
*/
/*
 * One entry per line pointer that is being kept while a page is compacted.
 * The array of these is sorted by itemoff (see itemoffcompare) so that tuple
 * data can be moved in a single pass.
 */
typedef struct itemIdSortData
{
int offsetindex; /* linp array index (0-based; callers add 1 to get the offset number) */
int itemoff; /* page offset of item data */
Size alignedlen; /* MAXALIGN(item data len) */
ItemIdData olditemid; /* saved copy of the original line pointer; used only in PageIndexMultiDelete */
} itemIdSortData;
/* Pointer to an itemIdSortData entry; used for the palloc'd sort array. */
typedef itemIdSortData *itemIdSort;
......@@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
*
* Frees fragmented space on a page.
* It doesn't remove unused line pointers! Please don't change this.
* This routine is usable for heap pages only.
*
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
*
* Returns number of unused line pointers on page. If "unused" is not NULL
* then the unused[] array is filled with indexes of unused line pointers.
......@@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
}
}
}
/*
 * PageIndexMultiDelete
 *
 * Remove a whole batch of tuples from an index page with one compaction
 * pass instead of calling PageIndexTupleDelete once per tuple.
 *
 *	page	- the index page to modify
 *	itemnos	- offset numbers of the tuples to remove; the caller *must*
 *			  pass these in ascending item number order
 *	nitems	- number of entries in itemnos[]
 *
 * Surviving line pointers are renumbered densely starting at offset 1, and
 * their tuple data is packed up against the special space.
 *
 * Reports ERROR with ERRCODE_DATA_CORRUPTED if the page header pointers,
 * any line pointer, or the summed tuple lengths look inconsistent, and a
 * plain ERROR if itemnos[] is invalid or out of order.
 */
void
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		pd_lower = phdr->pd_lower;
	Offset		pd_upper = phdr->pd_upper;
	Offset		pd_special = phdr->pd_special;
	itemIdSort	keep;			/* one entry per surviving tuple */
	ItemId		lp;
	int			nline;
	int			nkeep = 0;		/* number of entries filled in keep[] */
	int			ndeleted = 0;	/* number of itemnos[] entries matched so far */
	int			i;
	Size		keptbytes = 0;	/* sum of aligned lengths of survivors */
	Offset		upper;
	Size		size;
	unsigned	offset;
	OffsetNumber offnum;

	/*
	 * For a very small batch, retail PageIndexTupleDelete is the best way.
	 * Walk the list back to front so that earlier deletions cannot shift
	 * the item numbers still waiting to be processed.
	 *
	 * TODO: tune the magic number here
	 */
	if (nitems <= 2)
	{
		for (i = nitems - 1; i >= 0; i--)
			PageIndexTupleDelete(page, itemnos[i]);
		return;
	}

	/*
	 * As with PageRepairFragmentation, be paranoid: insist on a sane page
	 * header before trusting any of its pointers.
	 */
	if (!(pd_lower >= SizeOfPageHeaderData &&
		  pd_lower <= pd_upper &&
		  pd_upper <= pd_special &&
		  pd_special <= BLCKSZ &&
		  pd_special == MAXALIGN(pd_special)))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Pass 1: look at every line pointer, validate it, and record the ones
	 * that survive.  The page itself is left untouched during this pass,
	 * because we are still validity-checking.
	 */
	nline = PageGetMaxOffsetNumber(page);
	keep = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);

	for (offnum = 1; offnum <= nline; offnum++)
	{
		lp = PageGetItemId(page, offnum);
		size = ItemIdGetLength(lp);
		offset = ItemIdGetOffset(lp);

		/* tuple data must be aligned and lie between pd_upper and pd_special */
		if (offset < pd_upper ||
			(offset + size) > pd_special ||
			offset != MAXALIGN(offset))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item pointer: offset = %u, size = %u",
							offset, (unsigned int) size)));

		/* next victim in the (sorted) delete list?  then just step past it */
		if (ndeleted < nitems && itemnos[ndeleted] == offnum)
		{
			ndeleted++;
			continue;
		}

		/* survivor: remember its new slot, old location and old line pointer */
		keep[nkeep].offsetindex = nkeep;
		keep[nkeep].itemoff = offset;
		keep[nkeep].olditemid = *lp;
		keep[nkeep].alignedlen = MAXALIGN(size);
		keptbytes += keep[nkeep].alignedlen;
		nkeep++;
	}

	/* an unmatched entry means itemnos[] was invalid or out of order */
	if (ndeleted != nitems)
		elog(ERROR, "incorrect index offsets supplied");

	if (keptbytes > (Size) (pd_special - pd_lower))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted item lengths: total %u, available space %u",
						(unsigned int) keptbytes, pd_special - pd_lower)));

	/* order the survivors by decreasing itemoff */
	qsort((char *) keep, nkeep, sizeof(itemIdSortData),
		  itemoffcompare);

	/*
	 * Pass 2: slide each surviving tuple up against the previously placed
	 * one, starting from the special space, and install its line pointer in
	 * the new slot.  Each line pointer's old contents were saved in
	 * olditemid, so slots can be overwritten freely here.
	 */
	upper = pd_special;
	for (i = 0; i < nkeep; i++)
	{
		itemIdSort	cur = &keep[i];

		lp = PageGetItemId(page, cur->offsetindex + 1);
		upper -= cur->alignedlen;
		/* source and destination are in the same page, so use memmove */
		memmove((char *) page + upper,
				(char *) page + cur->itemoff,
				cur->alignedlen);
		*lp = cur->olditemid;
		lp->lp_off = upper;
	}

	/* the line pointer array now holds exactly nkeep entries */
	phdr->pd_lower = SizeOfPageHeaderData + nkeep * sizeof(ItemIdData);
	phdr->pd_upper = upper;

	pfree(keep);
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
extern Size PageGetFreeSpace(Page page);
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
#endif /* BUFPAGE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment