Commit d2e5e20e authored by Peter Geoghegan

Add xl_btree_delete optimization.

Commit 558a9165 taught _bt_delitems_delete() to produce its own XID
horizon on the primary.  Standbys no longer needed to generate their own
latestRemovedXid, since they could just use the explicitly logged value
from the primary instead.  The deleted offset numbers array from the
xl_btree_delete WAL record was no longer used by the REDO routine for
anything other than deleting the items.

This enables a minor optimization:  We now treat the array as buffer
state, not generic WAL data, following _bt_delitems_vacuum()'s example.
This should be a minor win, since it allows us to avoid including the
deleted items array in cases where XLogInsert() stores the whole buffer
anyway.  The primary goal here is to make the code more maintainable,
though.  Removing inessential differences between the two functions
highlights the fundamental differences that remain.

Also change xl_btree_delete to use uint32 for the size of the array of
item offsets being deleted.  This brings xl_btree_delete closer to
xl_btree_vacuum.  Furthermore, it seems like a good idea to use an
explicit-width integer type (the field was previously an "int").

Bump XLOG_PAGE_MAGIC because xl_btree_delete changed.

Discussion: https://postgr.es/m/CAH2-Wzkz4TjmezzfAbaV1zYrh=fr0bCpzuJTvBe5iUQ3aUPsCQ@mail.gmail.com
parent 56a3921a
......@@ -961,20 +961,15 @@ _bt_page_recyclable(Page page)
}
/*
* Delete item(s) from a btree page during VACUUM.
*
* This must only be used for deleting leaf items. Deleting an item on a
* non-leaf page has to be done as part of an atomic action that includes
* deleting the page it points to.
* Delete item(s) from a btree leaf page during VACUUM.
*
* This routine assumes that the caller has a super-exclusive write lock on
* the buffer. Also, the given deletable array *must* be sorted in ascending
* order.
*
* We record VACUUMs and b-tree deletes differently in WAL. Deletes must
* generate recovery conflicts by accessing the heap inline, whereas VACUUMs
* can rely on the initial heap scan taking care of the problem (pruning would
* have generated the conflicts needed for hot standby already).
* generate their own latestRemovedXid by accessing the heap directly, whereas
* VACUUMs rely on the initial heap scan taking care of it indirectly.
*/
void
_bt_delitems_vacuum(Relation rel, Buffer buf,
......@@ -1030,9 +1025,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
/*
* The target-offsets array is not in the buffer, but pretend that it
* is. When XLogInsert stores the whole buffer, the offsets array
* need not be stored too.
* The deletable array is not in the buffer, but pretend that it is.
* When XLogInsert stores the whole buffer, the array need not be
* stored too.
*/
XLogRegisterBufData(0, (char *) deletable,
ndeletable * sizeof(OffsetNumber));
......@@ -1046,21 +1041,19 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
}
/*
* Delete item(s) from a btree page during single-page cleanup.
*
* As above, must only be used on leaf pages.
* Delete item(s) from a btree leaf page during single-page cleanup.
*
* This routine assumes that the caller has pinned and write locked the
* buffer. Also, the given itemnos *must* appear in increasing order in the
* array.
* buffer. Also, the given deletable array *must* be sorted in ascending
* order.
*
* This is nearly the same as _bt_delitems_vacuum as far as what it does to
* the page, but it needs to generate its own recovery conflicts by accessing
* the heap. See comments for _bt_delitems_vacuum.
* the page, but it needs to generate its own latestRemovedXid by accessing
* the heap. This is used by the REDO routine to generate recovery conflicts.
*/
void
_bt_delitems_delete(Relation rel, Buffer buf,
OffsetNumber *itemnos, int nitems,
OffsetNumber *deletable, int ndeletable,
Relation heapRel)
{
Page page = BufferGetPage(buf);
......@@ -1068,18 +1061,18 @@ _bt_delitems_delete(Relation rel, Buffer buf,
TransactionId latestRemovedXid = InvalidTransactionId;
/* Shouldn't be called unless there's something to do */
Assert(nitems > 0);
Assert(ndeletable > 0);
if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
latestRemovedXid =
index_compute_xid_horizon_for_tuples(rel, heapRel, buf,
itemnos, nitems);
deletable, ndeletable);
/* No ereport(ERROR) until changes are logged */
START_CRIT_SECTION();
/* Fix the page */
PageIndexMultiDelete(page, itemnos, nitems);
PageIndexMultiDelete(page, deletable, ndeletable);
/*
* Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
......@@ -1098,18 +1091,19 @@ _bt_delitems_delete(Relation rel, Buffer buf,
xl_btree_delete xlrec_delete;
xlrec_delete.latestRemovedXid = latestRemovedXid;
xlrec_delete.nitems = nitems;
xlrec_delete.ndeleted = ndeletable;
XLogBeginInsert();
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
/*
* We need the target-offsets array whether or not we store the whole
* buffer, to allow us to find the latestRemovedXid on a standby
* server.
* The deletable array is not in the buffer, but pretend that it is.
* When XLogInsert stores the whole buffer, the array need not be
* stored too.
*/
XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
XLogRegisterBufData(0, (char *) deletable,
ndeletable * sizeof(OffsetNumber));
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
......
......@@ -449,16 +449,11 @@ btree_xlog_delete(XLogReaderState *record)
*/
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
{
OffsetNumber *unused;
char *ptr = XLogRecGetBlockData(record, 0, NULL);
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
page = (Page) BufferGetPage(buffer);
PageIndexMultiDelete(page, unused, xlrec->nitems);
}
PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted);
/* Mark the page as not containing any LP_DEAD items */
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
......
......@@ -53,8 +53,8 @@ btree_desc(StringInfo buf, XLogReaderState *record)
{
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
appendStringInfo(buf, "%d items, latest removed xid %u",
xlrec->nitems, xlrec->latestRemovedXid);
appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u",
xlrec->latestRemovedXid, xlrec->ndeleted);
break;
}
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
......
......@@ -779,7 +779,8 @@ extern bool _bt_page_recyclable(Page page);
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
OffsetNumber *deletable, int ndeletable);
extern void _bt_delitems_delete(Relation rel, Buffer buf,
OffsetNumber *itemnos, int nitems, Relation heapRel);
OffsetNumber *deletable, int ndeletable,
Relation heapRel);
extern int _bt_pagedel(Relation rel, Buffer buf);
/*
......
......@@ -126,12 +126,12 @@ typedef struct xl_btree_split
typedef struct xl_btree_delete
{
TransactionId latestRemovedXid;
int nitems;
uint32 ndeleted;
/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
/* DELETED TARGET OFFSET NUMBERS FOLLOW */
} xl_btree_delete;
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int))
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, ndeleted) + sizeof(uint32))
/*
* This is what we need to know about page reuse within btree. This record
......
......@@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
#define XLOG_PAGE_MAGIC 0xD103 /* can be used as WAL version indicator */
#define XLOG_PAGE_MAGIC 0xD104 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment