Commit a3115f0d authored by Heikki Linnakangas's avatar Heikki Linnakangas

Only WAL-log the modified portion in an UPDATE, if possible.

When a row is updated, and the new tuple version is put on the same page as
the old one, only WAL-log the part of the new tuple that's not identical to
the old. This saves significantly on the amount of WAL that needs to be
written, in the common case that most fields are not modified.

Amit Kapila, with a lot of back and forth with me, Robert Haas, and others.
parent 17d787a3
This diff is collapsed.
......@@ -2335,6 +2335,29 @@ XLogRecPtrToBytePos(XLogRecPtr ptr)
return result;
}
/*
* Determine whether the buffer referenced has to be backed up.
*
* Since we don't yet have the insert lock, fullPageWrites and forcePageWrites
* could change later, so the result should be used for optimization purposes
* only.
*/
bool
XLogCheckBufferNeedsBackup(Buffer buffer)
{
bool doPageWrites;
Page page;
page = BufferGetPage(buffer);
doPageWrites = XLogCtl->Insert.fullPageWrites || XLogCtl->Insert.forcePageWrites;
if (doPageWrites && PageGetLSN(page) <= RedoRecPtr)
return true; /* buffer requires backup */
return false; /* buffer does not need to be backed up */
}
/*
* Determine whether the buffer referenced by an XLogRecData item has to
* be backed up, and if so fill a BkpBlock struct for it. In any case
......
......@@ -67,6 +67,8 @@
#define XLOG_HEAP_CONTAINS_OLD_TUPLE (1<<2)
#define XLOG_HEAP_CONTAINS_OLD_KEY (1<<3)
#define XLOG_HEAP_CONTAINS_NEW_TUPLE (1<<4)
#define XLOG_HEAP_PREFIX_FROM_OLD (1<<5)
#define XLOG_HEAP_SUFFIX_FROM_OLD (1<<6)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLOG_HEAP_CONTAINS_OLD \
......@@ -179,7 +181,22 @@ typedef struct xl_heap_update
ItemPointerData newtid; /* new inserted tuple id */
uint8 old_infobits_set; /* infomask bits to set on old tuple */
uint8 flags;
/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
/*
* If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
* set, the prefix and/or suffix come next, as one or two uint16s.
*
* After that, xl_heap_header_len and new tuple data follow. The new
* tuple data and length don't include the prefix and suffix, which are
* copied from the old tuple on replay. The new tuple data is omitted if
* a full-page image of the page was taken (unless the
* XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
* anyway).
*
* If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
* set, another xl_heap_header_len struct and tuple data for the old tuple
* follows.
*/
} xl_heap_update;
#define SizeOfHeapUpdate (offsetof(xl_heap_update, flags) + sizeof(uint8))
......
......@@ -279,6 +279,7 @@ typedef struct CheckpointStatsData
extern CheckpointStatsData CheckpointStats;
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment