Commit a8d539f1 authored by Tom Lane

To support external compression of archived WAL data, add a flag bit to
WAL records that shows whether it is safe to remove full-page images
(ie, whether or not an on-line backup was in progress when the WAL entry
was made).  Also make provision for an XLOG_NOOP record type that can be
used to fill in the extra space when decompressing the data for restore.

This is the portion of Koichi Suzuki's "full page writes" patch that
has to go into the core database.  The remainder of that work is two
external compression and decompression programs, which for the time being
will undergo separate development on pgfoundry.  Per discussion.

Also, twiddle the handling of BTREE_SPLIT records to ensure it'll be
possible to compress them (the previous coding caused essential info
to be omitted).  The other commonly-used record types seem OK already,
with the possible exception of GIN and GIST WAL records, which I don't
understand well enough to opine on.
parent 2f2717d1
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.156 2007/04/11 20:47:37 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.157 2007/05/20 21:08:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1034,21 +1034,23 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, ...@@ -1034,21 +1034,23 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* Log the new item and its offset, if it was inserted on the left * Log the new item and its offset, if it was inserted on the left
* page. (If it was put on the right page, we don't need to explicitly * page. (If it was put on the right page, we don't need to explicitly
* WAL log it because it's included with all the other items on the * WAL log it because it's included with all the other items on the
* right page.) Show these as belonging to the left page buffer, * right page.) Show the new item as belonging to the left page buffer,
* so that they are not stored if XLogInsert decides it needs a * so that it is not stored if XLogInsert decides it needs a full-page
* full-page image of the left page. * image of the left page. We store the offset anyway, though, to
* support archive compression of these records.
*/ */
if (newitemonleft) if (newitemonleft)
{ {
lastrdata->next = lastrdata + 1; lastrdata->next = lastrdata + 1;
lastrdata++; lastrdata++;
lastrdata->data = (char *) &newitemoff; lastrdata->data = (char *) &newitemoff;
lastrdata->len = sizeof(OffsetNumber); lastrdata->len = sizeof(OffsetNumber);
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = InvalidBuffer;
lastrdata->buffer_std = true;
lastrdata->next = lastrdata + 1; lastrdata->next = lastrdata + 1;
lastrdata++; lastrdata++;
lastrdata->data = (char *) newitem; lastrdata->data = (char *) newitem;
lastrdata->len = MAXALIGN(newitemsz); lastrdata->len = MAXALIGN(newitemsz);
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = buf; /* backup block 1 */
...@@ -1064,6 +1066,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, ...@@ -1064,6 +1066,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
*/ */
lastrdata->next = lastrdata + 1; lastrdata->next = lastrdata + 1;
lastrdata++; lastrdata++;
lastrdata->data = NULL; lastrdata->data = NULL;
lastrdata->len = 0; lastrdata->len = 0;
lastrdata->buffer = buf; /* backup block 1 */ lastrdata->buffer = buf; /* backup block 1 */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.43 2007/04/11 20:47:38 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.44 2007/05/20 21:08:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -292,14 +292,17 @@ btree_xlog_split(bool onleft, bool isroot, ...@@ -292,14 +292,17 @@ btree_xlog_split(bool onleft, bool isroot,
} }
/* Extract newitem and newitemoff, if present */ /* Extract newitem and newitemoff, if present */
if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1)) if (onleft)
{ {
IndexTupleData itupdata;
/* Extract the offset (still assuming 16-bit alignment) */ /* Extract the offset (still assuming 16-bit alignment) */
memcpy(&newitemoff, datapos, sizeof(OffsetNumber)); memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
datapos += sizeof(OffsetNumber); datapos += sizeof(OffsetNumber);
datalen -= sizeof(OffsetNumber); datalen -= sizeof(OffsetNumber);
}
if (onleft && !(record->xl_info & XLR_BKP_BLOCK_1))
{
IndexTupleData itupdata;
/* /*
* We need to copy the tuple header to apply IndexTupleDSize, because * We need to copy the tuple header to apply IndexTupleDSize, because
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.268 2007/04/30 21:01:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -783,6 +783,19 @@ begin:; ...@@ -783,6 +783,19 @@ begin:;
} }
} }
/*
* If we backed up any full blocks and online backup is not in progress,
* mark the backup blocks as removable. This allows the WAL archiver to
* know whether it is safe to compress archived WAL data by transforming
* full-block records into the non-full-block format.
*
* Note: we could just set the flag whenever !forcePageWrites, but
* defining it like this leaves the info bit free for some potential
* other use in records without any backup blocks.
*/
if ((info & XLR_BKP_BLOCK_MASK) && !Insert->forcePageWrites)
info |= XLR_BKP_REMOVABLE;
/* /*
* If there isn't enough space on the current XLOG page for a record * If there isn't enough space on the current XLOG page for a record
* header, advance to the next page (leaving the unused space as zeroes). * header, advance to the next page (leaving the unused space as zeroes).
...@@ -5868,6 +5881,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -5868,6 +5881,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
RecoveryRestartPoint(&checkPoint); RecoveryRestartPoint(&checkPoint);
} }
else if (info == XLOG_NOOP)
{
/* nothing to do here */
}
else if (info == XLOG_SWITCH) else if (info == XLOG_SWITCH)
{ {
/* nothing to do here */ /* nothing to do here */
...@@ -5894,6 +5911,10 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec) ...@@ -5894,6 +5911,10 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
checkpoint->nextMultiOffset, checkpoint->nextMultiOffset,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
} }
else if (info == XLOG_NOOP)
{
appendStringInfo(buf, "xlog no-op");
}
else if (info == XLOG_NEXTOID) else if (info == XLOG_NEXTOID)
{ {
Oid nextOid; Oid nextOid;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.76 2007/01/05 22:19:51 momjian Exp $ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.77 2007/05/20 21:08:19 tgl Exp $
*/ */
#ifndef XLOG_H #ifndef XLOG_H
#define XLOG_H #define XLOG_H
...@@ -66,8 +66,7 @@ typedef struct XLogRecord ...@@ -66,8 +66,7 @@ typedef struct XLogRecord
/* /*
* If we backed up any disk blocks with the XLOG record, we use flag bits in * If we backed up any disk blocks with the XLOG record, we use flag bits in
* xl_info to signal it. We support backup of up to 3 disk blocks per XLOG * xl_info to signal it. We support backup of up to 3 disk blocks per XLOG
* record. (Could support 4 if we cared to dedicate all the xl_info bits for * record.
* this purpose; currently bit 0 of xl_info is unused and available.)
*/ */
#define XLR_BKP_BLOCK_MASK 0x0E /* all info bits used for bkp blocks */ #define XLR_BKP_BLOCK_MASK 0x0E /* all info bits used for bkp blocks */
#define XLR_MAX_BKP_BLOCKS 3 #define XLR_MAX_BKP_BLOCKS 3
...@@ -76,6 +75,15 @@ typedef struct XLogRecord ...@@ -76,6 +75,15 @@ typedef struct XLogRecord
#define XLR_BKP_BLOCK_2 XLR_SET_BKP_BLOCK(1) /* 0x04 */ #define XLR_BKP_BLOCK_2 XLR_SET_BKP_BLOCK(1) /* 0x04 */
#define XLR_BKP_BLOCK_3 XLR_SET_BKP_BLOCK(2) /* 0x02 */ #define XLR_BKP_BLOCK_3 XLR_SET_BKP_BLOCK(2) /* 0x02 */
/*
* Bit 0 of xl_info is set if the backed-up blocks could safely be removed
* from a compressed version of XLOG (that is, they are backed up only to
* prevent partial-page-write problems, and not to ensure consistency of PITR
* recovery). The compression algorithm would need to extract data from the
* blocks to create an equivalent non-full-page XLOG record.
*/
#define XLR_BKP_REMOVABLE 0x01
/* /*
* Sometimes we log records which are out of transaction control. * Sometimes we log records which are out of transaction control.
* Rmgr may "or" XLOG_NO_TRAN into info passed to XLogInsert to indicate this. * Rmgr may "or" XLOG_NO_TRAN into info passed to XLogInsert to indicate this.
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.20 2007/04/30 21:01:53 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.21 2007/05/20 21:08:19 tgl Exp $
*/ */
#ifndef XLOG_INTERNAL_H #ifndef XLOG_INTERNAL_H
#define XLOG_INTERNAL_H #define XLOG_INTERNAL_H
...@@ -71,7 +71,7 @@ typedef struct XLogContRecord ...@@ -71,7 +71,7 @@ typedef struct XLogContRecord
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD061 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD062 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.37 2007/04/03 04:14:26 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.38 2007/05/20 21:08:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -44,6 +44,7 @@ typedef struct CheckPoint ...@@ -44,6 +44,7 @@ typedef struct CheckPoint
/* XLOG info values for XLOG rmgr */ /* XLOG info values for XLOG rmgr */
#define XLOG_CHECKPOINT_SHUTDOWN 0x00 #define XLOG_CHECKPOINT_SHUTDOWN 0x00
#define XLOG_CHECKPOINT_ONLINE 0x10 #define XLOG_CHECKPOINT_ONLINE 0x10
#define XLOG_NOOP 0x20
#define XLOG_NEXTOID 0x30 #define XLOG_NEXTOID 0x30
#define XLOG_SWITCH 0x40 #define XLOG_SWITCH 0x40
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment