Commit 20ba5ca6 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Move WAL continuation record information to WAL page header.

The continuation record only contained one field, xl_rem_len, so it makes
things simpler to just include it in the WAL page header. This wastes four
bytes on pages that don't begin with a continuation from the previous page, plus
four bytes on every page, because of padding.

The motivation of this is to make it easier to calculate how much space a
WAL record needs. Before this patch, it depended on how many page boundaries
the record crosses. The motivation of that, in turn, is to separate the
allocation of space in the WAL from the copying of the record data to the
allocated space. Keeping the calculation of space required simple helps to
keep the critical section of allocating the space from WAL short. But that's
not included in this patch yet.

Bump WAL version number again, as this is an incompatible change.
parent dfda6eba
...@@ -696,7 +696,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) ...@@ -696,7 +696,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
{ {
XLogCtlInsert *Insert = &XLogCtl->Insert; XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecord *record; XLogRecord *record;
XLogContRecord *contrecord;
XLogRecPtr RecPtr; XLogRecPtr RecPtr;
XLogRecPtr WriteRqst; XLogRecPtr WriteRqst;
uint32 freespace; uint32 freespace;
...@@ -1085,9 +1084,7 @@ begin:; ...@@ -1085,9 +1084,7 @@ begin:;
curridx = Insert->curridx; curridx = Insert->curridx;
/* Insert cont-record header */ /* Insert cont-record header */
Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD; Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
contrecord = (XLogContRecord *) Insert->currpos; Insert->currpage->xlp_rem_len = write_len;
contrecord->xl_rem_len = write_len;
Insert->currpos += SizeOfXLogContRecord;
freespace = INSERT_FREESPACE(Insert); freespace = INSERT_FREESPACE(Insert);
} }
...@@ -3941,7 +3938,8 @@ retry: ...@@ -3941,7 +3938,8 @@ retry:
if (total_len > len) if (total_len > len)
{ {
/* Need to reassemble record */ /* Need to reassemble record */
XLogContRecord *contrecord; char *contrecord;
XLogPageHeader pageHeader;
XLogRecPtr pagelsn; XLogRecPtr pagelsn;
uint32 gotlen = len; uint32 gotlen = len;
...@@ -3969,30 +3967,30 @@ retry: ...@@ -3969,30 +3967,30 @@ retry:
readOff))); readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); pageHeader = (XLogPageHeader) readBuf;
contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize); pageHeaderSize = XLogPageHeaderSize(pageHeader);
if (contrecord->xl_rem_len == 0 || contrecord = (char *) readBuf + pageHeaderSize;
total_len != (contrecord->xl_rem_len + gotlen)) if (pageHeader->xlp_rem_len == 0 ||
total_len != (pageHeader->xlp_rem_len + gotlen))
{ {
char fname[MAXFNAMELEN]; char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo); XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr), ereport(emode_for_corrupt_record(emode, *RecPtr),
(errmsg("invalid contrecord length %u in log segment %s, offset %u", (errmsg("invalid contrecord length %u in log segment %s, offset %u",
contrecord->xl_rem_len, pageHeader->xlp_rem_len,
XLogFileNameP(curFileTLI, readSegNo), XLogFileNameP(curFileTLI, readSegNo),
readOff))); readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
len = XLOG_BLCKSZ - pageHeaderSize - SizeOfXLogContRecord; len = XLOG_BLCKSZ - pageHeaderSize;
if (contrecord->xl_rem_len > len) if (pageHeader->xlp_rem_len > len)
{ {
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len); memcpy(buffer, (char *) contrecord, len);
gotlen += len; gotlen += len;
buffer += len; buffer += len;
continue; continue;
} }
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, memcpy(buffer, (char *) contrecord, pageHeader->xlp_rem_len);
contrecord->xl_rem_len);
break; break;
} }
if (!RecordIsValid(record, *RecPtr, emode)) if (!RecordIsValid(record, *RecPtr, emode))
...@@ -4000,8 +3998,7 @@ retry: ...@@ -4000,8 +3998,7 @@ retry:
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
XLogSegNoOffsetToRecPtr( XLogSegNoOffsetToRecPtr(
readSegNo, readSegNo,
readOff + pageHeaderSize + readOff + pageHeaderSize + MAXALIGN(pageHeader->xlp_rem_len),
MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len),
EndRecPtr); EndRecPtr);
ReadRecPtr = *RecPtr; ReadRecPtr = *RecPtr;
/* needn't worry about XLOG SWITCH, it can't cross page boundaries */ /* needn't worry about XLOG SWITCH, it can't cross page boundaries */
......
...@@ -48,30 +48,10 @@ typedef struct BkpBlock ...@@ -48,30 +48,10 @@ typedef struct BkpBlock
/* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */ /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
} BkpBlock; } BkpBlock;
/*
* When there is not enough space on current page for whole record, we
* continue on the next page with continuation record. (However, the
* XLogRecord header will never be split across pages; if there's less than
* SizeOfXLogRecord space left at the end of a page, we just waste it.)
*
* Note that xl_rem_len includes backup-block data; that is, it tracks
* xl_tot_len not xl_len in the initial header. Also note that the
* continuation data isn't necessarily aligned.
*/
typedef struct XLogContRecord
{
uint32 xl_rem_len; /* total len of remaining data for record */
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
} XLogContRecord;
#define SizeOfXLogContRecord sizeof(XLogContRecord)
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD072 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD073 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
...@@ -79,6 +59,19 @@ typedef struct XLogPageHeaderData ...@@ -79,6 +59,19 @@ typedef struct XLogPageHeaderData
uint16 xlp_info; /* flag bits, see below */ uint16 xlp_info; /* flag bits, see below */
TimeLineID xlp_tli; /* TimeLineID of first record on page */ TimeLineID xlp_tli; /* TimeLineID of first record on page */
XLogRecPtr xlp_pageaddr; /* XLOG address of this page */ XLogRecPtr xlp_pageaddr; /* XLOG address of this page */
/*
* When there is not enough space on current page for whole record, we
* continue on the next page. xlp_rem_len is the number of bytes
* remaining from a previous page. (However, the XLogRecord header will
* never be split across pages; if there's less than SizeOfXLogRecord
* space left at the end of a page, we just waste it.)
*
* Note that xlp_rem_len includes backup-block data; that is, it tracks
* xl_tot_len not xl_len in the initial header. Also note that the
* continuation data isn't necessarily aligned.
*/
uint32 xlp_rem_len; /* total len of remaining data for record */
} XLogPageHeaderData; } XLogPageHeaderData;
#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData)) #define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment