Commit c87469e6 authored by Tom Lane

Fix problems with loss of tuple commit status bits during WAL redo of
VACUUM FULL tuple moves.  Store full-width t_infomask in WAL, rather
than storing low 8 bits and expecting to be able to reconstruct upper
bits.  While at it, remove redundant t_oid field from WAL headers
(the OID, if present, is now recorded in the data portion of the tuple).
WAL version number bumped --- this does not force an initdb, you can
instead run pg_resetxlog after a clean shutdown of the old postmaster.
parent 72f8efdc
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.148 2002/09/04 20:31:09 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.149 2002/09/26 22:46:29 tgl Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -1185,10 +1185,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) ...@@ -1185,10 +1185,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
rdata[0].len = SizeOfHeapInsert; rdata[0].len = SizeOfHeapInsert;
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
xlhdr.t_oid = HeapTupleGetOid(tup);
xlhdr.t_natts = tup->t_data->t_natts; xlhdr.t_natts = tup->t_data->t_natts;
xlhdr.t_infomask = tup->t_data->t_infomask;
xlhdr.t_hoff = tup->t_data->t_hoff; xlhdr.t_hoff = tup->t_data->t_hoff;
xlhdr.mask = tup->t_data->t_infomask; /*
* note we mark rdata[1] as belonging to buffer; if XLogInsert
* decides to write the whole page to the xlog, we don't need to
* store xl_heap_header in the xlog.
*/
rdata[1].buffer = buffer; rdata[1].buffer = buffer;
rdata[1].data = (char *) &xlhdr; rdata[1].data = (char *) &xlhdr;
rdata[1].len = SizeOfHeapHeader; rdata[1].len = SizeOfHeapHeader;
...@@ -1200,7 +1204,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) ...@@ -1200,7 +1204,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
rdata[2].len = tup->t_len - offsetof(HeapTupleHeaderData, t_bits); rdata[2].len = tup->t_len - offsetof(HeapTupleHeaderData, t_bits);
rdata[2].next = NULL; rdata[2].next = NULL;
/* If this is the single and first tuple on page... */ /*
* If this is the single and first tuple on page, we can reinit the
* page instead of restoring the whole thing. Set flag, and hide
* buffer references from XLogInsert.
*/
if (ItemPointerGetOffsetNumber(&(tup->t_self)) == FirstOffsetNumber && if (ItemPointerGetOffsetNumber(&(tup->t_self)) == FirstOffsetNumber &&
PageGetMaxOffsetNumber(page) == FirstOffsetNumber) PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
{ {
...@@ -2041,11 +2049,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, ...@@ -2041,11 +2049,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
rdata[1].len = 0; rdata[1].len = 0;
rdata[1].next = &(rdata[2]); rdata[1].next = &(rdata[2]);
xlhdr.hdr.t_oid = HeapTupleGetOid(newtup);
xlhdr.hdr.t_natts = newtup->t_data->t_natts; xlhdr.hdr.t_natts = newtup->t_data->t_natts;
xlhdr.hdr.t_infomask = newtup->t_data->t_infomask;
xlhdr.hdr.t_hoff = newtup->t_data->t_hoff; xlhdr.hdr.t_hoff = newtup->t_data->t_hoff;
xlhdr.hdr.mask = newtup->t_data->t_infomask; if (move) /* remember xmax & xmin */
if (move) /* remember xmin & xmax */
{ {
TransactionId xid[2]; /* xmax, xmin */ TransactionId xid[2]; /* xmax, xmin */
...@@ -2060,6 +2067,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, ...@@ -2060,6 +2067,10 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
2 * sizeof(TransactionId)); 2 * sizeof(TransactionId));
hsize += 2 * sizeof(TransactionId); hsize += 2 * sizeof(TransactionId);
} }
/*
* As with insert records, we need not store the rdata[2] segment
* if we decide to store the whole buffer instead.
*/
rdata[2].buffer = newbuf; rdata[2].buffer = newbuf;
rdata[2].data = (char *) &xlhdr; rdata[2].data = (char *) &xlhdr;
rdata[2].len = hsize; rdata[2].len = hsize;
...@@ -2276,18 +2287,16 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -2276,18 +2287,16 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record)
htup = &tbuf.hdr; htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData)); MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */ /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) &tbuf + offsetof(HeapTupleHeaderData, t_bits), memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
(char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader, (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
newlen); newlen);
newlen += offsetof(HeapTupleHeaderData, t_bits); newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_natts = xlhdr.t_natts; htup->t_natts = xlhdr.t_natts;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff; htup->t_hoff = xlhdr.t_hoff;
htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
HeapTupleHeaderSetXmin(htup, record->xl_xid); HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId); HeapTupleHeaderSetCmin(htup, FirstCommandId);
htup->t_ctid = xlrec->target.tid; htup->t_ctid = xlrec->target.tid;
if (reln->rd_rel->relhasoids)
HeapTupleHeaderSetOid(htup, xlhdr.t_oid);
offnum = PageAddItem(page, (Item) htup, newlen, offnum, offnum = PageAddItem(page, (Item) htup, newlen, offnum,
LP_USED | OverwritePageMode); LP_USED | OverwritePageMode);
...@@ -2454,34 +2463,27 @@ newsame:; ...@@ -2454,34 +2463,27 @@ newsame:;
htup = &tbuf.hdr; htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData)); MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */ /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) &tbuf + offsetof(HeapTupleHeaderData, t_bits), memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
(char *) xlrec + hsize, (char *) xlrec + hsize,
newlen); newlen);
newlen += offsetof(HeapTupleHeaderData, t_bits); newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_natts = xlhdr.t_natts; htup->t_natts = xlhdr.t_natts;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff; htup->t_hoff = xlhdr.t_hoff;
if (reln->rd_rel->relhasoids)
HeapTupleHeaderSetOid(htup, xlhdr.t_oid);
if (move) if (move)
{ {
TransactionId xid[2]; /* xmax, xmin */ TransactionId xid[2]; /* xmax, xmin */
hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
memcpy((char *) xid, memcpy((char *) xid,
(char *) xlrec + hsize, 2 * sizeof(TransactionId)); (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader,
htup->t_infomask = xlhdr.mask; 2 * sizeof(TransactionId));
htup->t_infomask &= ~(HEAP_XMIN_COMMITTED |
HEAP_XMIN_INVALID |
HEAP_MOVED_OFF);
htup->t_infomask |= HEAP_MOVED_IN;
HeapTupleHeaderSetXmin(htup, xid[1]); HeapTupleHeaderSetXmin(htup, xid[1]);
HeapTupleHeaderSetXmax(htup, xid[0]); HeapTupleHeaderSetXmax(htup, xid[0]);
HeapTupleHeaderSetXvac(htup, record->xl_xid); HeapTupleHeaderSetXvac(htup, record->xl_xid);
} }
else else
{ {
htup->t_infomask = HEAP_XMAX_INVALID | xlhdr.mask;
HeapTupleHeaderSetXmin(htup, record->xl_xid); HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId); HeapTupleHeaderSetCmin(htup, FirstCommandId);
} }
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: htup.h,v 1.60 2002/09/04 20:31:37 momjian Exp $ * $Id: htup.h,v 1.61 2002/09/26 22:46:29 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -261,6 +261,8 @@ do { \ ...@@ -261,6 +261,8 @@ do { \
/* /*
* WAL record definitions for heapam.c's WAL operations
*
* XLOG allows to store some information in high 4 bits of log * XLOG allows to store some information in high 4 bits of log
* record xl_info field * record xl_info field
*/ */
...@@ -300,15 +302,22 @@ typedef struct xl_heap_delete ...@@ -300,15 +302,22 @@ typedef struct xl_heap_delete
#define SizeOfHeapDelete (offsetof(xl_heap_delete, target) + SizeOfHeapTid) #define SizeOfHeapDelete (offsetof(xl_heap_delete, target) + SizeOfHeapTid)
/*
* We don't store the whole fixed part (HeapTupleHeaderData) of an inserted
* or updated tuple in WAL; we can save a few bytes by reconstructing the
* fields that are available elsewhere in the WAL record, or perhaps just
* plain needn't be reconstructed. These are the fields we must store.
* NOTE: t_hoff could be recomputed, but we may as well store it because
* it will come for free due to alignment considerations.
*/
typedef struct xl_heap_header typedef struct xl_heap_header
{ {
Oid t_oid;
int16 t_natts; int16 t_natts;
uint16 t_infomask;
uint8 t_hoff; uint8 t_hoff;
uint8 mask; /* low 8 bits of t_infomask */
} xl_heap_header; } xl_heap_header;
#define SizeOfHeapHeader (offsetof(xl_heap_header, mask) + sizeof(uint8)) #define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
/* This is what we need to know about insert */ /* This is what we need to know about insert */
typedef struct xl_heap_insert typedef struct xl_heap_insert
...@@ -340,6 +349,8 @@ typedef struct xl_heap_clean ...@@ -340,6 +349,8 @@ typedef struct xl_heap_clean
#define SizeOfHeapClean (offsetof(xl_heap_clean, block) + sizeof(BlockNumber)) #define SizeOfHeapClean (offsetof(xl_heap_clean, block) + sizeof(BlockNumber))
/* /*
* MaxTupleSize is the maximum allowed size of a tuple, including header and * MaxTupleSize is the maximum allowed size of a tuple, including header and
* MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff * MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: xlog.h,v 1.37 2002/09/04 20:31:37 momjian Exp $ * $Id: xlog.h,v 1.38 2002/09/26 22:46:29 tgl Exp $
*/ */
#ifndef XLOG_H #ifndef XLOG_H
#define XLOG_H #define XLOG_H
...@@ -110,7 +110,7 @@ typedef struct XLogContRecord ...@@ -110,7 +110,7 @@ typedef struct XLogContRecord
/* /*
* Each page of XLOG file has a header like this: * Each page of XLOG file has a header like this:
*/ */
#define XLOG_PAGE_MAGIC 0xD059 /* can be used as WAL version indicator */ #define XLOG_PAGE_MAGIC 0xD05A /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData typedef struct XLogPageHeaderData
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment