Commit 09c2e7cd authored by Robert Haas

hash: Fix write-ahead logging bugs related to init forks.

One, logging for CREATE INDEX was oblivious to the fact that when
an unlogged table is created, *only* operations on the init fork
should be logged.

Two, init fork buffers need to be flushed after they are written;
otherwise, a filesystem-level copy following recovery may do the
wrong thing.  (There may be a better fix for this issue than the
one used here, but this is transposed from the similar logic already
present in XLogReadBufferForRedoExtended, and a broader refactoring
after beta2 seems inadvisable.)

Amit Kapila, reviewed by Ashutosh Sharma, Kyotaro Horiguchi,
and Michael Paquier

Discussion: http://postgr.es/m/CAA4eK1JpcMsEtOL_J7WODumeEfyrPi7FPYHeVdS7fyyrCrgp4w@mail.gmail.com
parent 2f7f45a6
......@@ -33,6 +33,7 @@ hash_xlog_init_meta_page(XLogReaderState *record)
XLogRecPtr lsn = record->EndRecPtr;
Page page;
Buffer metabuf;
ForkNumber forknum;
xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
......@@ -44,6 +45,17 @@ hash_xlog_init_meta_page(XLogReaderState *record)
page = (Page) BufferGetPage(metabuf);
PageSetLSN(page, lsn);
MarkBufferDirty(metabuf);
/*
* Force the on-disk state of init forks to always be in sync with the
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
* special handling for init forks as create index operations don't log a
* full page image of the metapage.
*/
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
if (forknum == INIT_FORKNUM)
FlushOneBuffer(metabuf);
/* all done */
UnlockReleaseBuffer(metabuf);
}
......@@ -60,6 +72,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
Page page;
HashMetaPage metap;
uint32 num_buckets;
ForkNumber forknum;
xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
......@@ -70,6 +83,16 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
PageSetLSN(BufferGetPage(bitmapbuf), lsn);
MarkBufferDirty(bitmapbuf);
/*
* Force the on-disk state of init forks to always be in sync with the
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
* special handling for init forks as create index operations don't log a
* full page image of the bitmap page.
*/
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
if (forknum == INIT_FORKNUM)
FlushOneBuffer(bitmapbuf);
UnlockReleaseBuffer(bitmapbuf);
/* add the new bitmap page to the metapage's list of bitmaps */
......@@ -90,6 +113,10 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
PageSetLSN(page, lsn);
MarkBufferDirty(metabuf);
XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
if (forknum == INIT_FORKNUM)
FlushOneBuffer(metabuf);
}
if (BufferIsValid(metabuf))
UnlockReleaseBuffer(metabuf);
......
......@@ -345,12 +345,20 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
int32 ffactor;
uint32 num_buckets;
uint32 i;
bool use_wal;
/* safety check */
if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
RelationGetRelationName(rel));
/*
* WAL log creation of pages if the relation is persistent, or this is the
* init fork. Init forks for unlogged relations always need to be WAL
* logged.
*/
use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
/*
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
......@@ -384,7 +392,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
metap = HashPageGetMeta(pg);
/* XLOG stuff */
if (RelationNeedsWAL(rel))
if (use_wal)
{
xl_hash_init_meta_page xlrec;
XLogRecPtr recptr;
......@@ -427,11 +435,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
MarkBufferDirty(buf);
log_newpage(&rel->rd_node,
forkNum,
blkno,
BufferGetPage(buf),
true);
if (use_wal)
log_newpage(&rel->rd_node,
forkNum,
blkno,
BufferGetPage(buf),
true);
_hash_relbuf(rel, buf);
}
......@@ -459,7 +468,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
MarkBufferDirty(metabuf);
/* XLOG stuff */
if (RelationNeedsWAL(rel))
if (use_wal)
{
xl_hash_init_bitmap_page xlrec;
XLogRecPtr recptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment