Commit f54d0629 authored by Andres Freund

Fix ALTER TABLE ... SET TABLESPACE for unlogged relations.

Changing the tablespace of an unlogged relation did not WAL log the
creation and content of the init fork. Thus, after a standby is
promoted, unlogged relations cannot be accessed anymore, with errors
like:
ERROR:  58P01: could not open file "pg_tblspc/...": No such file or directory
Additionally the init fork was not synced to disk, independent of the
configured wal_level, a relatively small durability risk.

Investigation of that problem also brought to light that, even for
permanent relations, the creation of !main forks was not WAL logged,
i.e. no XLOG_SMGR_CREATE records were emitted. That mostly turns out not
to be a problem, because these files were created when the actual
relation data is copied; nonexistent files are not treated as an error
condition during replay. But that doesn't work for empty files, and
generally feels a bit haphazard. Luckily, outside init and main forks,
empty forks don't occur often or are not a problem.

Add the required WAL logging and syncing to disk.

Reported-By: Michael Paquier
Author: Michael Paquier and Andres Freund
Discussion: 20151210163230.GA11331@alap3.anarazel.de
Backpatch: 9.1, where unlogged relations were introduced
parent 085423e3
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "catalog/pg_type_fn.h" #include "catalog/pg_type_fn.h"
#include "catalog/storage.h" #include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "catalog/toasting.h" #include "catalog/toasting.h"
#include "commands/cluster.h" #include "commands/cluster.h"
#include "commands/comment.h" #include "commands/comment.h"
...@@ -9659,6 +9660,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) ...@@ -9659,6 +9660,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
if (smgrexists(rel->rd_smgr, forkNum)) if (smgrexists(rel->rd_smgr, forkNum))
{ {
smgrcreate(dstrel, forkNum, false); smgrcreate(dstrel, forkNum, false);
/*
* WAL log creation if the relation is persistent, or this is the
* init fork of an unlogged relation.
*/
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM))
log_smgrcreate(&newrnode, forkNum);
copy_relation_data(rel->rd_smgr, dstrel, forkNum, copy_relation_data(rel->rd_smgr, dstrel, forkNum,
rel->rd_rel->relpersistence); rel->rd_rel->relpersistence);
} }
...@@ -9878,6 +9888,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, ...@@ -9878,6 +9888,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
char *buf; char *buf;
Page page; Page page;
bool use_wal; bool use_wal;
bool copying_initfork;
BlockNumber nblocks; BlockNumber nblocks;
BlockNumber blkno; BlockNumber blkno;
...@@ -9890,11 +9901,20 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, ...@@ -9890,11 +9901,20 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
buf = (char *) palloc(BLCKSZ); buf = (char *) palloc(BLCKSZ);
page = (Page) buf; page = (Page) buf;
/*
* The init fork for an unlogged relation in many respects has to be
* treated the same as normal relation, changes need to be WAL logged and
* it needs to be synced to disk.
*/
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM;
/* /*
* We need to log the copied data in WAL iff WAL archiving/streaming is * We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's a permanent relation. * enabled AND it's a permanent relation.
*/ */
use_wal = XLogIsNeeded() && relpersistence == RELPERSISTENCE_PERMANENT; use_wal = XLogIsNeeded() &&
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
nblocks = smgrnblocks(src, forkNum); nblocks = smgrnblocks(src, forkNum);
...@@ -9949,7 +9969,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, ...@@ -9949,7 +9969,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
* wouldn't replay our earlier WAL entries. If we do not fsync those pages * wouldn't replay our earlier WAL entries. If we do not fsync those pages
* here, they might still not be on disk when the crash occurs. * here, they might still not be on disk when the crash occurs.
*/ */
if (relpersistence == RELPERSISTENCE_PERMANENT) if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
smgrimmedsync(dst, forkNum); smgrimmedsync(dst, forkNum);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment