Commit a7124870 authored by Andres Freund

Fix transient mdsync() errors of truncated relations due to 72a98a63.

Unfortunately, the segment size checks from 72a98a63 had the negative
side-effect of breaking a corner case in mdsync(): when processing an
fsync request for a truncated-away segment, mdsync() could fail with
"could not fsync file" (if the previous segment was smaller than
RELSEG_SIZE), because _mdfd_getseg() would no longer return the relevant
segment.

The cleanest fix seems to be to allow the caller of _mdfd_getseg() to
specify whether checks for RELSEG_SIZE are performed. To allow doing so,
change the ExtensionBehavior enum into a bitmask. Besides allowing for
the addition of EXTENSION_DONT_CHECK_SIZE, this makes for a nicer
implementation of EXTENSION_REALLY_RETURN_NULL.

Besides mdsync() the only callsite that should change behaviour due to
this is mdprefetch() which now doesn't create segments anymore, even in
recovery. Given the uses of mdprefetch() that seems better.

Reported-By: Thom Brown
Discussion: CAA-aLv72QazLvPdKZYpVn4a_Eh+i4_cxuB03k+iCuZM_xjc+6Q@mail.gmail.com
parent 613fb29a
...@@ -163,23 +163,29 @@ static CycleCtr mdsync_cycle_ctr = 0; ...@@ -163,23 +163,29 @@ static CycleCtr mdsync_cycle_ctr = 0;
static CycleCtr mdckpt_cycle_ctr = 0; static CycleCtr mdckpt_cycle_ctr = 0;
typedef enum /* behavior for mdopen & _mdfd_getseg */ /*** behavior for mdopen & _mdfd_getseg ***/
{ /* ereport if segment not present */
/* ereport if segment not present, create in recovery */ #define EXTENSION_FAIL (1 << 0)
EXTENSION_FAIL, /* return NULL if segment not present */
/* return NULL if not present, create in recovery */ #define EXTENSION_RETURN_NULL (1 << 1)
EXTENSION_RETURN_NULL, /* create new segments as needed */
/* return NULL if not present */ #define EXTENSION_CREATE (1 << 2)
EXTENSION_REALLY_RETURN_NULL, /* create new segments if needed during recovery */
/* create new segments as needed */ #define EXTENSION_CREATE_RECOVERY (1 << 3)
EXTENSION_CREATE /*
} ExtensionBehavior; * Allow opening segments which are preceded by segments smaller than
* RELSEG_SIZE, e.g. inactive segments (see above). Note that this breaks
* mdnblocks() and related functionality henceforth - which currently is ok,
* because this is only required in the checkpointer which never uses
* mdnblocks().
*/
#define EXTENSION_DONT_CHECK_SIZE (1 << 4)
/* local routines */ /* local routines */
static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum,
bool isRedo); bool isRedo);
static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, int behavior);
ExtensionBehavior behavior);
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg); MdfdVec *seg);
static void register_unlink(RelFileNodeBackend rnode); static void register_unlink(RelFileNodeBackend rnode);
...@@ -189,7 +195,7 @@ static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, ...@@ -189,7 +195,7 @@ static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
BlockNumber segno, int oflags); BlockNumber segno, int oflags);
static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior); BlockNumber blkno, bool skipFsync, int behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg); MdfdVec *seg);
...@@ -570,7 +576,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ...@@ -570,7 +576,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
* invent one out of whole cloth. * invent one out of whole cloth.
*/ */
static MdfdVec * static MdfdVec *
mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior) mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
{ {
MdfdVec *mdfd; MdfdVec *mdfd;
char *path; char *path;
...@@ -596,8 +602,7 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior) ...@@ -596,8 +602,7 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
if (fd < 0) if (fd < 0)
{ {
if ((behavior == EXTENSION_RETURN_NULL || if ((behavior & EXTENSION_RETURN_NULL) &&
behavior == EXTENSION_REALLY_RETURN_NULL) &&
FILE_POSSIBLY_DELETED(errno)) FILE_POSSIBLY_DELETED(errno))
{ {
pfree(path); pfree(path);
...@@ -690,8 +695,8 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, ...@@ -690,8 +695,8 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
int segnum_start, int segnum_start,
segnum_end; segnum_end;
v = _mdfd_getseg(reln, forknum, blocknum, false, v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
EXTENSION_REALLY_RETURN_NULL); EXTENSION_RETURN_NULL);
/* /*
* We might be flushing buffers of already removed relations, that's * We might be flushing buffers of already removed relations, that's
...@@ -737,7 +742,8 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ...@@ -737,7 +742,8 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
reln->smgr_rnode.node.relNode, reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend); reln->smgr_rnode.backend);
v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); v = _mdfd_getseg(reln, forknum, blocknum, false,
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
...@@ -812,7 +818,8 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ...@@ -812,7 +818,8 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
reln->smgr_rnode.node.relNode, reln->smgr_rnode.node.relNode,
reln->smgr_rnode.backend); reln->smgr_rnode.backend);
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL); v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
...@@ -1219,7 +1226,9 @@ mdsync(void) ...@@ -1219,7 +1226,9 @@ mdsync(void)
/* Attempt to open and fsync the target segment */ /* Attempt to open and fsync the target segment */
seg = _mdfd_getseg(reln, forknum, seg = _mdfd_getseg(reln, forknum,
(BlockNumber) segno * (BlockNumber) RELSEG_SIZE, (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
false, EXTENSION_RETURN_NULL); false,
EXTENSION_RETURN_NULL
| EXTENSION_DONT_CHECK_SIZE);
INSTR_TIME_SET_CURRENT(sync_start); INSTR_TIME_SET_CURRENT(sync_start);
...@@ -1773,14 +1782,18 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, ...@@ -1773,14 +1782,18 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
*/ */
static MdfdVec * static MdfdVec *
_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
bool skipFsync, ExtensionBehavior behavior) bool skipFsync, int behavior)
{ {
MdfdVec *v = mdopen(reln, forknum, behavior); MdfdVec *v = mdopen(reln, forknum, behavior);
BlockNumber targetseg; BlockNumber targetseg;
BlockNumber nextsegno; BlockNumber nextsegno;
/* some way to handle non-existent segments needs to be specified */
Assert(behavior &
(EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL));
if (!v) if (!v)
return NULL; /* if EXTENSION_(REALLY_)RETURN_NULL */ return NULL; /* if behavior & EXTENSION_RETURN_NULL */
targetseg = blkno / ((BlockNumber) RELSEG_SIZE); targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
for (nextsegno = 1; nextsegno <= targetseg; nextsegno++) for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)
...@@ -1795,8 +1808,8 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, ...@@ -1795,8 +1808,8 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
if (nblocks > ((BlockNumber) RELSEG_SIZE)) if (nblocks > ((BlockNumber) RELSEG_SIZE))
elog(FATAL, "segment too big"); elog(FATAL, "segment too big");
if (behavior == EXTENSION_CREATE || if ((behavior & EXTENSION_CREATE) ||
(InRecovery && behavior != EXTENSION_REALLY_RETURN_NULL)) (InRecovery && (behavior & EXTENSION_CREATE_RECOVERY)))
{ {
/* /*
* Normally we will create new segments only if authorized by * Normally we will create new segments only if authorized by
...@@ -1827,15 +1840,16 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, ...@@ -1827,15 +1840,16 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
} }
flags = O_CREAT; flags = O_CREAT;
} }
else if (nblocks < ((BlockNumber) RELSEG_SIZE)) else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
nblocks < ((BlockNumber) RELSEG_SIZE))
{ {
/* /*
* When not extending, only open the next segment if the * When not extending (or explicitly including truncated
* current one is exactly RELSEG_SIZE. If not (this branch), * segments), only open the next segment if the current one is
* either return NULL or fail. * exactly RELSEG_SIZE. If not (this branch), either return
* NULL or fail.
*/ */
if (behavior == EXTENSION_RETURN_NULL || if (behavior & EXTENSION_RETURN_NULL)
behavior == EXTENSION_REALLY_RETURN_NULL)
{ {
/* /*
* Some callers discern between reasons for _mdfd_getseg() * Some callers discern between reasons for _mdfd_getseg()
...@@ -1858,8 +1872,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, ...@@ -1858,8 +1872,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
if (v->mdfd_chain == NULL) if (v->mdfd_chain == NULL)
{ {
if ((behavior == EXTENSION_RETURN_NULL || if ((behavior & EXTENSION_RETURN_NULL) &&
behavior == EXTENSION_REALLY_RETURN_NULL) &&
FILE_POSSIBLY_DELETED(errno)) FILE_POSSIBLY_DELETED(errno))
return NULL; return NULL;
ereport(ERROR, ereport(ERROR,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment