Commit 8dc3c971 authored by Tom Lane's avatar Tom Lane

Treat directory open failures as hard errors in ResetUnloggedRelations().

Previously, this code just reported such problems at LOG level and kept
going.  The problem with this approach is that transient failures (e.g.,
ENFILE) could prevent us from resetting unlogged relations to empty,
yet allow recovery to appear to complete successfully.  That seems like
a data corruption hazard large enough to treat such problems as reasons
to fail startup.

For the same reason, treat unlink failures for unlogged files as hard
errors not just LOG messages.  It's a little odd that we did it like that
when file-level errors in other steps (copy_file, fsync_fname) are ERRORs.

The sole case that I left alone is that ENOENT failure on a tablespace
(not database) directory is not an error, though it will now be logged
rather than just silently ignored.  This is to cover the scenario where
a previous DROP TABLESPACE removed the tablespace directory but failed
before removing the pg_tblspc symlink.  I'm not sure that that's very
likely in practice, but that seems like the only real excuse for the
old behavior here, so let's allow for it.  (As coded, this will also
allow ENOENT on $PGDATA/base/.  But since we'll fail soon enough if
that's gone, I don't think we need to complicate this code by
distinguishing that from a true tablespace case.)

Discussion: https://postgr.es/m/21040.1512418508@sss.pgh.pa.us
parent e7cfb26f
...@@ -98,7 +98,9 @@ ResetUnloggedRelations(int op) ...@@ -98,7 +98,9 @@ ResetUnloggedRelations(int op)
MemoryContextDelete(tmpctx); MemoryContextDelete(tmpctx);
} }
/* Process one tablespace directory for ResetUnloggedRelations */ /*
* Process one tablespace directory for ResetUnloggedRelations
*/
static void static void
ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
{ {
...@@ -107,29 +109,32 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) ...@@ -107,29 +109,32 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
char dbspace_path[MAXPGPATH * 2]; char dbspace_path[MAXPGPATH * 2];
ts_dir = AllocateDir(tsdirname); ts_dir = AllocateDir(tsdirname);
if (ts_dir == NULL)
/*
* If we get ENOENT on a tablespace directory, log it and return. This
* can happen if a previous DROP TABLESPACE crashed between removing the
* tablespace directory and removing the symlink in pg_tblspc. We don't
* really want to prevent database startup in that scenario, so let it
* pass instead. Any other type of error will be reported by ReadDir
* (causing a startup failure).
*/
if (ts_dir == NULL && errno == ENOENT)
{ {
/* anything except ENOENT is fishy */ ereport(LOG,
if (errno != ENOENT) (errcode_for_file_access(),
ereport(LOG, errmsg("could not open directory \"%s\": %m",
(errcode_for_file_access(), tsdirname)));
errmsg("could not open directory \"%s\": %m",
tsdirname)));
return; return;
} }
while ((de = ReadDir(ts_dir, tsdirname)) != NULL) while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
{ {
int i = 0;
/* /*
* We're only interested in the per-database directories, which have * We're only interested in the per-database directories, which have
* numeric names. Note that this code will also (properly) ignore "." * numeric names. Note that this code will also (properly) ignore "."
* and "..". * and "..".
*/ */
while (isdigit((unsigned char) de->d_name[i])) if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
++i;
if (de->d_name[i] != '\0' || i == 0)
continue; continue;
snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
...@@ -140,7 +145,9 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) ...@@ -140,7 +145,9 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
FreeDir(ts_dir); FreeDir(ts_dir);
} }
/* Process one per-dbspace directory for ResetUnloggedRelations */ /*
* Process one per-dbspace directory for ResetUnloggedRelations
*/
static void static void
ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
{ {
...@@ -158,20 +165,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -158,20 +165,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
*/ */
if ((op & UNLOGGED_RELATION_CLEANUP) != 0) if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
{ {
HTAB *hash = NULL; HTAB *hash;
HASHCTL ctl; HASHCTL ctl;
/* Open the directory. */
dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}
/* /*
* It's possible that someone could create a ton of unlogged relations * It's possible that someone could create a ton of unlogged relations
* in the same database & tablespace, so we'd better use a hash table * in the same database & tablespace, so we'd better use a hash table
...@@ -179,11 +175,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -179,11 +175,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* need to be reset. Otherwise, this cleanup operation would be * need to be reset. Otherwise, this cleanup operation would be
* O(n^2). * O(n^2).
*/ */
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(unlogged_relation_entry); ctl.keysize = sizeof(unlogged_relation_entry);
ctl.entrysize = sizeof(unlogged_relation_entry); ctl.entrysize = sizeof(unlogged_relation_entry);
hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM); hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
/* Scan the directory. */ /* Scan the directory. */
dbspace_dir = AllocateDir(dbspacedirname);
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
...@@ -222,21 +220,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -222,21 +220,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
} }
/* /*
* Now, make a second pass and remove anything that matches. First, * Now, make a second pass and remove anything that matches.
* reopen the directory.
*/ */
dbspace_dir = AllocateDir(dbspacedirname); dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
hash_destroy(hash);
return;
}
/* Scan the directory. */
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
...@@ -266,15 +252,11 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -266,15 +252,11 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
{ {
snprintf(rm_path, sizeof(rm_path), "%s/%s", snprintf(rm_path, sizeof(rm_path), "%s/%s",
dbspacedirname, de->d_name); dbspacedirname, de->d_name);
if (unlink(rm_path) < 0)
/* ereport(ERROR,
* It's tempting to actually throw an error here, but since (errcode_for_file_access(),
* this code gets run during database startup, that could errmsg("could not remove file \"%s\": %m",
* result in the database failing to start. (XXX Should we do rm_path)));
* it anyway?)
*/
if (unlink(rm_path))
elog(LOG, "could not unlink file \"%s\": %m", rm_path);
else else
elog(DEBUG2, "unlinked file \"%s\"", rm_path); elog(DEBUG2, "unlinked file \"%s\"", rm_path);
} }
...@@ -294,19 +276,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -294,19 +276,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
*/ */
if ((op & UNLOGGED_RELATION_INIT) != 0) if ((op & UNLOGGED_RELATION_INIT) != 0)
{ {
/* Open the directory. */
dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
/* we just saw this directory, so it really ought to be there */
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}
/* Scan the directory. */ /* Scan the directory. */
dbspace_dir = AllocateDir(dbspacedirname);
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
...@@ -350,16 +321,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -350,16 +321,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* (especially the metadata ones) at once. * (especially the metadata ones) at once.
*/ */
dbspace_dir = AllocateDir(dbspacedirname); dbspace_dir = AllocateDir(dbspacedirname);
if (dbspace_dir == NULL)
{
/* we just saw this directory, so it really ought to be there */
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open directory \"%s\": %m",
dbspacedirname)));
return;
}
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
...@@ -388,6 +349,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) ...@@ -388,6 +349,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
FreeDir(dbspace_dir); FreeDir(dbspace_dir);
/*
* Lastly, fsync the database directory itself, ensuring the
* filesystem remembers the file creations and deletions we've done.
* We don't bother with this during a call that does only
* UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
* get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
* too at the next startup attempt.
*/
fsync_fname(dbspacedirname, true); fsync_fname(dbspacedirname, true);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment