Commit eddbf397 authored by Tom Lane's avatar Tom Lane

Extend yesterday's patch so that the bgwriter is also told to forget

pending fsyncs during DROP DATABASE.  Obviously necessary in hindsight :-(
parent 530b10c7
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.189 2007/01/16 13:28:56 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.190 2007/01/17 16:25:01 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/procarray.h" #include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/acl.h" #include "utils/acl.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/flatfiles.h" #include "utils/flatfiles.h"
...@@ -643,6 +644,12 @@ dropdb(const char *dbname, bool missing_ok) ...@@ -643,6 +644,12 @@ dropdb(const char *dbname, bool missing_ok)
*/ */
FreeSpaceMapForgetDatabase(db_id); FreeSpaceMapForgetDatabase(db_id);
/*
* Tell bgwriter to forget any pending fsync requests for files in the
* database; else it'll fail at next checkpoint.
*/
ForgetDatabaseFsyncRequests(db_id);
/* /*
* On Windows, force a checkpoint so that the bgwriter doesn't hold any * On Windows, force a checkpoint so that the bgwriter doesn't hold any
* open files, which would cause rmdir() to fail. * open files, which would cause rmdir() to fail.
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.35 2007/01/17 00:17:20 tgl Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.36 2007/01/17 16:25:01 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -103,7 +103,7 @@ ...@@ -103,7 +103,7 @@
typedef struct typedef struct
{ {
RelFileNode rnode; RelFileNode rnode;
BlockNumber segno; /* InvalidBlockNumber means "revoke" */ BlockNumber segno; /* see md.c for special values */
/* might add a real request-type field later; not needed yet */ /* might add a real request-type field later; not needed yet */
} BgWriterRequest; } BgWriterRequest;
...@@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime) ...@@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime)
* ForwardFsyncRequest * ForwardFsyncRequest
* Forward a file-fsync request from a backend to the bgwriter * Forward a file-fsync request from a backend to the bgwriter
* *
* segno specifies which segment (not block!) of the relation needs to be
* fsync'd. If segno == InvalidBlockNumber, the meaning is to revoke any
* pending fsync requests for the entire relation (this message is sent
* when the relation is about to be deleted).
*
* Whenever a backend is compelled to write directly to a relation * Whenever a backend is compelled to write directly to a relation
* (which should be seldom, if the bgwriter is getting its job done), * (which should be seldom, if the bgwriter is getting its job done),
* the backend calls this routine to pass over knowledge that the relation * the backend calls this routine to pass over knowledge that the relation
* is dirty and must be fsync'd before next checkpoint. * is dirty and must be fsync'd before next checkpoint.
* *
* segno specifies which segment (not block!) of the relation needs to be
* fsync'd. (Since the valid range is much less than BlockNumber, we can
* use high values for special flags; that's all internal to md.c, which
* see for details.)
*
* If we are unable to pass over the request (at present, this can happen * If we are unable to pass over the request (at present, this can happen
* if the shared memory queue is full), we return false. That forces * if the shared memory queue is full), we return false. That forces
* the backend to do its own fsync. We hope that will be even more seldom. * the backend to do its own fsync. We hope that will be even more seldom.
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.126 2007/01/17 00:17:21 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.127 2007/01/17 16:25:01 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,6 +31,10 @@ ...@@ -31,6 +31,10 @@
/* interval for calling AbsorbFsyncRequests in mdsync */ /* interval for calling AbsorbFsyncRequests in mdsync */
#define FSYNCS_PER_ABSORB 10 #define FSYNCS_PER_ABSORB 10
/* special values for the segno arg to RememberFsyncRequest */
#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
/* /*
* On Windows, we have to interpret EACCES as possibly meaning the same as * On Windows, we have to interpret EACCES as possibly meaning the same as
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform, * ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
...@@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
* We have to clean out any pending fsync requests for the doomed relation, * We have to clean out any pending fsync requests for the doomed relation,
* else the next mdsync() will fail. * else the next mdsync() will fail.
*/ */
if (pendingOpsTable) ForgetRelationFsyncRequests(rnode);
{
/* standalone backend or startup process: fsync state is local */
RememberFsyncRequest(rnode, InvalidBlockNumber);
}
else if (IsUnderPostmaster)
{
/*
* Notify the bgwriter about it. If we fail to queue the revoke
* message, we have to sleep and try again ... ugly, but hopefully
* won't happen often.
*
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
* an error would leave the no-longer-used file still present on
* disk, which would be bad, so I'm inclined to assume that the
* bgwriter will always empty the queue soon.
*/
while (!ForwardFsyncRequest(rnode, InvalidBlockNumber))
pg_usleep(10000L); /* 10 msec seems a good number */
/*
* Note we don't wait for the bgwriter to actually absorb the
* revoke message; see mdsync() for the implications.
*/
}
path = relpath(rnode); path = relpath(rnode);
...@@ -894,7 +875,8 @@ mdsync(void) ...@@ -894,7 +875,8 @@ mdsync(void)
* what we will do is retry the whole process after absorbing fsync * what we will do is retry the whole process after absorbing fsync
* request messages again. Since mdunlink() queues a "revoke" message * request messages again. Since mdunlink() queues a "revoke" message
* before actually unlinking, the fsync request is guaranteed to be gone * before actually unlinking, the fsync request is guaranteed to be gone
* the second time if it really was this case. * the second time if it really was this case. DROP DATABASE likewise
* has to tell us to forget fsync requests before it starts deletions.
*/ */
do { do {
HASH_SEQ_STATUS hstat; HASH_SEQ_STATUS hstat;
...@@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg) ...@@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
* We stuff the fsync request into the local hash table for execution * We stuff the fsync request into the local hash table for execution
* during the bgwriter's next checkpoint. * during the bgwriter's next checkpoint.
* *
* segno == InvalidBlockNumber is a "revoke" request: remove any pending * The range of possible segment numbers is way less than the range of
* fsync requests for the whole relation. * BlockNumber, so we can reserve high values of segno for special purposes.
* We define two: FORGET_RELATION_FSYNC means to drop pending fsyncs for
* a relation, and FORGET_DATABASE_FSYNC means to drop pending fsyncs for
* a whole database. (These are a tad slow because the hash table has to be
* searched linearly, but it doesn't seem worth rethinking the table structure
* for them.)
*/ */
void void
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
{ {
Assert(pendingOpsTable); Assert(pendingOpsTable);
if (segno != InvalidBlockNumber) if (segno == FORGET_RELATION_FSYNC)
{
/* Remove any pending requests for the entire relation */
HASH_SEQ_STATUS hstat;
PendingOperationEntry *entry;
hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{
if (RelFileNodeEquals(entry->tag.rnode, rnode))
{
/* Okay, delete this entry */
if (hash_search(pendingOpsTable, &entry->tag,
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "pendingOpsTable corrupted");
}
}
}
else if (segno == FORGET_DATABASE_FSYNC)
{
/* Remove any pending requests for the entire database */
HASH_SEQ_STATUS hstat;
PendingOperationEntry *entry;
hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{
if (entry->tag.rnode.dbNode == rnode.dbNode)
{
/* Okay, delete this entry */
if (hash_search(pendingOpsTable, &entry->tag,
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "pendingOpsTable corrupted");
}
}
}
else
{ {
/* Enter a request to fsync this segment */ /* Normal case: enter a request to fsync this segment */
PendingOperationTag key; PendingOperationTag key;
PendingOperationEntry *entry; PendingOperationEntry *entry;
bool found; bool found;
...@@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) ...@@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
if (!found) /* new entry, so initialize it */ if (!found) /* new entry, so initialize it */
entry->failures = 0; entry->failures = 0;
} }
else }
/*
* ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten
*/
void
ForgetRelationFsyncRequests(RelFileNode rnode)
{
if (pendingOpsTable)
{
/* standalone backend or startup process: fsync state is local */
RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
}
else if (IsUnderPostmaster)
{ {
/* /*
* Remove any pending requests for the entire relation. (This is a * Notify the bgwriter about it. If we fail to queue the revoke
* tad slow but it doesn't seem worth rethinking the table structure.) * message, we have to sleep and try again ... ugly, but hopefully
* won't happen often.
*
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
* an error would leave the no-longer-used file still present on
* disk, which would be bad, so I'm inclined to assume that the
* bgwriter will always empty the queue soon.
*/ */
HASH_SEQ_STATUS hstat; while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
PendingOperationEntry *entry; pg_usleep(10000L); /* 10 msec seems a good number */
/*
* Note we don't wait for the bgwriter to actually absorb the
* revoke message; see mdsync() for the implications.
*/
}
}
hash_seq_init(&hstat, pendingOpsTable); /*
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) * ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten
{ */
if (RelFileNodeEquals(entry->tag.rnode, rnode)) void
{ ForgetDatabaseFsyncRequests(Oid dbid)
/* Okay, delete this entry */ {
if (hash_search(pendingOpsTable, &entry->tag, RelFileNode rnode;
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "pendingOpsTable corrupted"); rnode.dbNode = dbid;
} rnode.spcNode = 0;
} rnode.relNode = 0;
if (pendingOpsTable)
{
/* standalone backend or startup process: fsync state is local */
RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
}
else if (IsUnderPostmaster)
{
/* see notes in ForgetRelationFsyncRequests */
while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */
} }
} }
/* /*
* _fdvec_alloc() -- Make a MdfdVec object. * _fdvec_alloc() -- Make a MdfdVec object.
*/ */
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.57 2007/01/05 22:19:58 momjian Exp $ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln); ...@@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln);
extern void mdsync(void); extern void mdsync(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno); extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
extern void ForgetRelationFsyncRequests(RelFileNode rnode);
extern void ForgetDatabaseFsyncRequests(Oid dbid);
/* smgrtype.c */ /* smgrtype.c */
extern Datum smgrout(PG_FUNCTION_ARGS); extern Datum smgrout(PG_FUNCTION_ARGS);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment