Commit 54d9e8c6 authored by Alvaro Herrera's avatar Alvaro Herrera

Use "transient" files for blind writes

"Blind writes" are a mechanism to push buffers down to disk when
evicting them; since they may belong to different databases than the one
a backend is connected to, the backend does not necessarily have a
relation to link them to, and thus no way to blow them away.  We were
keeping those files open indefinitely, which would cause a problem if
the underlying table was deleted, because the operating system would not
be able to reclaim the disk space used by those files.

To fix, have bufmgr mark such files as transient to smgr; the lower
layer is allowed to close the file descriptor when the current
transaction ends.  We must be careful to have any other access of the
file to remove the transient markings, to prevent unnecessary expensive
system calls when evicting buffers belonging to our own database (which
files we're likely to require again soon.)
parent 74b1d29d
...@@ -1834,7 +1834,10 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, ...@@ -1834,7 +1834,10 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
* written.) * written.)
* *
* If the caller has an smgr reference for the buffer's relation, pass it * If the caller has an smgr reference for the buffer's relation, pass it
* as the second parameter. If not, pass NULL. * as the second parameter. If not, pass NULL. In the latter case, the
* relation will be marked as "transient" so that the corresponding
* kernel-level file descriptors are closed when the current transaction ends,
* if any.
*/ */
static void static void
FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
...@@ -1856,9 +1859,12 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) ...@@ -1856,9 +1859,12 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
errcontext.previous = error_context_stack; errcontext.previous = error_context_stack;
error_context_stack = &errcontext; error_context_stack = &errcontext;
/* Find smgr relation for buffer */ /* Find smgr relation for buffer, and mark it as transient */
if (reln == NULL) if (reln == NULL)
{
reln = smgropen(buf->tag.rnode, InvalidBackendId); reln = smgropen(buf->tag.rnode, InvalidBackendId);
smgrsettransient(reln);
}
TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
buf->tag.blockNum, buf->tag.blockNum,
......
...@@ -125,12 +125,11 @@ static int max_safe_fds = 32; /* default if not changed */ ...@@ -125,12 +125,11 @@ static int max_safe_fds = 32; /* default if not changed */
/* these are the assigned bits in fdstate below: */ /* these are the assigned bits in fdstate below: */
#define FD_TEMPORARY (1 << 0) /* T = delete when closed */ #define FD_TEMPORARY (1 << 0) /* T = delete when closed */
#define FD_XACT_TEMPORARY (1 << 1) /* T = delete at eoXact */ #define FD_XACT_TEMPORARY (1 << 1) /* T = delete at eoXact */
#define FD_XACT_TRANSIENT (1 << 2) /* T = close (not delete) at aoXact,
* but keep VFD */
/* /* Flag to tell whether there are files to close/delete at end of transaction */
* Flag to tell whether it's worth scanning VfdCache looking for temp files to static bool have_pending_fd_cleanup = false;
* close
*/
static bool have_xact_temporary_files = false;
typedef struct vfd typedef struct vfd
{ {
...@@ -953,7 +952,7 @@ OpenTemporaryFile(bool interXact) ...@@ -953,7 +952,7 @@ OpenTemporaryFile(bool interXact)
VfdCache[file].resowner = CurrentResourceOwner; VfdCache[file].resowner = CurrentResourceOwner;
/* ensure cleanup happens at eoxact */ /* ensure cleanup happens at eoxact */
have_xact_temporary_files = true; have_pending_fd_cleanup = true;
} }
return file; return file;
...@@ -1026,6 +1025,45 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) ...@@ -1026,6 +1025,45 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
return file; return file;
} }
/*
* Set the transient flag on a file
*
* This flag tells CleanupTempFiles to close the kernel-level file descriptor
* (but not the VFD itself) at end of transaction.
*/
void
FileSetTransient(File file)
{
Vfd *vfdP;
Assert(FileIsValid(file));
vfdP = &VfdCache[file];
vfdP->fdstate |= FD_XACT_TRANSIENT;
have_pending_fd_cleanup = true;
}
/*
* Close a file at the kernel level, but keep the VFD open
*/
static void
FileKernelClose(File file)
{
Vfd *vfdP;
Assert(FileIsValid(file));
vfdP = &VfdCache[file];
if (!FileIsNotOpen(file))
{
if (close(vfdP->fd))
elog(ERROR, "could not close file \"%s\": %m", vfdP->fileName);
vfdP->fd = VFD_CLOSED;
}
}
/* /*
* close a file when done with it * close a file when done with it
*/ */
...@@ -1778,8 +1816,9 @@ AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, ...@@ -1778,8 +1816,9 @@ AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
* particularly care which). All still-open per-transaction temporary file * particularly care which). All still-open per-transaction temporary file
* VFDs are closed, which also causes the underlying files to be deleted * VFDs are closed, which also causes the underlying files to be deleted
* (although they should've been closed already by the ResourceOwner * (although they should've been closed already by the ResourceOwner
* cleanup). Furthermore, all "allocated" stdio files are closed. We also * cleanup). Transient files have their kernel file descriptors closed.
* forget any transaction-local temp tablespace list. * Furthermore, all "allocated" stdio files are closed. We also forget any
* transaction-local temp tablespace list.
*/ */
void void
AtEOXact_Files(void) AtEOXact_Files(void)
...@@ -1802,7 +1841,10 @@ AtProcExit_Files(int code, Datum arg) ...@@ -1802,7 +1841,10 @@ AtProcExit_Files(int code, Datum arg)
} }
/* /*
* Close temporary files and delete their underlying files. * General cleanup routine for fd.c.
*
* Temporary files are closed, and their underlying files deleted.
* Transient files are closed.
* *
* isProcExit: if true, this is being called as the backend process is * isProcExit: if true, this is being called as the backend process is
* exiting. If that's the case, we should remove all temporary files; if * exiting. If that's the case, we should remove all temporary files; if
...@@ -1819,35 +1861,49 @@ CleanupTempFiles(bool isProcExit) ...@@ -1819,35 +1861,49 @@ CleanupTempFiles(bool isProcExit)
* Careful here: at proc_exit we need extra cleanup, not just * Careful here: at proc_exit we need extra cleanup, not just
* xact_temporary files. * xact_temporary files.
*/ */
if (isProcExit || have_xact_temporary_files) if (isProcExit || have_pending_fd_cleanup)
{ {
Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */
for (i = 1; i < SizeVfdCache; i++) for (i = 1; i < SizeVfdCache; i++)
{ {
unsigned short fdstate = VfdCache[i].fdstate; unsigned short fdstate = VfdCache[i].fdstate;
if ((fdstate & FD_TEMPORARY) && VfdCache[i].fileName != NULL) if (VfdCache[i].fileName != NULL)
{ {
/* if (fdstate & FD_TEMPORARY)
* If we're in the process of exiting a backend process, close {
* all temporary files. Otherwise, only close temporary files /*
* local to the current transaction. They should be closed by * If we're in the process of exiting a backend process, close
* the ResourceOwner mechanism already, so this is just a * all temporary files. Otherwise, only close temporary files
* debugging cross-check. * local to the current transaction. They should be closed by
*/ * the ResourceOwner mechanism already, so this is just a
if (isProcExit) * debugging cross-check.
FileClose(i); */
else if (fdstate & FD_XACT_TEMPORARY) if (isProcExit)
FileClose(i);
else if (fdstate & FD_XACT_TEMPORARY)
{
elog(WARNING,
"temporary file %s not closed at end-of-transaction",
VfdCache[i].fileName);
FileClose(i);
}
}
else if (fdstate & FD_XACT_TRANSIENT)
{ {
elog(WARNING, /*
"temporary file %s not closed at end-of-transaction", * Close the kernel file descriptor, but also remove the
VfdCache[i].fileName); * flag from the VFD. This is to ensure that if the VFD is
FileClose(i); * reused in the future for non-transient access, we don't
* close it inappropriately then.
*/
FileKernelClose(i);
VfdCache[i].fdstate &= ~FD_XACT_TRANSIENT;
} }
} }
} }
have_xact_temporary_files = false; have_pending_fd_cleanup = false;
} }
/* Clean up "allocated" stdio files and dirs. */ /* Clean up "allocated" stdio files and dirs. */
......
...@@ -288,6 +288,9 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) ...@@ -288,6 +288,9 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
pfree(path); pfree(path);
if (reln->smgr_transient)
FileSetTransient(fd);
reln->md_fd[forkNum] = _fdvec_alloc(); reln->md_fd[forkNum] = _fdvec_alloc();
reln->md_fd[forkNum]->mdfd_vfd = fd; reln->md_fd[forkNum]->mdfd_vfd = fd;
...@@ -542,6 +545,9 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior) ...@@ -542,6 +545,9 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
pfree(path); pfree(path);
if (reln->smgr_transient)
FileSetTransient(fd);
reln->md_fd[forknum] = mdfd = _fdvec_alloc(); reln->md_fd[forknum] = mdfd = _fdvec_alloc();
mdfd->mdfd_vfd = fd; mdfd->mdfd_vfd = fd;
...@@ -1556,6 +1562,9 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, ...@@ -1556,6 +1562,9 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
if (fd < 0) if (fd < 0)
return NULL; return NULL;
if (reln->smgr_transient)
FileSetTransient(fd);
/* allocate an mdfdvec entry for it */ /* allocate an mdfdvec entry for it */
v = _fdvec_alloc(); v = _fdvec_alloc();
......
...@@ -165,16 +165,33 @@ smgropen(RelFileNode rnode, BackendId backend) ...@@ -165,16 +165,33 @@ smgropen(RelFileNode rnode, BackendId backend)
reln->smgr_targblock = InvalidBlockNumber; reln->smgr_targblock = InvalidBlockNumber;
reln->smgr_fsm_nblocks = InvalidBlockNumber; reln->smgr_fsm_nblocks = InvalidBlockNumber;
reln->smgr_vm_nblocks = InvalidBlockNumber; reln->smgr_vm_nblocks = InvalidBlockNumber;
reln->smgr_transient = false;
reln->smgr_which = 0; /* we only have md.c at present */ reln->smgr_which = 0; /* we only have md.c at present */
/* mark it not open */ /* mark it not open */
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
reln->md_fd[forknum] = NULL; reln->md_fd[forknum] = NULL;
} }
else
/* if it was transient before, it no longer is */
reln->smgr_transient = false;
return reln; return reln;
} }
/*
* smgrsettransient() -- mark an SMgrRelation object as transaction-bound
*
* The main effect of this is that all opened files are marked to be
* kernel-level closed (but not necessarily VFD-closed) when the current
* transaction ends.
*/
void
smgrsettransient(SMgrRelation reln)
{
reln->smgr_transient = true;
}
/* /*
* smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
* *
......
...@@ -61,6 +61,7 @@ extern int max_files_per_process; ...@@ -61,6 +61,7 @@ extern int max_files_per_process;
/* Operations on virtual Files --- equivalent to Unix kernel file ops */ /* Operations on virtual Files --- equivalent to Unix kernel file ops */
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode); extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact); extern File OpenTemporaryFile(bool interXact);
extern void FileSetTransient(File file);
extern void FileClose(File file); extern void FileClose(File file);
extern int FilePrefetch(File file, off_t offset, int amount); extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount); extern int FileRead(File file, char *buffer, int amount);
......
...@@ -62,6 +62,7 @@ typedef struct SMgrRelationData ...@@ -62,6 +62,7 @@ typedef struct SMgrRelationData
* submodules. Do not touch them from elsewhere. * submodules. Do not touch them from elsewhere.
*/ */
int smgr_which; /* storage manager selector */ int smgr_which; /* storage manager selector */
bool smgr_transient; /* T if files are to be closed at EOXact */
/* for md.c; NULL for forks that are not open */ /* for md.c; NULL for forks that are not open */
struct _MdfdVec *md_fd[MAX_FORKNUM + 1]; struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
...@@ -74,6 +75,7 @@ typedef SMgrRelationData *SMgrRelation; ...@@ -74,6 +75,7 @@ typedef SMgrRelationData *SMgrRelation;
extern void smgrinit(void); extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
extern void smgrsettransient(SMgrRelation reln);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
extern void smgrclose(SMgrRelation reln); extern void smgrclose(SMgrRelation reln);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment