Commit 79ccd7cb authored by Robert Haas's avatar Robert Haas

pg_prewarm: Add automatic prewarm feature.

Periodically while the server is running, and at shutdown, write out a
list of blocks in shared buffers.  When the server reaches consistency
-- unfortunately, we can't do it before that point without breaking
things -- reload those blocks into any still-unused shared buffers.

Mithun Cy and Robert Haas, reviewed and tested by Beena Emerson,
Amit Kapila, Jim Nasby, and Rafia Sabih.

Discussion: http://postgr.es/m/CAD__OugubOs1Vy7kgF6xTjmEqTR4CrGAv8w+ZbaY_+MZeitukw@mail.gmail.com
parent 66ed3829
# contrib/pg_prewarm/Makefile # contrib/pg_prewarm/Makefile
MODULE_big = pg_prewarm MODULE_big = pg_prewarm
OBJS = pg_prewarm.o $(WIN32RES) OBJS = pg_prewarm.o autoprewarm.o $(WIN32RES)
EXTENSION = pg_prewarm EXTENSION = pg_prewarm
DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql DATA = pg_prewarm--1.1--1.2.sql pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache" PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache"
ifdef USE_PGXS ifdef USE_PGXS
......
/*-------------------------------------------------------------------------
*
* autoprewarm.c
* Periodically dump information about the blocks present in
* shared_buffers, and reload them on server restart.
*
* Due to locking considerations, we can't actually begin prewarming
* until the server reaches a consistent state. We need the catalogs
* to be consistent so that we can figure out which relation to lock,
* and we need to lock the relations so that we don't try to prewarm
* pages from a relation that is in the process of being dropped.
*
* While prewarming, autoprewarm will use two workers. There's a
* master worker that reads and sorts the list of blocks to be
* prewarmed and then launches a per-database worker for each
* relevant database in turn. The former keeps running after the
* initial prewarm is complete to update the dump file periodically.
*
* Copyright (c) 2016-2017, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/pg_prewarm/autoprewarm.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include "access/heapam.h"
#include "access/xact.h"
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "storage/buf_internals.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/lwlock.h"
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
#include "utils/resowner.h"
/*
 * Name of the block dump file.  apw_load_buffers() reads it, and
 * apw_dump_now() replaces it by renaming "<name>.tmp" over it.
 */
#define AUTOPREWARM_FILE "autoprewarm.blocks"
/*
 * Metadata for each block we dump.
 *
 * One record is written per line of the dump file as five comma-separated
 * unsigned integers, in the same order as the fields below.
 */
typedef struct BlockInfoRecord
{
Oid database; /* database OID; InvalidOid entries are grouped with the next database when loading */
Oid tablespace; /* tablespace OID, taken from the buffer tag */
Oid filenode; /* relfilenode, mapped back to a relation OID when prewarming */
ForkNumber forknum; /* relation fork; range-checked before use when prewarming */
BlockNumber blocknum; /* block number within the fork */
} BlockInfoRecord;
/*
 * Shared state information for autoprewarm bgworker.
 *
 * bgworker_pid and pid_using_dumpfile are InvalidPid when unclaimed; both
 * are cleared by apw_detach_shmem() if they still hold the exiting PID.
 */
typedef struct AutoPrewarmSharedState
{
LWLock lock; /* mutual exclusion */
pid_t bgworker_pid; /* for main bgworker */
pid_t pid_using_dumpfile; /* for autoprewarm or block dump */
/* Following items are for communication with per-database worker */
dsm_handle block_info_handle; /* DSM segment holding the sorted BlockInfoRecord array */
Oid database; /* database the per-database worker connects to */
int64 prewarm_start_idx; /* first array index the worker should process */
int64 prewarm_stop_idx; /* one past the last array index for that worker */
int64 prewarmed_blocks; /* running count of blocks read in by workers */
} AutoPrewarmSharedState;
/* Entry points looked up by name: module load hook and bgworker mains. */
void _PG_init(void);
void autoprewarm_main(Datum main_arg);
void autoprewarm_database_main(Datum main_arg);
/* SQL-callable functions. */
PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
/* Private helpers. */
static void apw_load_buffers(void);
static int64 apw_dump_now(bool is_bgworker, bool dump_unlogged);
static void apw_start_master_worker(void);
static void apw_start_database_worker(void);
static bool apw_init_shmem(void);
static void apw_detach_shmem(int code, Datum arg);
static int apw_compare_blockinfo(const void *p, const void *q);
static void apw_sigterm_handler(SIGNAL_ARGS);
static void apw_sighup_handler(SIGNAL_ARGS);
/* Flags set by signal handlers */
static volatile sig_atomic_t got_sigterm = false;
static volatile sig_atomic_t got_sighup = false;
/* Pointer to shared-memory state; set by apw_init_shmem(). */
static AutoPrewarmSharedState *apw_state = NULL;
/* GUC variables. */
static bool autoprewarm = true; /* start worker? */
static int autoprewarm_interval; /* dump interval, in seconds; 0 disables timed dumps */
/*
 * Module load callback.
 *
 * The dump-interval setting is always defined.  Everything else -- the
 * postmaster-only on/off switch, the shared memory request, and registration
 * of the master worker -- happens only while shared_preload_libraries is
 * being processed.
 */
void
_PG_init(void)
{
	DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
							"Sets the interval between dumps of shared buffers",
							"If set to zero, time-based dumping is disabled.",
							&autoprewarm_interval,
							300,
							0, INT_MAX / 1000,
							PGC_SIGHUP,
							GUC_UNIT_S,
							NULL,
							NULL,
							NULL);

	if (process_shared_preload_libraries_in_progress)
	{
		/* A PGC_POSTMASTER variable cannot be defined after startup. */
		DefineCustomBoolVariable("pg_prewarm.autoprewarm",
								 "Starts the autoprewarm worker.",
								 NULL,
								 &autoprewarm,
								 true,
								 PGC_POSTMASTER,
								 0,
								 NULL,
								 NULL,
								 NULL);

		EmitWarningsOnPlaceholders("pg_prewarm");

		RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));

		/* Register the autoprewarm worker only when the switch is on. */
		if (autoprewarm)
			apw_start_master_worker();
	}
}
/*
 * Main entry point for the master autoprewarm process.  Per-database workers
 * have a separate entry point.
 *
 * The worker attaches to the shared state and claims the bgworker_pid slot,
 * returning immediately if another worker already holds it.  If this process
 * was the one that created the shared state, it prewarms from the dump file.
 * It then dumps the buffer list every pg_prewarm.autoprewarm_interval
 * seconds until SIGTERM arrives, and finally writes one last dump that also
 * includes unlogged relations.
 *
 * main_arg is not used.
 */
void
autoprewarm_main(Datum main_arg)
{
	bool		first_time = true;
	TimestampTz last_dump_time = 0;

	/* Establish signal handlers; once that's done, unblock signals. */
	pqsignal(SIGTERM, apw_sigterm_handler);
	pqsignal(SIGHUP, apw_sighup_handler);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	BackgroundWorkerUnblockSignals();

	/* Create (if necessary) and attach to our shared memory area. */
	if (apw_init_shmem())
		first_time = false;

	/* Set on-detach hook so that our PID will be cleared on exit. */
	on_shmem_exit(apw_detach_shmem, 0);

	/*
	 * Store our PID in the shared memory area --- unless there's already
	 * another worker running, in which case just exit.
	 */
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
	if (apw_state->bgworker_pid != InvalidPid)
	{
		/*
		 * Copy the PID while the lock is still held so that the message
		 * reports the value we actually tested.
		 */
		pid_t		running_pid = apw_state->bgworker_pid;

		LWLockRelease(&apw_state->lock);
		ereport(LOG,
				(errmsg("autoprewarm worker is already running under PID %d",
						(int) running_pid)));
		return;
	}
	apw_state->bgworker_pid = MyProcPid;
	LWLockRelease(&apw_state->lock);

	/*
	 * Preload buffers from the dump file only if we just created the shared
	 * memory region.  Otherwise, it's either already been done or shouldn't
	 * be done - e.g. because the old dump file has been overwritten since the
	 * server was started.
	 *
	 * There's not much point in performing a dump immediately after we finish
	 * preloading; so, if we do end up preloading, consider the last dump time
	 * to be equal to the current time.
	 */
	if (first_time)
	{
		apw_load_buffers();
		last_dump_time = GetCurrentTimestamp();
	}

	/* Periodically dump buffers until terminated. */
	while (!got_sigterm)
	{
		int			rc;

		/* In case of a SIGHUP, just reload the configuration. */
		if (got_sighup)
		{
			got_sighup = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		if (autoprewarm_interval <= 0)
		{
			/* We're only dumping at shutdown, so just wait forever. */
			rc = WaitLatch(&MyProc->procLatch,
						   WL_LATCH_SET | WL_POSTMASTER_DEATH,
						   -1L,
						   PG_WAIT_EXTENSION);
		}
		else
		{
			long		delay_in_ms = 0;
			TimestampTz next_dump_time = 0;
			long		secs = 0;
			int			usecs = 0;

			/* Compute the next dump time. */
			next_dump_time =
				TimestampTzPlusMilliseconds(last_dump_time,
											autoprewarm_interval * 1000);
			TimestampDifference(GetCurrentTimestamp(), next_dump_time,
								&secs, &usecs);

			/*
			 * Convert both parts to milliseconds.  The GUC's upper bound of
			 * INT_MAX / 1000 seconds keeps secs * 1000 within range.
			 */
			delay_in_ms = secs * 1000L + (usecs / 1000);

			/* Perform a dump if it's time. */
			if (delay_in_ms <= 0)
			{
				last_dump_time = GetCurrentTimestamp();
				apw_dump_now(true, false);
				continue;
			}

			/* Sleep until the next dump time. */
			rc = WaitLatch(&MyProc->procLatch,
						   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
						   delay_in_ms,
						   PG_WAIT_EXTENSION);
		}

		/* Reset the latch, bail out if postmaster died, otherwise loop. */
		ResetLatch(&MyProc->procLatch);
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);
	}

	/*
	 * Dump one last time.  We assume this is probably the result of a system
	 * shutdown, although it's possible that we've merely been terminated.
	 */
	apw_dump_now(true, true);
}
/*
 * Read the dump file and launch per-database workers one at a time to
 * prewarm the buffers found there.
 *
 * The dump file is claimed through pid_using_dumpfile for the duration of
 * the load; if some other process already holds it, the prewarm is skipped.
 * A missing dump file is not an error.  Records are copied into a dynamic
 * shared memory segment, sorted, and then handed to per-database workers as
 * [prewarm_start_idx, prewarm_stop_idx) ranges.
 */
static void
apw_load_buffers(void)
{
	FILE	   *file = NULL;
	int64		num_elements,
				i;
	BlockInfoRecord *blkinfo;
	dsm_segment *seg;
	pid_t		busy_pid;

	/*
	 * Skip the prewarm if the dump file is in use; otherwise, prevent any
	 * other process from writing it while we're using it.  The current owner
	 * is copied under the lock so the log message reports what we tested.
	 */
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
	busy_pid = apw_state->pid_using_dumpfile;
	if (busy_pid == InvalidPid)
		apw_state->pid_using_dumpfile = MyProcPid;
	LWLockRelease(&apw_state->lock);

	if (busy_pid != InvalidPid)
	{
		ereport(LOG,
				(errmsg("skipping prewarm because block dump file is being written by PID %d",
						(int) busy_pid)));
		return;
	}

	/*
	 * Open the block dump file.  Exit quietly if it doesn't exist, but report
	 * any other error.
	 */
	file = AllocateFile(AUTOPREWARM_FILE, "r");
	if (!file)
	{
		if (errno == ENOENT)
		{
			LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
			apw_state->pid_using_dumpfile = InvalidPid;
			LWLockRelease(&apw_state->lock);
			return;				/* No file to load. */
		}
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read file \"%s\": %m",
						AUTOPREWARM_FILE)));
	}

	/* First line of the file is a record count. */
	if (fscanf(file, "<<" INT64_FORMAT ">>\n", &num_elements) != 1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read from file \"%s\": %m",
						AUTOPREWARM_FILE)));

	/* Allocate a dynamic shared memory segment to store the record data. */
	seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
	blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);

	/* Read records, one per line. */
	for (i = 0; i < num_elements; i++)
	{
		unsigned	forknum;

		if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
				   &blkinfo[i].tablespace, &blkinfo[i].filenode,
				   &forknum, &blkinfo[i].blocknum) != 5)
			ereport(ERROR,
					(errmsg("autoprewarm block dump file is corrupted at line " INT64_FORMAT,
							i + 1)));
		blkinfo[i].forknum = forknum;
	}

	FreeFile(file);

	/* Sort the blocks to be loaded. */
	pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
			 apw_compare_blockinfo);

	/* Populate shared memory state. */
	apw_state->block_info_handle = dsm_segment_handle(seg);
	apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
	apw_state->prewarmed_blocks = 0;

	/* Get the info position of the first block of the next database. */
	while (apw_state->prewarm_start_idx < num_elements)
	{
		/* Same width as the shared indexes, so nothing is truncated. */
		int64		j = apw_state->prewarm_start_idx;
		Oid			current_db = blkinfo[j].database;

		/*
		 * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
		 * not belong to this database.
		 */
		j++;
		while (j < num_elements)
		{
			if (current_db != blkinfo[j].database)
			{
				/*
				 * Combine BlockInfoRecords for global objects with those of
				 * the database.
				 */
				if (current_db != InvalidOid)
					break;
				current_db = blkinfo[j].database;
			}

			j++;
		}

		/*
		 * If we reach this point with current_db == InvalidOid, then only
		 * BlockInfoRecords belonging to global objects exist.  We can't
		 * prewarm without a database connection, so just bail out.
		 */
		if (current_db == InvalidOid)
			break;

		/* Configure stop point and database for next per-database worker. */
		apw_state->prewarm_stop_idx = j;
		apw_state->database = current_db;
		Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);

		/* If we've run out of free buffers, don't launch another worker. */
		if (!have_free_buffer())
			break;

		/*
		 * Start a per-database worker to load blocks for this database; this
		 * function will return once the per-database worker exits.
		 */
		apw_start_database_worker();

		/* Prepare for next database. */
		apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
	}

	/* Clean up. */
	dsm_detach(seg);
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
	apw_state->block_info_handle = DSM_HANDLE_INVALID;
	apw_state->pid_using_dumpfile = InvalidPid;
	LWLockRelease(&apw_state->lock);

	/* Report our success. */
	ereport(LOG,
			(errmsg("autoprewarm successfully prewarmed " INT64_FORMAT
					" of " INT64_FORMAT " previously-loaded blocks",
					apw_state->prewarmed_blocks, num_elements)));
}
/*
 * Prewarm all blocks for one database (and possibly also global objects, if
 * those got grouped with this database).
 *
 * The range of records to process, the target database, and the handle of
 * the segment holding the records are all read from apw_state, which the
 * master worker fills in before launching us.  Each block successfully read
 * bumps apw_state->prewarmed_blocks.
 *
 * main_arg is not used.
 */
void
autoprewarm_database_main(Datum main_arg)
{
	int64		pos;			/* same width as the shared start/stop indexes */
	BlockInfoRecord *block_info;
	Relation	rel = NULL;
	BlockNumber nblocks = 0;
	BlockInfoRecord *old_blk = NULL;
	dsm_segment *seg;

	/* Establish signal handlers; once that's done, unblock signals. */
	pqsignal(SIGTERM, die);
	BackgroundWorkerUnblockSignals();

	/* Connect to correct database and get block information. */
	apw_init_shmem();
	seg = dsm_attach(apw_state->block_info_handle);
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("could not map dynamic shared memory segment")));
	BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid);
	block_info = (BlockInfoRecord *) dsm_segment_address(seg);
	pos = apw_state->prewarm_start_idx;

	/*
	 * Loop until we run out of blocks to prewarm or until we run out of free
	 * buffers.
	 */
	while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
	{
		BlockInfoRecord *blk = &block_info[pos++];
		Buffer		buf;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Quit if we've reached records for another database.  If previous
		 * blocks are of some global objects, then continue pre-warming.
		 */
		if (old_blk != NULL && old_blk->database != blk->database &&
			old_blk->database != InvalidOid)
			break;

		/*
		 * As soon as we encounter a block of a new relation, close the old
		 * relation.  Note that rel will be NULL if try_relation_open failed
		 * previously; in that case, there is nothing to close.
		 */
		if (old_blk != NULL && old_blk->filenode != blk->filenode &&
			rel != NULL)
		{
			relation_close(rel, AccessShareLock);
			rel = NULL;
			CommitTransactionCommand();
		}

		/*
		 * Try to open each new relation, but only once, when we first
		 * encounter it.  If it's been dropped, skip the associated blocks.
		 */
		if (old_blk == NULL || old_blk->filenode != blk->filenode)
		{
			Oid			reloid;

			Assert(rel == NULL);
			StartTransactionCommand();
			reloid = RelidByRelfilenode(blk->tablespace, blk->filenode);
			if (OidIsValid(reloid))
				rel = try_relation_open(reloid, AccessShareLock);

			/* No relation means no lock to hold, so end the transaction. */
			if (!rel)
				CommitTransactionCommand();
		}
		if (!rel)
		{
			old_blk = blk;
			continue;
		}

		/* Once per fork, check for fork existence and size. */
		if (old_blk == NULL ||
			old_blk->filenode != blk->filenode ||
			old_blk->forknum != blk->forknum)
		{
			RelationOpenSmgr(rel);

			/*
			 * smgrexists is not safe for illegal forknum, hence check whether
			 * the passed forknum is valid before using it in smgrexists.
			 */
			if (blk->forknum > InvalidForkNumber &&
				blk->forknum <= MAX_FORKNUM &&
				smgrexists(rel->rd_smgr, blk->forknum))
				nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
			else
				nblocks = 0;
		}

		/* Check whether blocknum is valid and within fork file size. */
		if (blk->blocknum >= nblocks)
		{
			/* Move to next forknum. */
			old_blk = blk;
			continue;
		}

		/* Prewarm buffer. */
		buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
								 NULL);
		if (BufferIsValid(buf))
		{
			apw_state->prewarmed_blocks++;
			ReleaseBuffer(buf);
		}

		old_blk = blk;
	}

	dsm_detach(seg);

	/* Release lock on previous relation. */
	if (rel)
	{
		relation_close(rel, AccessShareLock);
		CommitTransactionCommand();
	}
}
/*
 * Dump information on blocks in shared buffers.  We use a text format here
 * so that it's easy to understand and even change the file contents if
 * necessary.
 *
 * is_bgworker selects what happens when another process already owns the
 * dump file: a background worker logs the fact and returns 0, any other
 * caller gets an ERROR.  dump_unlogged controls whether buffers without
 * BM_PERMANENT set are included.
 *
 * The list is written to "<AUTOPREWARM_FILE>.tmp" and then renamed into
 * place.  Returns the number of block records written.
 */
static int64
apw_dump_now(bool is_bgworker, bool dump_unlogged)
{
	int			i;				/* signed, to match NBuffers */
	int			ret;
	int64		num_blocks;
	BlockInfoRecord *block_info_array;
	BufferDesc *bufHdr;
	FILE	   *file;
	char		transient_dump_file_path[MAXPGPATH];
	pid_t		pid;

	/* Claim the dump file, remembering who held it if we could not. */
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
	pid = apw_state->pid_using_dumpfile;
	if (apw_state->pid_using_dumpfile == InvalidPid)
		apw_state->pid_using_dumpfile = MyProcPid;
	LWLockRelease(&apw_state->lock);

	if (pid != InvalidPid)
	{
		/*
		 * Report the PID captured under the lock rather than re-reading
		 * shared memory, which may have changed since.
		 */
		if (!is_bgworker)
			ereport(ERROR,
					(errmsg("could not perform block dump because dump file is being used by PID %d",
							(int) pid)));

		ereport(LOG,
				(errmsg("skipping block dump because it is already being performed by PID %d",
						(int) pid)));
		return 0;
	}

	block_info_array =
		(BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);

	for (num_blocks = 0, i = 0; i < NBuffers; i++)
	{
		uint32		buf_state;

		CHECK_FOR_INTERRUPTS();

		bufHdr = GetBufferDescriptor(i);

		/* Lock each buffer header before inspecting. */
		buf_state = LockBufHdr(bufHdr);

		/*
		 * Unlogged tables will be automatically truncated after a crash or
		 * unclean shutdown.  In such cases we need not prewarm them.  Dump
		 * them only if requested by caller.
		 */
		if (buf_state & BM_TAG_VALID &&
			((buf_state & BM_PERMANENT) || dump_unlogged))
		{
			block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
			block_info_array[num_blocks].tablespace = bufHdr->tag.rnode.spcNode;
			block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
			block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
			block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
			++num_blocks;
		}

		UnlockBufHdr(bufHdr, buf_state);
	}

	snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
	file = AllocateFile(transient_dump_file_path, "w");
	if (!file)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m",
						transient_dump_file_path)));

	/* Header line: the record count that the loader reads first. */
	ret = fprintf(file, "<<" INT64_FORMAT ">>\n", num_blocks);
	if (ret < 0)
	{
		/* Keep errno from the failed write across the cleanup calls. */
		int			save_errno = errno;

		FreeFile(file);
		unlink(transient_dump_file_path);
		errno = save_errno;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\" : %m",
						transient_dump_file_path)));
	}

	for (i = 0; i < num_blocks; i++)
	{
		CHECK_FOR_INTERRUPTS();

		ret = fprintf(file, "%u,%u,%u,%u,%u\n",
					  block_info_array[i].database,
					  block_info_array[i].tablespace,
					  block_info_array[i].filenode,
					  (uint32) block_info_array[i].forknum,
					  block_info_array[i].blocknum);
		if (ret < 0)
		{
			int			save_errno = errno;

			FreeFile(file);
			unlink(transient_dump_file_path);
			errno = save_errno;
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write to file \"%s\" : %m",
							transient_dump_file_path)));
		}
	}

	pfree(block_info_array);

	/*
	 * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
	 * permanent.
	 */
	ret = FreeFile(file);
	if (ret != 0)
	{
		int			save_errno = errno;

		unlink(transient_dump_file_path);
		errno = save_errno;
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\" : %m",
						transient_dump_file_path)));
	}

	(void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);

	/* Give up ownership of the dump file. */
	apw_state->pid_using_dumpfile = InvalidPid;

	ereport(DEBUG1,
			(errmsg("wrote block details for " INT64_FORMAT " blocks",
					num_blocks)));
	return num_blocks;
}
/*
 * SQL-callable function to launch autoprewarm.
 *
 * Raises an error if the autoprewarm switch is off or if the shared state
 * already records a running master worker; otherwise starts the master
 * worker.  Returns void.
 */
Datum
autoprewarm_start_worker(PG_FUNCTION_ARGS)
{
	pid_t		pid;

	if (!autoprewarm)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("autoprewarm is disabled")));

	apw_init_shmem();

	/* Read the registered worker PID under the lock. */
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
	pid = apw_state->bgworker_pid;
	LWLockRelease(&apw_state->lock);

	if (pid != InvalidPid)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("autoprewarm worker is already running under PID %d",
						(int) pid)));	/* pid_t need not be int; cast for %d */

	apw_start_master_worker();

	PG_RETURN_VOID();
}
/*
 * SQL-callable function to perform an immediate block dump.
 *
 * Attaches to the shared state and runs apw_dump_now() as a non-bgworker
 * caller with unlogged buffers included.  Returns the number of block
 * records written, as int8.
 */
Datum
autoprewarm_dump_now(PG_FUNCTION_ARGS)
{
int64 num_blocks;
apw_init_shmem();
/*
 * NOTE(review): the cleanup macro pair presumably arranges for
 * apw_detach_shmem() to run if the dump errors out, which would release
 * our claim on the dump file -- confirm against the macro definition.
 */
PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
{
/* is_bgworker = false: a busy dump file raises ERROR instead of logging. */
num_blocks = apw_dump_now(false, true);
}
PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
PG_RETURN_INT64(num_blocks);
}
/*
 * Locate the "autoprewarm" shared memory structure, creating and
 * initializing it when it does not exist yet, and point apw_state at it.
 *
 * Returns true when the structure already existed, false when this call
 * created it.
 */
static bool
apw_init_shmem(void)
{
	bool		already_present;

	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	apw_state = ShmemInitStruct("autoprewarm",
								sizeof(AutoPrewarmSharedState),
								&already_present);

	if (already_present)
	{
		LWLockRelease(AddinShmemInitLock);
		return true;
	}

	/* Freshly created: set up the lock and mark both PID slots free. */
	LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
	apw_state->bgworker_pid = InvalidPid;
	apw_state->pid_using_dumpfile = InvalidPid;

	LWLockRelease(AddinShmemInitLock);
	return false;
}
/*
 * Exit/cleanup callback: release whichever shared-state PID slots still
 * name this process.  Slots owned by other processes are left alone.
 * Neither argument is used.
 */
static void
apw_detach_shmem(int code, Datum arg)
{
	LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);

	if (MyProcPid == apw_state->bgworker_pid)
		apw_state->bgworker_pid = InvalidPid;

	if (MyProcPid == apw_state->pid_using_dumpfile)
		apw_state->pid_using_dumpfile = InvalidPid;

	LWLockRelease(&apw_state->lock);
}
/*
 * Start autoprewarm master worker process.
 *
 * While shared_preload_libraries is being processed the worker is simply
 * registered.  Otherwise it is registered dynamically and we wait for it to
 * start, raising an error if either step fails.
 */
static void
apw_start_master_worker(void)
{
	BackgroundWorker bgw;
	BackgroundWorkerHandle *bgw_handle;
	pid_t		started_pid;

	MemSet(&bgw, 0, sizeof(BackgroundWorker));
	strcpy(bgw.bgw_name, "autoprewarm");
	strcpy(bgw.bgw_library_name, "pg_prewarm");
	strcpy(bgw.bgw_function_name, "autoprewarm_main");
	bgw.bgw_start_time = BgWorkerStart_ConsistentState;
	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;

	if (!process_shared_preload_libraries_in_progress)
	{
		/* must set notify PID to wait for startup */
		bgw.bgw_notify_pid = MyProcPid;

		if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					 errmsg("could not register background process"),
					 errhint("You may need to increase max_worker_processes.")));

		if (WaitForBackgroundWorkerStartup(bgw_handle, &started_pid) != BGWH_STARTED)
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					 errmsg("could not start background process"),
					 errhint("More details may be available in the server log.")));
	}
	else
		RegisterBackgroundWorker(&bgw);
}
/*
* Start autoprewarm per-database worker process.
*/
static void
apw_start_database_worker(void)
{
BackgroundWorker worker;
BackgroundWorkerHandle *handle;
MemSet(&worker, 0, sizeof(BackgroundWorker));
worker.bgw_flags =
BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
worker.bgw_start_time = BgWorkerStart_ConsistentState;
strcpy(worker.bgw_library_name, "pg_prewarm");
strcpy(worker.bgw_function_name, "autoprewarm_database_main");
strcpy(worker.bgw_name, "autoprewarm");
/* must set notify PID to wait for shutdown */
worker.bgw_notify_pid = MyProcPid;
if (!RegisterDynamicBackgroundWorker(&worker, &handle))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
errmsg("registering dynamic bgworker autoprewarm failed"),
errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
/*
* Ignore return value; if it fails, postmaster has died, but we have
* checks for that elsewhere.
*/
WaitForBackgroundWorkerShutdown(handle);
}
/*
 * Compare one member of the records pointed to by "a" and "b", which must be
 * in scope at the point of use.  Returns -1 or 1 from the enclosing function
 * when the members differ; falls through when they are equal.
 *
 * There is deliberately no semicolon after "while (0)": the caller supplies
 * it, so the macro behaves as a single statement even inside if/else.
 */
#define cmp_member_elem(fld)	\
do { \
	if (a->fld < b->fld)		\
		return -1;				\
	else if (a->fld > b->fld)	\
		return 1;				\
} while (0)
/*
 * apw_compare_blockinfo
 *
 * qsort comparator for BlockInfoRecord: orders by database, then tablespace,
 * filenode, forknum and blocknum.  Returns -1, 0 or 1.
 *
 * We depend on all records for a particular database being consecutive
 * in the dump file; each per-database worker will preload blocks until
 * it sees a block for some other database.  Sorting by tablespace,
 * filenode, forknum, and blocknum isn't critical for correctness, but
 * helps us get a sequential I/O pattern.
 */
static int
apw_compare_blockinfo(const void *p, const void *q)
{
	/* Keep the const qualifier: the comparator only reads the records. */
	const BlockInfoRecord *a = (const BlockInfoRecord *) p;
	const BlockInfoRecord *b = (const BlockInfoRecord *) q;

	cmp_member_elem(database);
	cmp_member_elem(tablespace);
	cmp_member_elem(filenode);
	cmp_member_elem(forknum);
	cmp_member_elem(blocknum);

	return 0;
}
/*
* Signal handler for SIGTERM
*/
static void
apw_sigterm_handler(SIGNAL_ARGS)
{
int save_errno = errno;
got_sigterm = true;
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/*
* Signal handler for SIGHUP
*/
static void
apw_sighup_handler(SIGNAL_ARGS)
{
int save_errno = errno;
got_sighup = true;
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/* contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_prewarm UPDATE TO '1.2'" to load this file. \quit
CREATE FUNCTION autoprewarm_start_worker()
RETURNS VOID STRICT
AS 'MODULE_PATHNAME', 'autoprewarm_start_worker'
LANGUAGE C;
CREATE FUNCTION autoprewarm_dump_now()
RETURNS pg_catalog.int8 STRICT
AS 'MODULE_PATHNAME', 'autoprewarm_dump_now'
LANGUAGE C;
# pg_prewarm extension # pg_prewarm extension
comment = 'prewarm relation data' comment = 'prewarm relation data'
default_version = '1.1' default_version = '1.2'
module_pathname = '$libdir/pg_prewarm' module_pathname = '$libdir/pg_prewarm'
relocatable = true relocatable = true
...@@ -10,7 +10,13 @@ ...@@ -10,7 +10,13 @@
<para> <para>
The <filename>pg_prewarm</filename> module provides a convenient way The <filename>pg_prewarm</filename> module provides a convenient way
to load relation data into either the operating system buffer cache to load relation data into either the operating system buffer cache
or the <productname>PostgreSQL</productname> buffer cache. or the <productname>PostgreSQL</productname> buffer cache. Prewarming
can be performed manually using the <function>pg_prewarm</> function,
or can be performed automatically by including <literal>pg_prewarm</> in
<xref linkend="guc-shared-preload-libraries">. In the latter case, the
system will run a background worker which periodically records the contents
of shared buffers in a file called <filename>autoprewarm.blocks</> and
will, using 2 background workers, reload those same blocks after a restart.
</para> </para>
<sect2> <sect2>
...@@ -55,6 +61,67 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main', ...@@ -55,6 +61,67 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
cache. For these reasons, prewarming is typically most useful at startup, cache. For these reasons, prewarming is typically most useful at startup,
when caches are largely empty. when caches are largely empty.
</para> </para>
<synopsis>
autoprewarm_start_worker() RETURNS void
</synopsis>
<para>
Launch the main autoprewarm worker. This will normally happen
automatically, but is useful if automatic prewarm was not configured at
server startup time and you wish to start up the worker at a later time.
</para>
<synopsis>
autoprewarm_dump_now() RETURNS int8
</synopsis>
<para>
Update <filename>autoprewarm.blocks</> immediately. This may be useful
if the autoprewarm worker is not running but you anticipate running it
after the next restart. The return value is the number of records written
to <filename>autoprewarm.blocks</>.
</para>
</sect2>
<sect2>
<title>Configuration Parameters</title>
<variablelist>
<varlistentry>
<term>
<varname>pg_prewarm.autoprewarm</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>pg_prewarm.autoprewarm</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Controls whether the server should run the autoprewarm worker. This is
on by default. This parameter can only be set at server start.
</para>
</listitem>
</varlistentry>
</variablelist>
<variablelist>
<varlistentry>
<term>
<varname>pg_prewarm.autoprewarm_interval</varname> (<type>int</type>)
<indexterm>
<primary><varname>pg_prewarm.autoprewarm_interval</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
This is the interval between updates to <literal>autoprewarm.blocks</>.
The default is 300 seconds. If set to 0, the file will not be
dumped at regular intervals, but only when the server is shut down.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2> </sect2>
<sect2> <sect2>
......
...@@ -168,6 +168,23 @@ ClockSweepTick(void) ...@@ -168,6 +168,23 @@ ClockSweepTick(void)
return victim; return victim;
} }
/*
 * have_free_buffer -- a lockless check to see if there is a free buffer in
 *					   buffer pool.
 *
 * A true result can become stale as soon as other operations move free
 * buffers out, so a caller that strictly needs a free buffer must not rely
 * on this.
 */
bool
have_free_buffer(void)
{
	return StrategyControl->firstFreeBuffer >= 0;
}
/* /*
* StrategyGetBuffer * StrategyGetBuffer
* *
......
...@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno); ...@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
extern Size StrategyShmemSize(void); extern Size StrategyShmemSize(void);
extern void StrategyInitialize(bool init); extern void StrategyInitialize(bool init);
extern bool have_free_buffer(void);
/* buf_table.c */ /* buf_table.c */
extern Size BufTableShmemSize(int size); extern Size BufTableShmemSize(int size);
......
...@@ -138,6 +138,7 @@ AttrDefault ...@@ -138,6 +138,7 @@ AttrDefault
AttrNumber AttrNumber
AttributeOpts AttributeOpts
AuthRequest AuthRequest
AutoPrewarmSharedState
AutoVacOpts AutoVacOpts
AutoVacuumShmemStruct AutoVacuumShmemStruct
AutoVacuumWorkItem AutoVacuumWorkItem
...@@ -218,6 +219,7 @@ BlobInfo ...@@ -218,6 +219,7 @@ BlobInfo
Block Block
BlockId BlockId
BlockIdData BlockIdData
BlockInfoRecord
BlockNumber BlockNumber
BlockSampler BlockSampler
BlockSamplerData BlockSamplerData
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment