Commit b09ff536 authored by Thomas Munro's avatar Thomas Munro

Simplify the effective_io_concurrency setting.

The effective_io_concurrency GUC and equivalent tablespace option were
previously passed through a formula based on a theory about RAID
spindles and probabilities, to arrive at the number of pages to prefetch
in bitmap heap scans.  Tomas Vondra, Andres Freund and others argued
that it was anachronistic and hard to justify, and commit 558a9165
already started down the path of bypassing it in new code.  We agreed to
drop that logic and use the value directly.

For the default setting of 1, there is no change in effect.  Higher
settings can be converted from the old meaning to the new with:

  select round(sum(OLD / n::float)) from generate_series(1, OLD) s(n);

We might want to consider renaming the GUC before the next release given
the change in meaning, but it's not clear that many users had set it
very carefully anyway.  That decision is deferred for now.

Discussion: https://postgr.es/m/CA%2BhUKGJUw08dPs_3EUcdO6M90GnjofPYrWp4YSLaBkgYwS-AqA%40mail.gmail.com
parent f207bb0b
...@@ -707,7 +707,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) ...@@ -707,7 +707,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{ {
BitmapHeapScanState *scanstate; BitmapHeapScanState *scanstate;
Relation currentRelation; Relation currentRelation;
int io_concurrency;
/* check for unsupported flags */ /* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
...@@ -737,8 +736,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) ...@@ -737,8 +736,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->prefetch_iterator = NULL; scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0; scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0; scanstate->prefetch_target = 0;
/* may be updated below */
scanstate->prefetch_maximum = target_prefetch_pages;
scanstate->pscan_len = 0; scanstate->pscan_len = 0;
scanstate->initialized = false; scanstate->initialized = false;
scanstate->shared_tbmiterator = NULL; scanstate->shared_tbmiterator = NULL;
...@@ -794,20 +791,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) ...@@ -794,20 +791,11 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate); ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
/* /*
* Determine the maximum for prefetch_target. If the tablespace has a * Maximum number of prefetches for the tablespace if configured, otherwise
* specific IO concurrency set, use that to compute the corresponding * the current value of the effective_io_concurrency GUC.
* maximum value; otherwise, we already initialized to the value computed
* by the GUC machinery.
*/ */
io_concurrency = scanstate->prefetch_maximum =
get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace); get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
if (io_concurrency != effective_io_concurrency)
{
double maximum;
if (ComputeIoConcurrency(io_concurrency, &maximum))
scanstate->prefetch_maximum = rint(maximum);
}
scanstate->ss.ss_currentRelation = currentRelation; scanstate->ss.ss_currentRelation = currentRelation;
......
...@@ -110,6 +110,13 @@ bool zero_damaged_pages = false; ...@@ -110,6 +110,13 @@ bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100; int bgwriter_lru_maxpages = 100;
double bgwriter_lru_multiplier = 2.0; double bgwriter_lru_multiplier = 2.0;
bool track_io_timing = false; bool track_io_timing = false;
/*
* How many buffers PrefetchBuffer callers should try to stay ahead of their
* ReadBuffer calls by. Zero means "never prefetch". This value is only used
* for buffers not belonging to tablespaces that have their
* effective_io_concurrency parameter set.
*/
int effective_io_concurrency = 0; int effective_io_concurrency = 0;
/* /*
...@@ -120,15 +127,6 @@ int checkpoint_flush_after = 0; ...@@ -120,15 +127,6 @@ int checkpoint_flush_after = 0;
int bgwriter_flush_after = 0; int bgwriter_flush_after = 0;
int backend_flush_after = 0; int backend_flush_after = 0;
/*
* How many buffers PrefetchBuffer callers should try to stay ahead of their
* ReadBuffer calls by. This is maintained by the assign hook for
* effective_io_concurrency. Zero means "never prefetch". This value is
* only used for buffers not belonging to tablespaces that have their
* effective_io_concurrency parameter set.
*/
int target_prefetch_pages = 0;
/* local state for StartBufferIO and related functions */ /* local state for StartBufferIO and related functions */
static BufferDesc *InProgressBuf = NULL; static BufferDesc *InProgressBuf = NULL;
static bool IsForInput; static bool IsForInput;
...@@ -461,64 +459,6 @@ static int ckpt_buforder_comparator(const void *pa, const void *pb); ...@@ -461,64 +459,6 @@ static int ckpt_buforder_comparator(const void *pa, const void *pb);
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg); static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg);
/*
 * ComputeIoConcurrency -- translate a number of spindles into the number of
 * pages to prefetch, storing the result in *target.
 *
 * Returns true if the computed target is within the representable range.
 */
bool
ComputeIoConcurrency(int io_concurrency, double *target)
{
	double		pages = 0.0;
	int			drive;

	/*
	 * Clamp to the legal range; the value may have been forced out of range
	 * by a manual pg_tablespace update.
	 */
	if (io_concurrency < 0)
		io_concurrency = 0;
	if (io_concurrency > MAX_IO_CONCURRENCY)
		io_concurrency = MAX_IO_CONCURRENCY;

	/*----------
	 * The user-visible parameter is a drive (spindle) count, which we
	 * translate into a number-of-pages-to-prefetch target.  The expected
	 * number of outstanding requests needed to keep N drives busy follows
	 * the "coupon collector problem":
	 *
	 *	drives | I/O requests
	 *	-------+----------------
	 *	   1   | 1
	 *	   2   | 2/1 + 2/2 = 3
	 *	   3   | 3/1 + 3/2 + 3/3 = 5 1/2
	 *	   4   | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
	 *	   n   | n * H(n)
	 *
	 * where H(n) is the harmonic series.  n * ln(n) would approximate it,
	 * but for realistic drive counts we simply sum the series.
	 *
	 * An alternative would be to target the page count at which the
	 * expected number of active spindles reaches some fraction P of the
	 * total, i.e. ln(1-P)/ln((n-1)/n) -- similar-sounding but slightly
	 * different.  Experiments suggest neither formula is aggressive
	 * enough, but no better proposal exists.
	 *
	 * io_concurrency = 0 (disabled) naturally yields a target of 0, since
	 * the loop below does not execute.
	 *----------
	 */
	for (drive = 1; drive <= io_concurrency; drive++)
		pages += (double) io_concurrency / (double) drive;

	*target = pages;

	/* The result can't plausibly be out of range, but double-check anyway. */
	return (pages >= 0.0 && pages < (double) INT_MAX);
}
/* /*
* PrefetchBuffer -- initiate asynchronous read of a block of a relation * PrefetchBuffer -- initiate asynchronous read of a block of a relation
* *
......
...@@ -196,7 +196,6 @@ static bool check_autovacuum_max_workers(int *newval, void **extra, GucSource so ...@@ -196,7 +196,6 @@ static bool check_autovacuum_max_workers(int *newval, void **extra, GucSource so
static bool check_max_wal_senders(int *newval, void **extra, GucSource source); static bool check_max_wal_senders(int *newval, void **extra, GucSource source);
static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source); static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source);
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source); static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
static void assign_effective_io_concurrency(int newval, void *extra);
static void assign_pgstat_temp_directory(const char *newval, void *extra); static void assign_pgstat_temp_directory(const char *newval, void *extra);
static bool check_application_name(char **newval, void **extra, GucSource source); static bool check_application_name(char **newval, void **extra, GucSource source);
static void assign_application_name(const char *newval, void *extra); static void assign_application_name(const char *newval, void *extra);
...@@ -2882,7 +2881,7 @@ static struct config_int ConfigureNamesInt[] = ...@@ -2882,7 +2881,7 @@ static struct config_int ConfigureNamesInt[] =
0, 0,
#endif #endif
0, MAX_IO_CONCURRENCY, 0, MAX_IO_CONCURRENCY,
check_effective_io_concurrency, assign_effective_io_concurrency, NULL check_effective_io_concurrency, NULL, NULL
}, },
{ {
...@@ -11457,36 +11456,14 @@ check_max_worker_processes(int *newval, void **extra, GucSource source) ...@@ -11457,36 +11456,14 @@ check_max_worker_processes(int *newval, void **extra, GucSource source)
static bool static bool
check_effective_io_concurrency(int *newval, void **extra, GucSource source) check_effective_io_concurrency(int *newval, void **extra, GucSource source)
{ {
#ifdef USE_PREFETCH #ifndef USE_PREFETCH
double new_prefetch_pages;
if (ComputeIoConcurrency(*newval, &new_prefetch_pages))
{
int *myextra = (int *) guc_malloc(ERROR, sizeof(int));
*myextra = (int) rint(new_prefetch_pages);
*extra = (void *) myextra;
return true;
}
else
return false;
#else
if (*newval != 0) if (*newval != 0)
{ {
GUC_check_errdetail("effective_io_concurrency must be set to 0 on platforms that lack posix_fadvise()."); GUC_check_errdetail("effective_io_concurrency must be set to 0 on platforms that lack posix_fadvise().");
return false; return false;
} }
return true;
#endif /* USE_PREFETCH */
}
static void
assign_effective_io_concurrency(int newval, void *extra)
{
#ifdef USE_PREFETCH
target_prefetch_pages = *((int *) extra);
#endif /* USE_PREFETCH */ #endif /* USE_PREFETCH */
return true;
} }
static void static void
......
...@@ -57,7 +57,7 @@ extern bool zero_damaged_pages; ...@@ -57,7 +57,7 @@ extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages; extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier; extern double bgwriter_lru_multiplier;
extern bool track_io_timing; extern bool track_io_timing;
extern int target_prefetch_pages; extern int effective_io_concurrency;
extern int checkpoint_flush_after; extern int checkpoint_flush_after;
extern int backend_flush_after; extern int backend_flush_after;
...@@ -66,9 +66,6 @@ extern int bgwriter_flush_after; ...@@ -66,9 +66,6 @@ extern int bgwriter_flush_after;
/* in buf_init.c */ /* in buf_init.c */
extern PGDLLIMPORT char *BufferBlocks; extern PGDLLIMPORT char *BufferBlocks;
/* in guc.c */
extern int effective_io_concurrency;
/* in localbuf.c */ /* in localbuf.c */
extern PGDLLIMPORT int NLocBuffer; extern PGDLLIMPORT int NLocBuffer;
extern PGDLLIMPORT Block *LocalBufferBlockPointers; extern PGDLLIMPORT Block *LocalBufferBlockPointers;
...@@ -161,7 +158,6 @@ extern PGDLLIMPORT int32 *LocalRefCount; ...@@ -161,7 +158,6 @@ extern PGDLLIMPORT int32 *LocalRefCount;
/* /*
* prototypes for functions in bufmgr.c * prototypes for functions in bufmgr.c
*/ */
extern bool ComputeIoConcurrency(int io_concurrency, double *target);
extern void PrefetchBuffer(Relation reln, ForkNumber forkNum, extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
BlockNumber blockNum); BlockNumber blockNum);
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment