Commit 7526e102 authored by Alvaro Herrera

BRIN auto-summarization

Previously, only VACUUM would cause a page range to get initially
summarized by BRIN indexes, which for some use cases happens too long
after the inserts occur.  To avoid the delay, have brininsert request a
summarization run for the previous range as soon as the first tuple is
inserted into the first page of the next range.  Autovacuum is in charge
of processing these requests, after doing all the regular vacuuming/
analyzing work on tables.

This doesn't impose any new tasks on autovacuum, because autovacuum was
already in charge of doing summarizations.  The only actual effect is to
change the timing, i.e. that it occurs earlier.  For this reason, we
don't go to any great lengths to record these requests very robustly; if
they are lost because of a server crash or restart, they will happen at
a later time anyway.

Most of the new code here is in autovacuum, which can now be told about
"work items" to process.  This can be used for other things such as GIN
pending list cleaning, perhaps visibility map bit setting, both of which
are currently invoked during vacuum, but do not really depend on vacuum
taking place.

The requests are at the page range level, a granularity for which we did
not have SQL-level access; we only had index-level summarization
requests via brin_summarize_new_values().  It seems reasonable to add
SQL-level access to range-level summarization too, so add a function
brin_summarize_range() to do that.

Authors: Álvaro Herrera, based on sketch from Simon Riggs.
Reviewed-by: Thomas Munro.
Discussion: https://postgr.es/m/20170301045823.vneqdqkmsd4as4ds@alvherre.pgsql
parent 7220c7b3
......@@ -74,9 +74,14 @@
tuple; those tuples remain unsummarized until a summarization run is
invoked later, creating initial summaries.
This process can be invoked manually using the
<function>brin_summarize_new_values(regclass)</function> function,
or automatically when <command>VACUUM</command> processes the table.
<function>brin_summarize_range(regclass, bigint)</function> or
<function>brin_summarize_new_values(regclass)</function> functions;
automatically when <command>VACUUM</command> processes the table;
or by automatic summarization executed by autovacuum, as insertions
occur. (This last trigger is disabled by default and can be enabled
with the <literal>autosummarize</literal> parameter.)
</para>
</sect2>
</sect1>
......
......@@ -19683,6 +19683,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
<entry><type>integer</type></entry>
<entry>summarize page ranges not already summarized</entry>
</row>
<row>
<entry>
<literal><function>brin_summarize_range(<parameter>index</> <type>regclass</>, <parameter>blockNumber</> <type>bigint</type>)</function></literal>
</entry>
<entry><type>integer</type></entry>
<entry>summarize the page range covering the given block, if not already summarized</entry>
</row>
<row>
<entry>
<literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
......@@ -19700,7 +19707,8 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
that are not currently summarized by the index; for any such range
it creates a new summary index tuple by scanning the table pages.
It returns the number of new page range summaries that were inserted
into the index.
into the index. <function>brin_summarize_range</> does the same, except
it only summarizes the range that covers the given block number.
</para>
<para>
......
......@@ -382,7 +382,7 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
</variablelist>
<para>
<acronym>BRIN</> indexes accept a different parameter:
<acronym>BRIN</> indexes accept different parameters:
</para>
<variablelist>
......@@ -396,6 +396,16 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>autosummarize</></term>
<listitem>
<para>
Defines whether a summarization run is invoked for the previous page
range whenever an insertion is detected on the next one.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect2>
......
......@@ -26,6 +26,7 @@
#include "catalog/pg_am.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "utils/builtins.h"
......@@ -60,10 +61,12 @@ typedef struct BrinOpaque
BrinDesc *bo_bdesc;
} BrinOpaque;
/*
 * Sentinel block number meaning "all page ranges" rather than one specific
 * range.  It is passed as the pageRange argument of brinsummarize() to ask
 * for every unsummarized range to be processed.
 */
#define BRIN_ALL_BLOCKRANGES InvalidBlockNumber
static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
BrinRevmap *revmap, BlockNumber pagesPerRange);
static void terminate_brin_buildstate(BrinBuildState *state);
static void brinsummarize(Relation index, Relation heapRel,
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
double *numSummarized, double *numExisting);
static void form_and_insert_tuple(BrinBuildState *state);
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
......@@ -126,8 +129,11 @@ brinhandler(PG_FUNCTION_ARGS)
* with those of the new tuple. If the tuple values are not consistent with
* the summary tuple, we need to update the index tuple.
*
* If autosummarization is enabled, check if we need to summarize the previous
* page range.
*
* If the range is not currently summarized (i.e. the revmap returns NULL for
* it), there's nothing to do.
* it), there's nothing to do for this tuple.
*/
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
......@@ -136,30 +142,59 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
IndexInfo *indexInfo)
{
BlockNumber pagesPerRange;
BlockNumber origHeapBlk;
BlockNumber heapBlk;
BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
BrinRevmap *revmap;
Buffer buf = InvalidBuffer;
MemoryContext tupcxt = NULL;
MemoryContext oldcxt = CurrentMemoryContext;
bool autosummarize = BrinGetAutoSummarize(idxRel);
revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
/*
* origHeapBlk is the block number where the insertion occurred. heapBlk
* is the first block in the corresponding page range.
*/
origHeapBlk = ItemPointerGetBlockNumber(heaptid);
heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
for (;;)
{
bool need_insert = false;
OffsetNumber off;
BrinTuple *brtup;
BrinMemTuple *dtup;
BlockNumber heapBlk;
int keyno;
CHECK_FOR_INTERRUPTS();
heapBlk = ItemPointerGetBlockNumber(heaptid);
/* normalize the block number to be the first block in the range */
heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
BUFFER_LOCK_SHARE, NULL);
/*
* If auto-summarization is enabled and we just inserted the first
* tuple into the first block of a new non-first page range, request a
* summarization run of the previous range.
*/
if (autosummarize &&
heapBlk > 0 &&
heapBlk == origHeapBlk &&
ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
{
BlockNumber lastPageRange = heapBlk - 1;
BrinTuple *lastPageTuple;
lastPageTuple =
brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
NULL, BUFFER_LOCK_SHARE, NULL);
if (!lastPageTuple)
AutoVacuumRequestWork(AVW_BRINSummarizeRange,
RelationGetRelid(idxRel),
lastPageRange);
brin_free_tuple(lastPageTuple);
}
brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
NULL, BUFFER_LOCK_SHARE, NULL);
/* if range is unsummarized, there's nothing to do */
if (!brtup)
......@@ -747,7 +782,7 @@ brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
brin_vacuum_scan(info->index, info->strategy);
brinsummarize(info->index, heapRel,
brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES,
&stats->num_index_tuples, &stats->num_index_tuples);
heap_close(heapRel, AccessShareLock);
......@@ -765,7 +800,8 @@ brinoptions(Datum reloptions, bool validate)
BrinOptions *rdopts;
int numoptions;
static const relopt_parse_elt tab[] = {
{"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)}
{"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
{"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
};
options = parseRelOptions(reloptions, validate, RELOPT_KIND_BRIN,
......@@ -791,13 +827,40 @@ brinoptions(Datum reloptions, bool validate)
*/
Datum
brin_summarize_new_values(PG_FUNCTION_ARGS)
{
	/*
	 * Summarizing "new values" is the all-ranges case of
	 * brin_summarize_range: hand it the BRIN_ALL_BLOCKRANGES sentinel as the
	 * block number and pass the caller's index argument through untouched.
	 */
	Datum		allRanges = Int64GetDatum((int64) BRIN_ALL_BLOCKRANGES);

	return DirectFunctionCall2(brin_summarize_range,
							   PG_GETARG_DATUM(0),
							   allRanges);
}
/*
* SQL-callable function to summarize the indicated page range, if not already
* summarized. If the second argument is BRIN_ALL_BLOCKRANGES, all
* unsummarized ranges are summarized.
*/
Datum
brin_summarize_range(PG_FUNCTION_ARGS)
{
Oid indexoid = PG_GETARG_OID(0);
int64 heapBlk64 = PG_GETARG_INT64(1);
BlockNumber heapBlk;
Oid heapoid;
Relation indexRel;
Relation heapRel;
double numSummarized = 0;
if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
{
char *blk = psprintf(INT64_FORMAT, heapBlk64);
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("block number out of range: %s", blk)));
}
heapBlk = (BlockNumber) heapBlk64;
/*
* We must lock table before index to avoid deadlocks. However, if the
* passed indexoid isn't an index then IndexGetRelation() will fail.
......@@ -837,7 +900,7 @@ brin_summarize_new_values(PG_FUNCTION_ARGS)
RelationGetRelationName(indexRel))));
/* OK, do it */
brinsummarize(indexRel, heapRel, &numSummarized, NULL);
brinsummarize(indexRel, heapRel, heapBlk, &numSummarized, NULL);
relation_close(indexRel, ShareUpdateExclusiveLock);
relation_close(heapRel, ShareUpdateExclusiveLock);
......@@ -1063,17 +1126,17 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
}
/*
* Scan a complete BRIN index, and summarize each page range that's not already
* summarized. The index and heap must have been locked by caller in at
* least ShareUpdateExclusiveLock mode.
* Summarize page ranges that are not already summarized. If pageRange is
* BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
* page range containing the given heap page number is scanned.
*
* For each new index tuple inserted, *numSummarized (if not NULL) is
* incremented; for each existing tuple, *numExisting (if not NULL) is
* incremented.
*/
static void
brinsummarize(Relation index, Relation heapRel, double *numSummarized,
double *numExisting)
brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
double *numSummarized, double *numExisting)
{
BrinRevmap *revmap;
BrinBuildState *state = NULL;
......@@ -1082,15 +1145,40 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
BlockNumber heapBlk;
BlockNumber pagesPerRange;
Buffer buf;
BlockNumber startBlk;
BlockNumber endBlk;
/* determine range of pages to process; nothing to do for an empty table */
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
if (heapNumBlocks == 0)
return;
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
if (pageRange == BRIN_ALL_BLOCKRANGES)
{
startBlk = 0;
endBlk = heapNumBlocks;
}
else
{
startBlk = (pageRange / pagesPerRange) * pagesPerRange;
/* Nothing to do if start point is beyond end of table */
if (startBlk > heapNumBlocks)
{
brinRevmapTerminate(revmap);
return;
}
endBlk = startBlk + pagesPerRange;
if (endBlk > heapNumBlocks)
endBlk = heapNumBlocks;
}
/*
* Scan the revmap to find unsummarized items.
*/
buf = InvalidBuffer;
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange)
for (heapBlk = startBlk; heapBlk < endBlk; heapBlk += pagesPerRange)
{
BrinTuple *tup;
OffsetNumber off;
......
......@@ -205,7 +205,11 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
/* normalize the heap block number to be the first page in the range */
heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
/* Compute the revmap page number we need */
/*
* Compute the revmap page number we need. If Invalid is returned (i.e.,
* the revmap page hasn't been created yet), the requested page range is
* not summarized.
*/
mapBlk = revmap_get_blkno(revmap, heapBlk);
if (mapBlk == InvalidBlockNumber)
{
......
......@@ -92,6 +92,15 @@
static relopt_bool boolRelOpts[] =
{
{
{
"autosummarize",
"Enables automatic summarization on this BRIN index",
RELOPT_KIND_BRIN,
AccessExclusiveLock
},
false
},
{
{
"autovacuum_enabled",
......
This diff is collapsed.
......@@ -22,6 +22,7 @@ typedef struct BrinOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
BlockNumber pagesPerRange;
bool autosummarize;
} BrinOptions;
#define BRIN_DEFAULT_PAGES_PER_RANGE 128
......@@ -29,5 +30,9 @@ typedef struct BrinOptions
((relation)->rd_options ? \
((BrinOptions *) (relation)->rd_options)->pagesPerRange : \
BRIN_DEFAULT_PAGES_PER_RANGE)
/*
 * Fetch the "autosummarize" reloption of a BRIN index relation.  When the
 * relation has no rd_options set, the result is false.
 */
#define BrinGetAutoSummarize(relation) \
((relation)->rd_options ? \
((BrinOptions *) (relation)->rd_options)->autosummarize : \
false)
#endif /* BRIN_H */
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201703312
#define CATALOG_VERSION_NO 201704011
#endif
......@@ -564,6 +564,8 @@ DATA(insert OID = 335 ( brinhandler PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0
DESCR("brin index access method handler");
DATA(insert OID = 3952 ( brin_summarize_new_values PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 23 "2205" _null_ _null_ _null_ _null_ _null_ brin_summarize_new_values _null_ _null_ _null_ ));
DESCR("brin: standalone scan new table pages");
DATA(insert OID = 3999 ( brin_summarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 23 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_summarize_range _null_ _null_ _null_ ));
DESCR("brin: standalone scan new table pages");
DATA(insert OID = 338 ( amvalidate PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ amvalidate _null_ _null_ _null_ ));
DESCR("validate an operator class");
......
......@@ -14,6 +14,15 @@
#ifndef AUTOVACUUM_H
#define AUTOVACUUM_H
/*
 * Other processes can request specific work from autovacuum, identified by
 * AutoVacuumWorkItem elements.  Each enumerator below names one kind of
 * work that can be requested through AutoVacuumRequestWork().
 */
typedef enum
{
AVW_BRINSummarizeRange		/* summarize one BRIN page range */
} AutoVacuumWorkItemType;
/* GUC variables */
extern bool autovacuum_start_daemon;
......@@ -60,6 +69,9 @@ extern void AutovacuumWorkerIAm(void);
extern void AutovacuumLauncherIAm(void);
#endif
extern void AutoVacuumRequestWork(AutoVacuumWorkItemType type,
Oid relationId, BlockNumber blkno);
/* shared memory stuff */
extern Size AutoVacuumShmemSize(void);
extern void AutoVacuumShmemInit(void);
......
......@@ -406,3 +406,51 @@ SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
0
(1 row)
-- Test brin_summarize_range
CREATE TABLE brin_summarize (
value int
) WITH (fillfactor=10, autovacuum_enabled=false);
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
-- Fill a few pages
DO $$
DECLARE curtid tid;
BEGIN
LOOP
INSERT INTO brin_summarize VALUES (1) RETURNING ctid INTO curtid;
EXIT WHEN curtid > tid '(2, 0)';
END LOOP;
END;
$$;
-- summarize one range
SELECT brin_summarize_range('brin_summarize_idx', 0);
brin_summarize_range
----------------------
1
(1 row)
-- nothing: already summarized
SELECT brin_summarize_range('brin_summarize_idx', 1);
brin_summarize_range
----------------------
0
(1 row)
-- summarize one range
SELECT brin_summarize_range('brin_summarize_idx', 2);
brin_summarize_range
----------------------
1
(1 row)
-- nothing: page doesn't exist in table
SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
brin_summarize_range
----------------------
0
(1 row)
-- invalid block number values
SELECT brin_summarize_range('brin_summarize_idx', -1);
ERROR: block number out of range: -1
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
ERROR: block number out of range: 4294967296
......@@ -409,3 +409,31 @@ UPDATE brintest SET textcol = '' WHERE textcol IS NOT NULL;
SELECT brin_summarize_new_values('brintest'); -- error, not an index
SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
-- Test brin_summarize_range
CREATE TABLE brin_summarize (
value int
) WITH (fillfactor=10, autovacuum_enabled=false);
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
-- Fill a few pages
DO $$
DECLARE curtid tid;
BEGIN
LOOP
INSERT INTO brin_summarize VALUES (1) RETURNING ctid INTO curtid;
EXIT WHEN curtid > tid '(2, 0)';
END LOOP;
END;
$$;
-- summarize one range
SELECT brin_summarize_range('brin_summarize_idx', 0);
-- nothing: already summarized
SELECT brin_summarize_range('brin_summarize_idx', 1);
-- summarize one range
SELECT brin_summarize_range('brin_summarize_idx', 2);
-- nothing: page doesn't exist in table
SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
-- invalid block number values
SELECT brin_summarize_range('brin_summarize_idx', -1);
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment