Commit 2a96909a authored by Andres Freund

tableam: Support for an index build's initial table scan(s).

To support building indexes over tables of different AMs, the scans
needed to do so have to be routed through the table AM.  While this
moves a fair amount of code, nearly all of the changes just relocate
existing code to below a callback.

Currently the range-based interface wouldn't make much sense for
non-block-based table AMs, but that seems acceptable for now.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
parent 12bb35fc
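
For orientation, here is a rough sketch (not part of this commit) of the per-tuple callback an index AM hands to the new table_index_build_scan(); it matches the IndexBuildCallback typedef this commit moves into access/tableam.h. All "foo" names are hypothetical placeholders, and the usual PostgreSQL headers (postgres.h, access/tableam.h) are assumed.

/*
 * Hypothetical per-tuple callback; a sketch only.  The table AM invokes it
 * once per tuple to be indexed, with the index columns already extracted
 * into values[]/isnull[].
 */
typedef struct FooBuildState
{
	double		indtuples;		/* running count of tuples added to the index */
} FooBuildState;

static void
foo_build_callback(Relation index,
				   HeapTuple htup,
				   Datum *values,
				   bool *isnull,
				   bool tupleIsAlive,
				   void *state)
{
	FooBuildState *buildstate = (FooBuildState *) state;

	/*
	 * A real callback would form an index tuple from values[]/isnull[]
	 * and insert it into the index here.
	 */
	buildstate->indtuples += 1;
}
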
......@@ -23,9 +23,9 @@
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/xact.h"
......@@ -142,7 +142,7 @@ static void bt_tuple_present_callback(Relation index, HeapTuple htup,
Datum *values, bool *isnull,
bool tupleIsAlive, void *checkstate);
static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
IndexTuple itup);
IndexTuple itup);
static bool bt_rootdescend(BtreeCheckState *state, IndexTuple itup);
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
OffsetNumber offset);
......@@ -387,10 +387,10 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
/*
* Register our own snapshot in !readonly case, rather than asking
* IndexBuildHeapScan() to do this for us later. This needs to happen
* before index fingerprinting begins, so we can later be certain that
* index fingerprinting should have reached all tuples returned by
* IndexBuildHeapScan().
* table_index_build_scan() to do this for us later. This needs to
* happen before index fingerprinting begins, so we can later be
* certain that index fingerprinting should have reached all tuples
* returned by table_index_build_scan().
*
* In readonly case, we also check for problems with missing
* downlinks. A second Bloom filter is used for this.
......@@ -525,18 +525,19 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
}
/*
* Create our own scan for IndexBuildHeapScan(), rather than getting
* it to do so for us. This is required so that we can actually use
* the MVCC snapshot registered earlier in !readonly case.
* Create our own scan for table_index_build_scan(), rather than
* getting it to do so for us. This is required so that we can
* actually use the MVCC snapshot registered earlier in !readonly
* case.
*
* Note that IndexBuildHeapScan() calls heap_endscan() for us.
* Note that table_index_build_scan() calls heap_endscan() for us.
*/
scan = table_beginscan_strat(state->heaprel, /* relation */
scan = table_beginscan_strat(state->heaprel, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
true); /* syncscan OK? */
true); /* syncscan OK? */
/*
* Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY
......@@ -565,8 +566,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
RelationGetRelationName(state->rel),
RelationGetRelationName(state->heaprel));
IndexBuildHeapScan(state->heaprel, state->rel, indexinfo, true,
bt_tuple_present_callback, (void *) state, scan);
table_index_build_scan(state->heaprel, state->rel, indexinfo, true,
bt_tuple_present_callback, (void *) state, scan);
ereport(DEBUG1,
(errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
......@@ -814,7 +815,7 @@ nextpage:
* (Limited to heapallindexed readonly callers.)
*
* This is also where heapallindexed callers use their Bloom filter to
* fingerprint IndexTuples for later IndexBuildHeapScan() verification.
* fingerprint IndexTuples for later table_index_build_scan() verification.
*
* Note: Memory allocated in this routine is expected to be released by caller
* resetting state->targetcontext.
......@@ -1776,7 +1777,7 @@ bt_downlink_missing_check(BtreeCheckState *state)
}
/*
* Per-tuple callback from IndexBuildHeapScan, used to determine if index has
* Per-tuple callback from table_index_build_scan, used to determine if index has
* all the entries that definitely should have been observed in leaf pages of
* the target index (that is, all IndexTuples that were fingerprinted by our
* Bloom filter). All heapallindexed checks occur here.
......@@ -1801,7 +1802,7 @@ bt_downlink_missing_check(BtreeCheckState *state)
* verification, just in case it's a cross-page invariant issue, though that
* isn't particularly likely.
*
* IndexBuildHeapScan() expects to be able to find the root tuple when a
* table_index_build_scan() expects to be able to find the root tuple when a
* heap-only tuple (the live tuple at the end of some HOT chain) needs to be
* indexed, in order to replace the actual tuple's TID with the root tuple's
* TID (which is what we're actually passed back here). The index build heap
......@@ -1817,7 +1818,7 @@ bt_downlink_missing_check(BtreeCheckState *state)
* setting will probably also leave the index in a corrupt state before too
* long, the problem is nonetheless that there is heap corruption.)
*
* Heap-only tuple handling within IndexBuildHeapScan() works in a way that
* Heap-only tuple handling within table_index_build_scan() works in a way that
* helps us to detect index tuples that contain the wrong values (values that
* don't match the latest tuple in the HOT chain). This can happen when there
* is no superseding index tuple due to a faulty assessment of HOT safety,
......
......@@ -14,6 +14,7 @@
#include "access/genam.h"
#include "access/generic_xlog.h"
#include "access/tableam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
......@@ -69,7 +70,7 @@ initCachedPage(BloomBuildState *buildstate)
}
/*
* Per-tuple callback from IndexBuildHeapScan.
* Per-tuple callback for table_index_build_scan.
*/
static void
bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
......@@ -141,9 +142,9 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
initCachedPage(&buildstate);
/* Do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
bloomBuildCallback, (void *) &buildstate,
NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, true,
bloomBuildCallback, (void *) &buildstate,
NULL);
/* Flush last page if needed (it will be, unless heap was empty) */
if (buildstate.count > 0)
......
......@@ -238,7 +238,7 @@ ambuild (Relation heapRelation,
but is empty. It must be filled in with whatever fixed data the
access method requires, plus entries for all tuples already existing
in the table. Ordinarily the <function>ambuild</function> function will call
<function>IndexBuildHeapScan()</function> to scan the table for existing tuples
<function>table_index_build_scan()</function> to scan the table for existing tuples
and compute the keys that need to be inserted into the index.
The function must return a palloc'd struct containing statistics about
the new index.
......
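
To make the ambuild contract described above concrete, a minimal hypothetical build routine (a sketch, not code from this commit) could look as follows; it reuses the foo_build_callback/FooBuildState placeholders from the earlier sketch and assumes access/genam.h and access/tableam.h are included.

/*
 * Hypothetical ambuild implementation; illustrative only.
 */
IndexBuildResult *
foobuild(Relation heap, Relation index, IndexInfo *indexInfo)
{
	IndexBuildResult *result;
	FooBuildState buildstate;
	double		reltuples;

	buildstate.indtuples = 0;

	/* Scan the table through its table AM rather than via heapam directly. */
	reltuples = table_index_build_scan(heap, index, indexInfo,
									   true,	/* allow_sync */
									   foo_build_callback,
									   (void *) &buildstate,
									   NULL);	/* no pre-existing scan */

	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
	result->heap_tuples = reltuples;
	result->index_tuples = buildstate.indtuples;

	return result;
}

As the tableam.h hunk below shows, table_index_build_scan() is just a wrapper that calls the AM's index_build_range_scan callback over the whole relation (start block 0, InvalidBlockNumber as the block count, anyvisible disabled).
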
......@@ -23,6 +23,7 @@
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
......@@ -587,7 +588,7 @@ brinendscan(IndexScanDesc scan)
}
/*
* Per-heap-tuple callback for IndexBuildHeapScan.
* Per-heap-tuple callback for table_index_build_scan.
*
* Note we don't worry about the page range at the end of the table here; it is
* present in the build state struct after we're called the last time, but not
......@@ -718,8 +719,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Now scan the relation. No syncscan allowed here because we want the
* heap blocks in physical order.
*/
reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
brinbuildCallback, (void *) state, NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, false,
brinbuildCallback, (void *) state, NULL);
/* process the final batch */
form_and_insert_tuple(state);
......@@ -1230,13 +1231,14 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
* short of brinbuildCallback creating the new index entry.
*
* Note that it is critical we use the "any visible" mode of
* IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted
* by transactions that are still in progress, among other corner cases.
* table_index_build_range_scan here: otherwise, we would miss tuples
* inserted by transactions that are still in progress, among other corner
* cases.
*/
state->bs_currRangeStart = heapBlk;
IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
heapBlk, scanNumBlks,
brinbuildCallback, (void *) state, NULL);
table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true,
heapBlk, scanNumBlks,
brinbuildCallback, (void *) state, NULL);
/*
* Now we update the values obtained by the scan with the placeholder
......
......@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
#include "access/tableam.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
......@@ -394,8 +395,9 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* Do the heap scan. We disallow sync scan here because dataPlaceToPage
* prefers to receive tuples in TID order.
*/
reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
ginBuildCallback, (void *) &buildstate, NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, false,
ginBuildCallback, (void *) &buildstate,
NULL);
/* dump remaining entries to the index */
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
......
......@@ -19,6 +19,7 @@
#include "access/genam.h"
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/tableam.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "miscadmin.h"
......@@ -204,8 +205,9 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
/*
* Do the heap scan.
*/
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
gistBuildCallback, (void *) &buildstate, NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, true,
gistBuildCallback,
(void *) &buildstate, NULL);
/*
* If buffering was used, flush out all the tuples that are still in the
......@@ -454,7 +456,7 @@ calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
}
/*
* Per-tuple callback from IndexBuildHeapScan.
* Per-tuple callback for table_index_build_scan.
*/
static void
gistBuildCallback(Relation index,
......
......@@ -21,6 +21,7 @@
#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
......@@ -159,8 +160,9 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
buildstate.heapRel = heap;
/* do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
hashbuildCallback, (void *) &buildstate, NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, true,
hashbuildCallback,
(void *) &buildstate, NULL);
if (buildstate.spool)
{
......@@ -190,7 +192,7 @@ hashbuildempty(Relation index)
}
/*
* Per-tuple callback from IndexBuildHeapScan
* Per-tuple callback for table_index_build_scan
*/
static void
hashbuildCallback(Relation index,
......
This diff is collapsed.
......@@ -480,9 +480,9 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
/* Fill spool using either serial or parallel heap scan */
if (!buildstate->btleader)
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
_bt_build_callback, (void *) buildstate,
NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, true,
_bt_build_callback, (void *) buildstate,
NULL);
else
reltuples = _bt_parallel_heapscan(buildstate,
&indexInfo->ii_BrokenHotChain);
......@@ -558,7 +558,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
}
/*
* Per-tuple callback from IndexBuildHeapScan
* Per-tuple callback for table_index_build_scan
*/
static void
_bt_build_callback(Relation index,
......@@ -1705,11 +1705,10 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
/* Join parallel scan */
indexInfo = BuildIndexInfo(btspool->index);
indexInfo->ii_Concurrent = btshared->isconcurrent;
scan = table_beginscan_parallel(btspool->heap,
ParallelTableScanFromBTShared(btshared));
reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo,
true, _bt_build_callback,
(void *) &buildstate, scan);
scan = table_beginscan_parallel(btspool->heap, ParallelTableScanFromBTShared(btshared));
reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
true, _bt_build_callback,
(void *) &buildstate, scan);
/*
* Execute this worker's part of the sort.
......
......@@ -19,6 +19,7 @@
#include "access/genam.h"
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/tableam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
......@@ -37,7 +38,7 @@ typedef struct
} SpGistBuildState;
/* Callback to process one heap tuple during IndexBuildHeapScan */
/* Callback to process one heap tuple during table_index_build_scan */
static void
spgistBuildCallback(Relation index, HeapTuple htup, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
......@@ -142,9 +143,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
"SP-GiST build temporary context",
ALLOCSET_DEFAULT_SIZES);
reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
spgistBuildCallback, (void *) &buildstate,
NULL);
reltuples = table_index_build_scan(heap, index, indexInfo, true,
spgistBuildCallback, (void *) &buildstate,
NULL);
MemoryContextDelete(buildstate.tmpCtx);
......
This diff is collapsed.
......@@ -28,6 +28,9 @@ extern bool synchronize_seqscans;
struct BulkInsertStateData;
struct IndexInfo;
struct IndexBuildCallback;
struct ValidateIndexState;
/*
......@@ -106,6 +109,14 @@ typedef struct TM_FailureData
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
HeapTuple htup,
Datum *values,
bool *isnull,
bool tupleIsAlive,
void *state);
/*
* API struct for a table AM. Note this must be allocated in a
* server-lifetime manner, typically as a static const struct, which then gets
......@@ -361,6 +372,31 @@ typedef struct TableAmRoutine
uint8 flags,
TM_FailureData *tmfd);
/* ------------------------------------------------------------------------
* DDL related functionality.
* ------------------------------------------------------------------------
*/
/* see table_index_build_range_scan for reference about parameters */
double (*index_build_range_scan) (Relation heap_rel,
Relation index_rel,
struct IndexInfo *index_nfo,
bool allow_sync,
bool anyvisible,
BlockNumber start_blockno,
BlockNumber end_blockno,
IndexBuildCallback callback,
void *callback_state,
TableScanDesc scan);
/* see table_index_validate_scan for reference about parameters */
void (*index_validate_scan) (Relation heap_rel,
Relation index_rel,
struct IndexInfo *index_info,
Snapshot snapshot,
struct ValidateIndexState *state);
} TableAmRoutine;
......@@ -920,6 +956,111 @@ table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot,
}
/* ------------------------------------------------------------------------
* DDL related functionality.
* ------------------------------------------------------------------------
*/
/*
* table_index_build_scan - scan the table to find tuples to be indexed
*
* This is called back from an access-method-specific index build procedure
* after the AM has done whatever setup it needs. The parent heap relation
* is scanned to find tuples that should be entered into the index. Each
* such tuple is passed to the AM's callback routine, which does the right
* things to add it to the new index. After we return, the AM's index
* build procedure does whatever cleanup it needs.
*
* The total count of live tuples is returned. This is for updating pg_class
* statistics. (It's annoying not to be able to do that here, but we want to
* merge that update with others; see index_update_stats.) Note that the
* index AM itself must keep track of the number of index tuples; we don't do
* so here because the AM might reject some of the tuples for its own reasons,
* such as being unable to store NULLs.
*
* A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
* any potentially broken HOT chains. Currently, we set this if there are any
* RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
* very hard to detect whether they're really incompatible with the chain tip.
* This only really makes sense for the heap AM; it might need to be
* generalized for other AMs later.
*/
static inline double
table_index_build_scan(Relation heap_rel,
Relation index_rel,
struct IndexInfo *index_nfo,
bool allow_sync,
IndexBuildCallback callback,
void *callback_state,
TableScanDesc scan)
{
return heap_rel->rd_tableam->index_build_range_scan(heap_rel,
index_rel,
index_nfo,
allow_sync,
false,
0,
InvalidBlockNumber,
callback,
callback_state,
scan);
}
/*
* As table_index_build_scan(), except that instead of scanning the complete
* table, only the given number of blocks are scanned. Scan to end-of-rel can
* be signalled by passing InvalidBlockNumber as numblocks. Note that
* restricting the range to scan cannot be done when requesting syncscan.
*
* When "anyvisible" mode is requested, all tuples visible to any transaction
* are indexed and counted as live, including those inserted or deleted by
* transactions that are still in progress.
*/
static inline double
table_index_build_range_scan(Relation heap_rel,
Relation index_rel,
struct IndexInfo *index_nfo,
bool allow_sync,
bool anyvisible,
BlockNumber start_blockno,
BlockNumber numblocks,
IndexBuildCallback callback,
void *callback_state,
TableScanDesc scan)
{
return heap_rel->rd_tableam->index_build_range_scan(heap_rel,
index_rel,
index_nfo,
allow_sync,
anyvisible,
start_blockno,
numblocks,
callback,
callback_state,
scan);
}
/*
* table_index_validate_scan - second table scan for concurrent index build
*
* See validate_index() for an explanation.
*/
static inline void
table_index_validate_scan(Relation heap_rel,
Relation index_rel,
struct IndexInfo *index_info,
Snapshot snapshot,
struct ValidateIndexState *state)
{
heap_rel->rd_tableam->index_validate_scan(heap_rel,
index_rel,
index_info,
snapshot,
state);
}
/* ----------------------------------------------------------------------------
* Functions to make modifications a bit simpler.
* ----------------------------------------------------------------------------
......
......@@ -20,14 +20,6 @@
#define DEFAULT_INDEX_TYPE "btree"
/* Typedef for callback function for IndexBuildHeapScan */
typedef void (*IndexBuildCallback) (Relation index,
HeapTuple htup,
Datum *values,
bool *isnull,
bool tupleIsAlive,
void *state);
/* Action code for index_set_state_flags */
typedef enum
{
......@@ -37,6 +29,15 @@ typedef enum
INDEX_DROP_SET_DEAD
} IndexStateFlagsAction;
/* state info for validate_index bulkdelete callback */
typedef struct ValidateIndexState
{
Tuplesortstate *tuplesort; /* for sorting the index TIDs */
/* statistics (for debug purposes only): */
double htups,
itups,
tups_inserted;
} ValidateIndexState;
extern void index_check_primary_key(Relation heapRel,
IndexInfo *indexInfo,
......@@ -110,25 +111,6 @@ extern void index_build(Relation heapRelation,
bool isreindex,
bool parallel);
struct TableScanDescData;
extern double IndexBuildHeapScan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
bool allow_sync,
IndexBuildCallback callback,
void *callback_state,
struct TableScanDescData *scan);
extern double IndexBuildHeapRangeScan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
bool allow_sync,
bool anyvisible,
BlockNumber start_blockno,
BlockNumber end_blockno,
IndexBuildCallback callback,
void *callback_state,
struct TableScanDescData *scan);
extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);
......@@ -155,4 +137,45 @@ extern void RestoreReindexState(void *reindexstate);
extern void IndexSetParentIndex(Relation idx, Oid parentOid);
/*
* itemptr_encode - Encode ItemPointer as int64/int8
*
* This representation must produce values encoded as int64 that sort in the
* same order as their corresponding original TID values would (using the
* default int8 opclass to produce a result equivalent to the default TID
* opclass).
*
* As noted in validate_index(), this can be significantly faster.
*/
static inline int64
itemptr_encode(ItemPointer itemptr)
{
BlockNumber block = ItemPointerGetBlockNumber(itemptr);
OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
int64 encoded;
/*
* Use the 16 least significant bits for the offset. 32 adjacent bits are
* used for the block number. Since remaining bits are unused, there
* cannot be negative encoded values (We assume a two's complement
* representation).
*/
encoded = ((uint64) block << 16) | (uint16) offset;
return encoded;
}
/*
* itemptr_decode - Decode int64/int8 representation back to ItemPointer
*/
static inline void
itemptr_decode(ItemPointer itemptr, int64 encoded)
{
BlockNumber block = (BlockNumber) (encoded >> 16);
OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
ItemPointerSet(itemptr, block, offset);
}
#endif /* INDEX_H */
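
As a small illustration (not part of this commit) of the encoding described above: the block number occupies the higher bits and the offset the low 16 bits, so int64 ordering matches TID ordering. The round trip below is a sketch using arbitrary example values and assumes storage/itemptr.h is included.

static void
itemptr_roundtrip_example(void)
{
	ItemPointerData tid;
	int64		key;

	ItemPointerSet(&tid, 42, 7);	/* block 42, offset 7 */
	key = itemptr_encode(&tid);		/* ((int64) 42 << 16) | 7 == 2752519 */
	itemptr_decode(&tid, key);		/* tid holds block 42, offset 7 again */
}
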
......@@ -2489,6 +2489,7 @@ VacAttrStatsP
VacuumParams
VacuumRelation
VacuumStmt
ValidateIndexState
Value
ValuesScan
ValuesScanState
......@@ -3239,7 +3240,6 @@ uuidKEY
uuid_rc_t
uuid_sortsupport_state
uuid_t
v_i_state
va_list
vacuumingOptions
validate_string_relopt
......