Commit c3b23ae4 authored by Andres Freund

Don't do predicate locking for analyze scans, refactor scan option passing.

Before this commit, when ANALYZE was run on a table and serializable
isolation was used (either by virtue of an explicit BEGIN TRANSACTION
ISOLATION LEVEL SERIALIZABLE, or default_transaction_isolation being
set to serializable), a null pointer dereference led to a crash.

The analyze scan doesn't need a snapshot (nor predicate locking), but
before this commit a scan only contained information about being a
bitmap or sample scan.

Refactor the option passing to the scan_begin callback to use a
bitmask instead. Alternatively we could have added a new boolean
parameter, but that seems harder to read. Even before this issue
various people (Heikki, Tom, Robert) suggested doing so.

These changes don't change the scan APIs outside of tableam. The flags
argument could be exposed, but that's not necessary to fix this
problem. Also, the wrapper table_beginscan* functions encapsulate most
of that complexity.

After these changes fixing the bug is trivial: just don't acquire a
predicate lock for analyze style scans. That was already done for
bitmap heap scans.  Add an assert that a snapshot is passed when
acquiring the predicate lock, so that catching this kind of bug
doesn't require running with serializable.

Also add a comment noting that sample scans currently require predicate
locking the entire relation, which previously wasn't remarked upon.

Reported-By: Joe Wildish
Author: Andres Freund
Discussion:
    https://postgr.es/m/4EA80A20-E9BF-49F1-9F01-5B66CAB21453@elusive.cx
    https://postgr.es/m/20190411164947.nkii4gaeilt4bui7@alap3.anarazel.de
    https://postgr.es/m/20190518203102.g7peu2fianukjuxm@alap3.anarazel.de
parent bd1592e8
...@@ -245,8 +245,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) ...@@ -245,8 +245,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
if (!RelationUsesLocalBuffers(scan->rs_base.rs_rd) && if (!RelationUsesLocalBuffers(scan->rs_base.rs_rd) &&
scan->rs_nblocks > NBuffers / 4) scan->rs_nblocks > NBuffers / 4)
{ {
allow_strat = scan->rs_base.rs_allow_strat; allow_strat = (scan->rs_base.rs_flags & SO_ALLOW_STRAT) != 0;
allow_sync = scan->rs_base.rs_allow_sync; allow_sync = (scan->rs_base.rs_flags & SO_ALLOW_SYNC) != 0;
} }
else else
allow_strat = allow_sync = false; allow_strat = allow_sync = false;
...@@ -267,7 +267,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) ...@@ -267,7 +267,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
if (scan->rs_base.rs_parallel != NULL) if (scan->rs_base.rs_parallel != NULL)
{ {
/* For parallel scan, believe whatever ParallelTableScanDesc says. */ /* For parallel scan, believe whatever ParallelTableScanDesc says. */
scan->rs_base.rs_syncscan = scan->rs_base.rs_parallel->phs_syncscan; if (scan->rs_base.rs_parallel->phs_syncscan)
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
else
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
} }
else if (keep_startblock) else if (keep_startblock)
{ {
...@@ -276,16 +279,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) ...@@ -276,16 +279,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
* so that rewinding a cursor doesn't generate surprising results. * so that rewinding a cursor doesn't generate surprising results.
* Reset the active syncscan setting, though. * Reset the active syncscan setting, though.
*/ */
scan->rs_base.rs_syncscan = (allow_sync && synchronize_seqscans); if (allow_sync && synchronize_seqscans)
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
else
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
} }
else if (allow_sync && synchronize_seqscans) else if (allow_sync && synchronize_seqscans)
{ {
scan->rs_base.rs_syncscan = true; scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
scan->rs_startblock = ss_get_location(scan->rs_base.rs_rd, scan->rs_nblocks); scan->rs_startblock = ss_get_location(scan->rs_base.rs_rd, scan->rs_nblocks);
} }
else else
{ {
scan->rs_base.rs_syncscan = false; scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
scan->rs_startblock = 0; scan->rs_startblock = 0;
} }
...@@ -305,11 +311,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) ...@@ -305,11 +311,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData)); memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData));
/* /*
* Currently, we don't have a stats counter for bitmap heap scans (but the * Currently, we only have a stats counter for sequential heap scans (but
* underlying bitmap index scans will be counted) or sample scans (we only * e.g for bitmap scans the underlying bitmap index scans will be counted,
* update stats for tuple fetches there) * and for sample scans we update stats for tuple fetches).
*/ */
if (!scan->rs_base.rs_bitmapscan && !scan->rs_base.rs_samplescan) if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN)
pgstat_count_heap_scan(scan->rs_base.rs_rd); pgstat_count_heap_scan(scan->rs_base.rs_rd);
} }
...@@ -325,7 +331,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk ...@@ -325,7 +331,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
HeapScanDesc scan = (HeapScanDesc) sscan; HeapScanDesc scan = (HeapScanDesc) sscan;
Assert(!scan->rs_inited); /* else too late to change */ Assert(!scan->rs_inited); /* else too late to change */
Assert(!scan->rs_base.rs_syncscan); /* else rs_startblock is significant */ /* else rs_startblock is significant */
Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
/* Check startBlk is valid (but allow case of zero blocks...) */ /* Check startBlk is valid (but allow case of zero blocks...) */
Assert(startBlk == 0 || startBlk < scan->rs_nblocks); Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
...@@ -375,7 +382,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page) ...@@ -375,7 +382,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
RBM_NORMAL, scan->rs_strategy); RBM_NORMAL, scan->rs_strategy);
scan->rs_cblock = page; scan->rs_cblock = page;
if (!scan->rs_base.rs_pageatatime) if (!(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE))
return; return;
buffer = scan->rs_cbuf; buffer = scan->rs_cbuf;
...@@ -574,7 +581,7 @@ heapgettup(HeapScanDesc scan, ...@@ -574,7 +581,7 @@ heapgettup(HeapScanDesc scan,
* time, and much more likely that we'll just bollix things for * time, and much more likely that we'll just bollix things for
* forward scanners. * forward scanners.
*/ */
scan->rs_base.rs_syncscan = false; scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
/* start from last page of the scan */ /* start from last page of the scan */
if (scan->rs_startblock > 0) if (scan->rs_startblock > 0)
page = scan->rs_startblock - 1; page = scan->rs_startblock - 1;
...@@ -738,7 +745,7 @@ heapgettup(HeapScanDesc scan, ...@@ -738,7 +745,7 @@ heapgettup(HeapScanDesc scan,
* a little bit backwards on every invocation, which is confusing. * a little bit backwards on every invocation, which is confusing.
* We don't guarantee any specific ordering in general, though. * We don't guarantee any specific ordering in general, though.
*/ */
if (scan->rs_base.rs_syncscan) if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
ss_report_location(scan->rs_base.rs_rd, page); ss_report_location(scan->rs_base.rs_rd, page);
} }
...@@ -885,7 +892,7 @@ heapgettup_pagemode(HeapScanDesc scan, ...@@ -885,7 +892,7 @@ heapgettup_pagemode(HeapScanDesc scan,
* time, and much more likely that we'll just bollix things for * time, and much more likely that we'll just bollix things for
* forward scanners. * forward scanners.
*/ */
scan->rs_base.rs_syncscan = false; scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
/* start from last page of the scan */ /* start from last page of the scan */
if (scan->rs_startblock > 0) if (scan->rs_startblock > 0)
page = scan->rs_startblock - 1; page = scan->rs_startblock - 1;
...@@ -1037,7 +1044,7 @@ heapgettup_pagemode(HeapScanDesc scan, ...@@ -1037,7 +1044,7 @@ heapgettup_pagemode(HeapScanDesc scan,
* a little bit backwards on every invocation, which is confusing. * a little bit backwards on every invocation, which is confusing.
* We don't guarantee any specific ordering in general, though. * We don't guarantee any specific ordering in general, though.
*/ */
if (scan->rs_base.rs_syncscan) if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
ss_report_location(scan->rs_base.rs_rd, page); ss_report_location(scan->rs_base.rs_rd, page);
} }
...@@ -1125,12 +1132,7 @@ TableScanDesc ...@@ -1125,12 +1132,7 @@ TableScanDesc
heap_beginscan(Relation relation, Snapshot snapshot, heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key, int nkeys, ScanKey key,
ParallelTableScanDesc parallel_scan, ParallelTableScanDesc parallel_scan,
bool allow_strat, uint32 flags)
bool allow_sync,
bool allow_pagemode,
bool is_bitmapscan,
bool is_samplescan,
bool temp_snap)
{ {
HeapScanDesc scan; HeapScanDesc scan;
...@@ -1151,33 +1153,39 @@ heap_beginscan(Relation relation, Snapshot snapshot, ...@@ -1151,33 +1153,39 @@ heap_beginscan(Relation relation, Snapshot snapshot,
scan->rs_base.rs_rd = relation; scan->rs_base.rs_rd = relation;
scan->rs_base.rs_snapshot = snapshot; scan->rs_base.rs_snapshot = snapshot;
scan->rs_base.rs_nkeys = nkeys; scan->rs_base.rs_nkeys = nkeys;
scan->rs_base.rs_bitmapscan = is_bitmapscan; scan->rs_base.rs_flags = flags;
scan->rs_base.rs_samplescan = is_samplescan;
scan->rs_strategy = NULL; /* set in initscan */
scan->rs_base.rs_allow_strat = allow_strat;
scan->rs_base.rs_allow_sync = allow_sync;
scan->rs_base.rs_temp_snap = temp_snap;
scan->rs_base.rs_parallel = parallel_scan; scan->rs_base.rs_parallel = parallel_scan;
scan->rs_strategy = NULL; /* set in initscan */
/* /*
* we can use page-at-a-time mode if it's an MVCC-safe snapshot * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
*/ */
scan->rs_base.rs_pageatatime = if (!(snapshot && IsMVCCSnapshot(snapshot)))
allow_pagemode && snapshot && IsMVCCSnapshot(snapshot); scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
/* /*
* For a seqscan in a serializable transaction, acquire a predicate lock * For seqscan and sample scans in a serializable transaction, acquire a
* on the entire relation. This is required not only to lock all the * predicate lock on the entire relation. This is required not only to
* matching tuples, but also to conflict with new insertions into the * lock all the matching tuples, but also to conflict with new insertions
* table. In an indexscan, we take page locks on the index pages covering * into the table. In an indexscan, we take page locks on the index pages
* the range specified in the scan qual, but in a heap scan there is * covering the range specified in the scan qual, but in a heap scan there
* nothing more fine-grained to lock. A bitmap scan is a different story, * is nothing more fine-grained to lock. A bitmap scan is a different
* there we have already scanned the index and locked the index pages * story, there we have already scanned the index and locked the index
* covering the predicate. But in that case we still have to lock any * pages covering the predicate. But in that case we still have to lock
* matching heap tuples. * any matching heap tuples. For sample scan we could optimize the locking
* to be at least page-level granularity, but we'd need to add per-tuple
* locking for that.
*/ */
if (!is_bitmapscan) if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
{
/*
* Ensure a missing snapshot is noticed reliably, even if the
* isolation mode means predicate locking isn't performed (and
* therefore the snapshot isn't used here).
*/
Assert(snapshot);
PredicateLockRelation(relation, snapshot); PredicateLockRelation(relation, snapshot);
}
/* we only need to set this up once */ /* we only need to set this up once */
scan->rs_ctup.t_tableOid = RelationGetRelid(relation); scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
...@@ -1204,10 +1212,21 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, ...@@ -1204,10 +1212,21 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
if (set_params) if (set_params)
{ {
scan->rs_base.rs_allow_strat = allow_strat; if (allow_strat)
scan->rs_base.rs_allow_sync = allow_sync; scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
scan->rs_base.rs_pageatatime = else
allow_pagemode && IsMVCCSnapshot(scan->rs_base.rs_snapshot); scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
if (allow_sync)
scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
else
scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
if (allow_pagemode && scan->rs_base.rs_snapshot &&
IsMVCCSnapshot(scan->rs_base.rs_snapshot))
scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
else
scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
} }
/* /*
...@@ -1246,7 +1265,7 @@ heap_endscan(TableScanDesc sscan) ...@@ -1246,7 +1265,7 @@ heap_endscan(TableScanDesc sscan)
if (scan->rs_strategy != NULL) if (scan->rs_strategy != NULL)
FreeAccessStrategy(scan->rs_strategy); FreeAccessStrategy(scan->rs_strategy);
if (scan->rs_base.rs_temp_snap) if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
UnregisterSnapshot(scan->rs_base.rs_snapshot); UnregisterSnapshot(scan->rs_base.rs_snapshot);
pfree(scan); pfree(scan);
...@@ -1288,7 +1307,7 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction) ...@@ -1288,7 +1307,7 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction)
HEAPDEBUG_1; /* heap_getnext( info ) */ HEAPDEBUG_1; /* heap_getnext( info ) */
if (scan->rs_base.rs_pageatatime) if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
heapgettup_pagemode(scan, direction, heapgettup_pagemode(scan, direction,
scan->rs_base.rs_nkeys, scan->rs_base.rs_key); scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
else else
...@@ -1335,11 +1354,10 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s ...@@ -1335,11 +1354,10 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
HEAPAMSLOTDEBUG_1; /* heap_getnextslot( info ) */ HEAPAMSLOTDEBUG_1; /* heap_getnextslot( info ) */
if (scan->rs_base.rs_pageatatime) if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
heapgettup_pagemode(scan, direction, heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
else else
heapgettup(scan, direction, scan->rs_base.rs_nkeys, scan->rs_base.rs_key); heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
if (scan->rs_ctup.t_data == NULL) if (scan->rs_ctup.t_data == NULL)
{ {
......
...@@ -2323,7 +2323,7 @@ heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) ...@@ -2323,7 +2323,7 @@ heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
* a little bit backwards on every invocation, which is confusing. * a little bit backwards on every invocation, which is confusing.
* We don't guarantee any specific ordering in general, though. * We don't guarantee any specific ordering in general, though.
*/ */
if (scan->rs_syncscan) if (scan->rs_flags & SO_ALLOW_SYNC)
ss_report_location(scan->rs_rd, blockno); ss_report_location(scan->rs_rd, blockno);
if (blockno == hscan->rs_startblock) if (blockno == hscan->rs_startblock)
...@@ -2357,7 +2357,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, ...@@ -2357,7 +2357,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
HeapScanDesc hscan = (HeapScanDesc) scan; HeapScanDesc hscan = (HeapScanDesc) scan;
TsmRoutine *tsm = scanstate->tsmroutine; TsmRoutine *tsm = scanstate->tsmroutine;
BlockNumber blockno = hscan->rs_cblock; BlockNumber blockno = hscan->rs_cblock;
bool pagemode = scan->rs_pageatatime; bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
Page page; Page page;
bool all_visible; bool all_visible;
...@@ -2504,7 +2504,7 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, ...@@ -2504,7 +2504,7 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
{ {
HeapScanDesc hscan = (HeapScanDesc) scan; HeapScanDesc hscan = (HeapScanDesc) scan;
if (scan->rs_pageatatime) if (scan->rs_flags & SO_ALLOW_PAGEMODE)
{ {
/* /*
* In pageatatime mode, heapgetpage() already did visibility checks, * In pageatatime mode, heapgetpage() already did visibility checks,
......
...@@ -93,12 +93,13 @@ table_slot_create(Relation relation, List **reglist) ...@@ -93,12 +93,13 @@ table_slot_create(Relation relation, List **reglist)
TableScanDesc TableScanDesc
table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key) table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
{ {
uint32 flags = SO_TYPE_SEQSCAN |
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT;
Oid relid = RelationGetRelid(relation); Oid relid = RelationGetRelid(relation);
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key,
NULL, true, true, true, false, NULL, flags);
false, true);
} }
void void
...@@ -108,7 +109,7 @@ table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot) ...@@ -108,7 +109,7 @@ table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
RegisterSnapshot(snapshot); RegisterSnapshot(snapshot);
scan->rs_snapshot = snapshot; scan->rs_snapshot = snapshot;
scan->rs_temp_snap = true; scan->rs_flags |= SO_TEMP_SNAPSHOT;
} }
...@@ -156,6 +157,8 @@ TableScanDesc ...@@ -156,6 +157,8 @@ TableScanDesc
table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
{ {
Snapshot snapshot; Snapshot snapshot;
uint32 flags = SO_TYPE_SEQSCAN |
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
...@@ -165,6 +168,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) ...@@ -165,6 +168,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
snapshot = RestoreSnapshot((char *) parallel_scan + snapshot = RestoreSnapshot((char *) parallel_scan +
parallel_scan->phs_snapshot_off); parallel_scan->phs_snapshot_off);
RegisterSnapshot(snapshot); RegisterSnapshot(snapshot);
flags |= SO_TEMP_SNAPSHOT;
} }
else else
{ {
...@@ -173,9 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) ...@@ -173,9 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
} }
return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
parallel_scan, true, true, true, parallel_scan, flags);
false, false,
!parallel_scan->phs_snapshot_any);
} }
......
...@@ -110,12 +110,7 @@ typedef enum ...@@ -110,12 +110,7 @@ typedef enum
extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key, int nkeys, ScanKey key,
ParallelTableScanDesc parallel_scan, ParallelTableScanDesc parallel_scan,
bool allow_strat, uint32 flags);
bool allow_sync,
bool allow_pagemode,
bool is_bitmapscan,
bool is_samplescan,
bool temp_snap);
extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk, extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk,
BlockNumber endBlk); BlockNumber endBlk);
extern void heapgetpage(TableScanDesc scan, BlockNumber page); extern void heapgetpage(TableScanDesc scan, BlockNumber page);
......
...@@ -35,13 +35,12 @@ typedef struct TableScanDescData ...@@ -35,13 +35,12 @@ typedef struct TableScanDescData
struct SnapshotData *rs_snapshot; /* snapshot to see */ struct SnapshotData *rs_snapshot; /* snapshot to see */
int rs_nkeys; /* number of scan keys */ int rs_nkeys; /* number of scan keys */
struct ScanKeyData *rs_key; /* array of scan key descriptors */ struct ScanKeyData *rs_key; /* array of scan key descriptors */
bool rs_bitmapscan; /* true if this is really a bitmap scan */
bool rs_samplescan; /* true if this is really a sample scan */ /*
bool rs_pageatatime; /* verify visibility page-at-a-time? */ * Information about type and behaviour of the scan, a bitmask of members
bool rs_allow_strat; /* allow or disallow use of access strategy */ * of the ScanOptions enum (see tableam.h).
bool rs_allow_sync; /* allow or disallow use of syncscan */ */
bool rs_temp_snap; /* unregister snapshot at scan end? */ uint32 rs_flags;
bool rs_syncscan; /* report location to syncscan logic? */
struct ParallelTableScanDescData *rs_parallel; /* parallel scan struct ParallelTableScanDescData *rs_parallel; /* parallel scan
* information */ * information */
......
...@@ -39,6 +39,28 @@ struct TBMIterateResult; ...@@ -39,6 +39,28 @@ struct TBMIterateResult;
struct VacuumParams; struct VacuumParams;
struct ValidateIndexState; struct ValidateIndexState;
/*
* Bitmask values for the flags argument to the scan_begin callback.
*/
typedef enum ScanOptions
{
/* one of SO_TYPE_* may be specified */
SO_TYPE_SEQSCAN = 1 << 0,
SO_TYPE_BITMAPSCAN = 1 << 1,
SO_TYPE_SAMPLESCAN = 1 << 2,
SO_TYPE_ANALYZE = 1 << 3,
/* several of SO_ALLOW_* may be specified */
/* allow or disallow use of access strategy */
SO_ALLOW_STRAT = 1 << 4,
/* report location to syncscan logic? */
SO_ALLOW_SYNC = 1 << 5,
/* verify visibility page-at-a-time? */
SO_ALLOW_PAGEMODE = 1 << 6,
/* unregister snapshot at scan end? */
SO_TEMP_SNAPSHOT = 1 << 7
} ScanOptions;
/* /*
* Result codes for table_{update,delete,lock_tuple}, and for visibility * Result codes for table_{update,delete,lock_tuple}, and for visibility
...@@ -78,7 +100,6 @@ typedef enum TM_Result ...@@ -78,7 +100,6 @@ typedef enum TM_Result
TM_WouldBlock TM_WouldBlock
} TM_Result; } TM_Result;
/* /*
* When table_update, table_delete, or table_lock_tuple fail because the target * When table_update, table_delete, or table_lock_tuple fail because the target
* tuple is already outdated, they fill in this struct to provide information * tuple is already outdated, they fill in this struct to provide information
...@@ -170,26 +191,17 @@ typedef struct TableAmRoutine ...@@ -170,26 +191,17 @@ typedef struct TableAmRoutine
* parallelscan_initialize(), and has to be for the same relation. Will * parallelscan_initialize(), and has to be for the same relation. Will
* only be set coming from table_beginscan_parallel(). * only be set coming from table_beginscan_parallel().
* *
* allow_{strat, sync, pagemode} specify whether a scan strategy, * `flags` is a bitmask indicating the type of scan (ScanOptions's
* synchronized scans, or page mode may be used (although not every AM * SO_TYPE_*, currently only one may be specified), options controlling
* will support those). * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
* * specified, an AM may ignore unsupported ones) and whether the snapshot
* is_{bitmapscan, samplescan} specify whether the scan is intended to * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
* support those types of scans.
*
* if temp_snap is true, the snapshot will need to be deallocated at
* scan_end.
*/ */
TableScanDesc (*scan_begin) (Relation rel, TableScanDesc (*scan_begin) (Relation rel,
Snapshot snapshot, Snapshot snapshot,
int nkeys, struct ScanKeyData *key, int nkeys, struct ScanKeyData *key,
ParallelTableScanDesc pscan, ParallelTableScanDesc pscan,
bool allow_strat, uint32 flags);
bool allow_sync,
bool allow_pagemode,
bool is_bitmapscan,
bool is_samplescan,
bool temp_snap);
/* /*
* Release resources and deallocate scan. If TableScanDesc.temp_snap, * Release resources and deallocate scan. If TableScanDesc.temp_snap,
...@@ -715,8 +727,10 @@ static inline TableScanDesc ...@@ -715,8 +727,10 @@ static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot, table_beginscan(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key) int nkeys, struct ScanKeyData *key)
{ {
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, uint32 flags = SO_TYPE_SEQSCAN |
true, true, true, false, false, false); SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
} }
/* /*
...@@ -738,9 +752,14 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, ...@@ -738,9 +752,14 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key, int nkeys, struct ScanKeyData *key,
bool allow_strat, bool allow_sync) bool allow_strat, bool allow_sync)
{ {
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;
allow_strat, allow_sync, true,
false, false, false); if (allow_strat)
flags |= SO_ALLOW_STRAT;
if (allow_sync)
flags |= SO_ALLOW_SYNC;
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
} }
/* /*
...@@ -753,8 +772,9 @@ static inline TableScanDesc ...@@ -753,8 +772,9 @@ static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot, table_beginscan_bm(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key) int nkeys, struct ScanKeyData *key)
{ {
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
false, false, true, true, false, false);
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
} }
/* /*
...@@ -770,9 +790,16 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot, ...@@ -770,9 +790,16 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot,
bool allow_strat, bool allow_sync, bool allow_strat, bool allow_sync,
bool allow_pagemode) bool allow_pagemode)
{ {
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, uint32 flags = SO_TYPE_SAMPLESCAN;
allow_strat, allow_sync, allow_pagemode,
false, true, false); if (allow_strat)
flags |= SO_ALLOW_STRAT;
if (allow_sync)
flags |= SO_ALLOW_SYNC;
if (allow_pagemode)
flags |= SO_ALLOW_PAGEMODE;
return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
} }
/* /*
...@@ -783,9 +810,9 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot, ...@@ -783,9 +810,9 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot,
static inline TableScanDesc static inline TableScanDesc
table_beginscan_analyze(Relation rel) table_beginscan_analyze(Relation rel)
{ {
return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, uint32 flags = SO_TYPE_ANALYZE;
true, false, true,
false, true, false); return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
} }
/* /*
......
...@@ -163,6 +163,14 @@ ERROR: relation "does_not_exist" does not exist ...@@ -163,6 +163,14 @@ ERROR: relation "does_not_exist" does not exist
VACUUM (SKIP_LOCKED) vactst; VACUUM (SKIP_LOCKED) vactst;
VACUUM (SKIP_LOCKED, FULL) vactst; VACUUM (SKIP_LOCKED, FULL) vactst;
ANALYZE (SKIP_LOCKED) vactst; ANALYZE (SKIP_LOCKED) vactst;
-- ensure VACUUM and ANALYZE don't have a problem with serializable
SET default_transaction_isolation = serializable;
VACUUM vactst;
ANALYZE vactst;
RESET default_transaction_isolation;
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
ANALYZE vactst;
COMMIT;
DROP TABLE vaccluster; DROP TABLE vaccluster;
DROP TABLE vactst; DROP TABLE vactst;
DROP TABLE vacparted; DROP TABLE vacparted;
......
...@@ -124,6 +124,15 @@ VACUUM (SKIP_LOCKED) vactst; ...@@ -124,6 +124,15 @@ VACUUM (SKIP_LOCKED) vactst;
VACUUM (SKIP_LOCKED, FULL) vactst; VACUUM (SKIP_LOCKED, FULL) vactst;
ANALYZE (SKIP_LOCKED) vactst; ANALYZE (SKIP_LOCKED) vactst;
-- ensure VACUUM and ANALYZE don't have a problem with serializable
SET default_transaction_isolation = serializable;
VACUUM vactst;
ANALYZE vactst;
RESET default_transaction_isolation;
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
ANALYZE vactst;
COMMIT;
DROP TABLE vaccluster; DROP TABLE vaccluster;
DROP TABLE vactst; DROP TABLE vactst;
DROP TABLE vacparted; DROP TABLE vacparted;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment