Commit bb16aba5 authored by Thomas Munro

Enable parallel query with SERIALIZABLE isolation.

Previously, the SERIALIZABLE isolation level prevented parallel query
from being used.  Allow the two features to be used together by
sharing the leader's SERIALIZABLEXACT with parallel workers.

An extra per-SERIALIZABLEXACT LWLock is introduced to make it safe to
share, and new logic is introduced to coordinate the early release
of the SERIALIZABLEXACT required for the SXACT_FLAG_RO_SAFE
optimization, as follows:

The first backend to observe the SXACT_FLAG_RO_SAFE flag (set by
some other transaction) will 'partially release' the SERIALIZABLEXACT,
meaning that the conflicts and locks it holds are released, but the
SERIALIZABLEXACT itself will remain active because other backends
might still have a pointer to it.

Whenever any backend notices the SXACT_FLAG_RO_SAFE flag, it clears
its own MySerializableXact variable and frees local resources so that
it can skip SSI checks for the rest of the transaction.  In the
special case of the leader process, it transfers the SERIALIZABLEXACT
to a new variable SavedSerializableXact, so that it can be completely
released at the end of the transaction after all workers have exited.

Remove the serializable_okay flag added to CreateParallelContext() by
commit 9da0cc35, because it's now redundant.

Author: Thomas Munro
Reviewed-by: Haribabu Kommi, Robert Haas, Masahiko Sawada, Kevin Grittner
Discussion: https://postgr.es/m/CAEepm=0gXGYhtrVDWOTHS8SQQy_=S9xo+8oCxGLWZAOoeJ=yzQ@mail.gmail.com
parent 13e8643b
......@@ -861,7 +861,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<tbody>
<row>
<entry morerows="63"><literal>LWLock</literal></entry>
<entry morerows="64"><literal>LWLock</literal></entry>
<entry><literal>ShmemIndexLock</literal></entry>
<entry>Waiting to find or allocate space in shared memory.</entry>
</row>
......@@ -1121,6 +1121,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry><literal>predicate_lock_manager</literal></entry>
<entry>Waiting to add or examine predicate lock information.</entry>
</row>
<row>
<entry><literal>serializable_xact</literal></entry>
<entry>Waiting to perform an operation on a serializable transaction
in a parallel query.</entry>
</row>
<row>
<entry><literal>parallel_query_dsa</literal></entry>
<entry>Waiting for parallel query dynamic shared memory allocation lock.</entry>
......
......@@ -184,13 +184,6 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
using a very large number of processes.
</para>
</listitem>
<listitem>
<para>
The transaction isolation level is serializable. This is
a limitation of the current implementation.
</para>
</listitem>
</itemizedlist>
<para>
......@@ -233,16 +226,6 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
that may be suboptimal when run serially.
</para>
</listitem>
<listitem>
<para>
The transaction isolation level is serializable. This situation
does not normally arise, because parallel query plans are not
generated when the transaction isolation level is serializable.
However, it can happen if the transaction isolation level is changed to
serializable after the plan is generated and before it is executed.
</para>
</listitem>
</itemizedlist>
</sect1>
......
......@@ -1265,7 +1265,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
EnterParallelMode();
Assert(request > 0);
pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
request, true);
request);
scantuplesortstates = leaderparticipates ? request + 1 : request;
/*
......
......@@ -31,6 +31,7 @@
#include "optimizer/optimizer.h"
#include "pgstat.h"
#include "storage/ipc.h"
#include "storage/predicate.h"
#include "storage/sinval.h"
#include "storage/spin.h"
#include "tcop/tcopprot.h"
......@@ -91,6 +92,7 @@ typedef struct FixedParallelState
BackendId parallel_master_backend_id;
TimestampTz xact_ts;
TimestampTz stmt_ts;
SerializableXactHandle serializable_xact_handle;
/* Mutex protects remaining fields. */
slock_t mutex;
......@@ -155,7 +157,7 @@ static void ParallelWorkerShutdown(int code, Datum arg);
*/
ParallelContext *
CreateParallelContext(const char *library_name, const char *function_name,
int nworkers, bool serializable_okay)
int nworkers)
{
MemoryContext oldcontext;
ParallelContext *pcxt;
......@@ -166,16 +168,6 @@ CreateParallelContext(const char *library_name, const char *function_name,
/* Number of workers should be non-negative. */
Assert(nworkers >= 0);
/*
* If we are running under serializable isolation, we can't use parallel
* workers, at least not until somebody enhances that mechanism to be
* parallel-aware. Utility statement callers may ask us to ignore this
* restriction because they're always able to safely ignore the fact that
* SIREAD locks do not work with parallelism.
*/
if (IsolationIsSerializable() && !serializable_okay)
nworkers = 0;
/* We might be running in a short-lived memory context. */
oldcontext = MemoryContextSwitchTo(TopTransactionContext);
......@@ -327,6 +319,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
fps->parallel_master_backend_id = MyBackendId;
fps->xact_ts = GetCurrentTransactionStartTimestamp();
fps->stmt_ts = GetCurrentStatementStartTimestamp();
fps->serializable_xact_handle = ShareSerializableXact();
SpinLockInit(&fps->mutex);
fps->last_xlog_end = 0;
shm_toc_insert(pcxt->toc, PARALLEL_KEY_FIXED, fps);
......@@ -1422,6 +1415,9 @@ ParallelWorkerMain(Datum main_arg)
false);
RestoreEnumBlacklist(enumblacklistspace);
/* Attach to the leader's serializable transaction, if SERIALIZABLE. */
AttachSerializableXact(fps->serializable_xact_handle);
/*
* We've initialized all of our state now; nothing should change
* hereafter.
......
......@@ -2024,9 +2024,12 @@ CommitTransaction(void)
/*
* Mark serializable transaction as complete for predicate locking
* purposes. This should be done as late as we can put it and still allow
* errors to be raised for failure patterns found at commit.
* errors to be raised for failure patterns found at commit. This is not
* appropriate in a parallel worker, however, because we aren't committing
* the leader's transaction and its serializable state will live on.
*/
PreCommit_CheckForSerializationFailure();
if (!is_parallel_worker)
PreCommit_CheckForSerializationFailure();
/*
* Insert notifications sent by NOTIFY commands into the queue. This
......
......@@ -604,7 +604,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
pstmt_data = ExecSerializePlan(planstate->plan, estate);
/* Create a parallel context. */
pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers, false);
pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers);
pei->pcxt = pcxt;
/*
......
......@@ -337,22 +337,13 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
* parallel worker. We might eventually be able to relax this
* restriction, but for now it seems best not to have parallel workers
* trying to create their own parallel workers.
*
* We can't use parallelism in serializable mode because the predicate
* locking code is not parallel-aware. It's not catastrophic if someone
* tries to run a parallel plan in serializable mode; it just won't get
* any workers and will run serially. But it seems like a good heuristic
* to assume that the same serialization level will be in effect at plan
* time and execution time, so don't generate a parallel plan if we're in
* serializable mode.
*/
if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
IsUnderPostmaster &&
parse->commandType == CMD_SELECT &&
!parse->hasModifyingCTE &&
max_parallel_workers_per_gather > 0 &&
!IsParallelWorker() &&
!IsolationIsSerializable())
!IsParallelWorker())
{
/* all the cheap tests pass, so scan the query tree */
glob->maxParallelHazard = max_parallel_hazard(parse);
......
......@@ -521,6 +521,7 @@ RegisterLWLockTranches(void)
LWLockRegisterTranche(LWTRANCHE_TBM, "tbm");
LWLockRegisterTranche(LWTRANCHE_PARALLEL_APPEND, "parallel_append");
LWLockRegisterTranche(LWTRANCHE_PARALLEL_HASH_JOIN, "parallel_hash_join");
LWLockRegisterTranche(LWTRANCHE_SXACT, "serializable_xact");
/* Register named tranches. */
for (i = 0; i < NamedLWLockTrancheRequests; i++)
......
This diff is collapsed.
......@@ -566,7 +566,7 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
if (owner == TopTransactionResourceOwner)
{
ProcReleaseLocks(isCommit);
ReleasePredicateLocks(isCommit);
ReleasePredicateLocks(isCommit, false);
}
}
else
......
......@@ -60,8 +60,7 @@ extern PGDLLIMPORT bool InitializingParallelWorker;
#define IsParallelWorker() (ParallelWorkerNumber >= 0)
extern ParallelContext *CreateParallelContext(const char *library_name,
const char *function_name, int nworkers,
bool serializable_okay);
const char *function_name, int nworkers);
extern void InitializeParallelDSM(ParallelContext *pcxt);
extern void ReinitializeParallelDSM(ParallelContext *pcxt);
extern void LaunchParallelWorkers(ParallelContext *pcxt);
......
......@@ -219,6 +219,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_SHARED_TUPLESTORE,
LWTRANCHE_TBM,
LWTRANCHE_PARALLEL_APPEND,
LWTRANCHE_SXACT,
LWTRANCHE_FIRST_USER_DEFINED
} BuiltinTrancheIds;
......
......@@ -30,6 +30,11 @@ extern int max_predicate_locks_per_page;
/* Number of SLRU buffers to use for predicate locking */
#define NUM_OLDSERXID_BUFFERS 16
/*
* A handle used for sharing SERIALIZABLEXACT objects between the participants
* in a parallel query.
*/
typedef void *SerializableXactHandle;
/*
* function prototypes
......@@ -56,7 +61,7 @@ extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snap
extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void TransferPredicateLocksToHeapRelation(Relation relation);
extern void ReleasePredicateLocks(bool isCommit);
extern void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe);
/* conflict detection (may also trigger rollback) */
extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple,
......@@ -74,4 +79,8 @@ extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
extern void predicatelock_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
/* parallel query support */
extern SerializableXactHandle ShareSerializableXact(void);
extern void AttachSerializableXact(SerializableXactHandle handle);
#endif /* PREDICATE_H */
......@@ -15,6 +15,7 @@
#define PREDICATE_INTERNALS_H
#include "storage/lock.h"
#include "storage/lwlock.h"
/*
* Commit number.
......@@ -91,6 +92,9 @@ typedef struct SERIALIZABLEXACT
SHM_QUEUE finishedLink; /* list link in
* FinishedSerializableTransactions */
LWLock predicateLockListLock; /* protects predicateLocks in parallel
* mode */
/*
* for r/o transactions: list of concurrent r/w transactions that we could
* potentially have conflicts with, and vice versa for r/w transactions
......@@ -123,6 +127,12 @@ typedef struct SERIALIZABLEXACT
#define SXACT_FLAG_RO_UNSAFE 0x00000100
#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
/*
* The following flag means the transaction has been partially released
* already, but is being preserved because parallel workers might have a
* reference to it. It'll be recycled by the leader at end-of-transaction.
*/
#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
/*
* The following types are used to provide an ad hoc list for holding
......
Parsed test spec with 2 sessions
starting permutation: s1r s2r1 s1c s2r2 s2c
step s1r: SELECT * FROM foo;
a
1
2
3
4
5
6
7
8
9
10
step s2r1: SELECT * FROM foo;
a
1
2
3
4
5
6
7
8
9
10
step s1c: COMMIT;
step s2r2: SELECT * FROM foo;
a
1
2
3
4
5
6
7
8
9
10
step s2c: COMMIT;
Parsed test spec with 3 sessions
starting permutation: s2rx s2ry s1ry s1wy s1c s2wx s2c s3c
step s2rx: SELECT balance FROM bank_account WHERE id = 'X';
balance
0
step s2ry: SELECT balance FROM bank_account WHERE id = 'Y';
balance
0
step s1ry: SELECT balance FROM bank_account WHERE id = 'Y';
balance
0
step s1wy: UPDATE bank_account SET balance = 20 WHERE id = 'Y';
step s1c: COMMIT;
step s2wx: UPDATE bank_account SET balance = -11 WHERE id = 'X';
step s2c: COMMIT;
step s3c: COMMIT;
starting permutation: s2rx s2ry s1ry s1wy s1c s3r s3c s2wx
step s2rx: SELECT balance FROM bank_account WHERE id = 'X';
balance
0
step s2ry: SELECT balance FROM bank_account WHERE id = 'Y';
balance
0
step s1ry: SELECT balance FROM bank_account WHERE id = 'Y';
balance
0
step s1wy: UPDATE bank_account SET balance = 20 WHERE id = 'Y';
step s1c: COMMIT;
step s3r: SELECT id, balance FROM bank_account WHERE id IN ('X', 'Y') ORDER BY id;
id balance
X 0
Y 20
step s3c: COMMIT;
step s2wx: UPDATE bank_account SET balance = -11 WHERE id = 'X';
ERROR: could not serialize access due to read/write dependencies among transactions
......@@ -78,3 +78,5 @@ test: partition-key-update-3
test: partition-key-update-4
test: plpgsql-toast
test: truncate-conflict
test: serializable-parallel
test: serializable-parallel-2
# Exercise the case where a read-only serializable transaction has
# SXACT_FLAG_RO_SAFE set in a parallel query.
#
# The test table foo holds the integers 1..10 and has its parallel_workers
# storage parameter set to 2.
setup
{
CREATE TABLE foo AS SELECT generate_series(1, 10)::int a;
ALTER TABLE foo SET (parallel_workers = 2);
}
teardown
{
DROP TABLE foo;
}
# s1: a SERIALIZABLE transaction (not declared READ ONLY) that reads foo and
# then commits.
session "s1"
setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
step "s1r" { SELECT * FROM foo; }
step "s1c" { COMMIT; }
# s2: a SERIALIZABLE READ ONLY transaction that reads foo twice.  Both
# parallel cost settings are zeroed in its setup, presumably so that the
# planner chooses a parallel plan for these scans (TODO confirm).
session "s2"
setup {
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0;
}
step "s2r1" { SELECT * FROM foo; }
step "s2r2" { SELECT * FROM foo; }
step "s2c" { COMMIT; }
# s2 reads once while s1 is still open and once more after s1 has committed.
# Presumably the second read is where s2 can observe SXACT_FLAG_RO_SAFE
# (TODO confirm against predicate.c).
permutation "s1r" "s2r1" "s1c" "s2r2" "s2c"
# The example from the paper "A read-only transaction anomaly under snapshot
# isolation"[1].
#
# Here we test that serializable snapshot isolation (SERIALIZABLE) doesn't
# suffer from the anomaly, because s2 is aborted upon detection of a cycle.
# In this case the read-only query s3 happens to be running in a parallel
# worker.
#
# [1] http://www.cs.umb.edu/~poneil/ROAnom.pdf
#
# The test table bank_account starts with two rows, 'X' and 'Y', both with
# a balance of 0.
setup
{
CREATE TABLE bank_account (id TEXT PRIMARY KEY, balance DECIMAL NOT NULL);
INSERT INTO bank_account (id, balance) VALUES ('X', 0), ('Y', 0);
}
teardown
{
DROP TABLE bank_account;
}
# s1: reads Y's balance, sets it to 20, and commits.
session "s1"
setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
step "s1ry" { SELECT balance FROM bank_account WHERE id = 'Y'; }
step "s1wy" { UPDATE bank_account SET balance = 20 WHERE id = 'Y'; }
step "s1c" { COMMIT; }
# s2: reads both balances, then sets X's balance to -11.
session "s2"
setup { BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE; }
step "s2rx" { SELECT balance FROM bank_account WHERE id = 'X'; }
step "s2ry" { SELECT balance FROM bank_account WHERE id = 'Y'; }
step "s2wx" { UPDATE bank_account SET balance = -11 WHERE id = 'X'; }
step "s2c" { COMMIT; }
# s3: only reads both balances.  Its setup turns force_parallel_mode on,
# presumably so that the query is executed by a parallel worker as the
# header describes (TODO confirm).
session "s3"
setup {
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
SET force_parallel_mode = on;
}
step "s3r" { SELECT id, balance FROM bank_account WHERE id IN ('X', 'Y') ORDER BY id; }
step "s3c" { COMMIT; }
# without s3, s1 and s2 commit
permutation "s2rx" "s2ry" "s1ry" "s1wy" "s1c" "s2wx" "s2c" "s3c"
# once s3 observes the data committed by s1, a cycle is created and s2 aborts
permutation "s2rx" "s2ry" "s1ry" "s1wy" "s1c" "s3r" "s3c" "s2wx"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment