Commit 38628db8 authored by Fujii Masao's avatar Fujii Masao

Add memory barriers for PgBackendStatus.st_changecount protocol.

The st_changecount protocol needs memory barriers to ensure that
the apparent order of execution is the intended one. Otherwise,
on a machine with weak memory ordering, the CPU might, for example,
rearrange the code so that st_changecount is incremented twice
before the modification. This surprising result can lead to bugs.

This commit introduces macros to load and store st_changecount
with memory barriers. These are called before and after
PgBackendStatus entries are modified or copied into private memory,
in order to prevent the CPU from reordering PgBackendStatus accesses.

Per discussion on pgsql-hackers, we decided not to back-patch this
to 9.4 or before until we get an actual bug report about this.

Patch by me. Review by Robert Haas.
parent 19e065c0
...@@ -2563,7 +2563,7 @@ pgstat_bestart(void) ...@@ -2563,7 +2563,7 @@ pgstat_bestart(void)
beentry = MyBEEntry; beentry = MyBEEntry;
do do
{ {
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
} while ((beentry->st_changecount & 1) == 0); } while ((beentry->st_changecount & 1) == 0);
beentry->st_procpid = MyProcPid; beentry->st_procpid = MyProcPid;
...@@ -2588,8 +2588,7 @@ pgstat_bestart(void) ...@@ -2588,8 +2588,7 @@ pgstat_bestart(void)
beentry->st_appname[NAMEDATALEN - 1] = '\0'; beentry->st_appname[NAMEDATALEN - 1] = '\0';
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0'; beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
/* Update app name to current GUC setting */ /* Update app name to current GUC setting */
if (application_name) if (application_name)
...@@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg) ...@@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg)
* before and after. We use a volatile pointer here to ensure the * before and after. We use a volatile pointer here to ensure the
* compiler doesn't try to get cute. * compiler doesn't try to get cute.
*/ */
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
beentry->st_procpid = 0; /* mark invalid */ beentry->st_procpid = 0; /* mark invalid */
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
} }
...@@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) ...@@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
* non-disabled state. As our final update, change the state and * non-disabled state. As our final update, change the state and
* clear fields we will not be updating anymore. * clear fields we will not be updating anymore.
*/ */
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
beentry->st_state = STATE_DISABLED; beentry->st_state = STATE_DISABLED;
beentry->st_state_start_timestamp = 0; beentry->st_state_start_timestamp = 0;
beentry->st_activity[0] = '\0'; beentry->st_activity[0] = '\0';
...@@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) ...@@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
/* st_xact_start_timestamp and st_waiting are also disabled */ /* st_xact_start_timestamp and st_waiting are also disabled */
beentry->st_xact_start_timestamp = 0; beentry->st_xact_start_timestamp = 0;
beentry->st_waiting = false; beentry->st_waiting = false;
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
} }
return; return;
} }
...@@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) ...@@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
/* /*
* Now update the status entry * Now update the status entry
*/ */
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
beentry->st_state = state; beentry->st_state = state;
beentry->st_state_start_timestamp = current_timestamp; beentry->st_state_start_timestamp = current_timestamp;
...@@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) ...@@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
beentry->st_activity_start_timestamp = start_timestamp; beentry->st_activity_start_timestamp = start_timestamp;
} }
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
} }
/* ---------- /* ----------
...@@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname) ...@@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname)
* st_changecount before and after. We use a volatile pointer here to * st_changecount before and after. We use a volatile pointer here to
* ensure the compiler doesn't try to get cute. * ensure the compiler doesn't try to get cute.
*/ */
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
memcpy((char *) beentry->st_appname, appname, len); memcpy((char *) beentry->st_appname, appname, len);
beentry->st_appname[len] = '\0'; beentry->st_appname[len] = '\0';
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
} }
/* /*
...@@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp) ...@@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp)
* st_changecount before and after. We use a volatile pointer here to * st_changecount before and after. We use a volatile pointer here to
* ensure the compiler doesn't try to get cute. * ensure the compiler doesn't try to get cute.
*/ */
beentry->st_changecount++; pgstat_increment_changecount_before(beentry);
beentry->st_xact_start_timestamp = tstamp; beentry->st_xact_start_timestamp = tstamp;
beentry->st_changecount++; pgstat_increment_changecount_after(beentry);
Assert((beentry->st_changecount & 1) == 0);
} }
/* ---------- /* ----------
...@@ -2839,7 +2833,10 @@ pgstat_read_current_status(void) ...@@ -2839,7 +2833,10 @@ pgstat_read_current_status(void)
*/ */
for (;;) for (;;)
{ {
int save_changecount = beentry->st_changecount; int before_changecount;
int after_changecount;
pgstat_save_changecount_before(beentry, before_changecount);
localentry->backendStatus.st_procpid = beentry->st_procpid; localentry->backendStatus.st_procpid = beentry->st_procpid;
if (localentry->backendStatus.st_procpid > 0) if (localentry->backendStatus.st_procpid > 0)
...@@ -2856,8 +2853,9 @@ pgstat_read_current_status(void) ...@@ -2856,8 +2853,9 @@ pgstat_read_current_status(void)
localentry->backendStatus.st_activity = localactivity; localentry->backendStatus.st_activity = localactivity;
} }
if (save_changecount == beentry->st_changecount && pgstat_save_changecount_after(beentry, after_changecount);
(save_changecount & 1) == 0) if (before_changecount == after_changecount &&
(before_changecount & 1) == 0)
break; break;
/* Make sure we can break out of loop if stuck... */ /* Make sure we can break out of loop if stuck... */
...@@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser) ...@@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
for (;;) for (;;)
{ {
int save_changecount = vbeentry->st_changecount; int before_changecount;
int after_changecount;
pgstat_save_changecount_before(vbeentry, before_changecount);
found = (vbeentry->st_procpid == pid); found = (vbeentry->st_procpid == pid);
if (save_changecount == vbeentry->st_changecount && pgstat_save_changecount_after(vbeentry, after_changecount);
(save_changecount & 1) == 0)
if (before_changecount == after_changecount &&
(before_changecount & 1) == 0)
break; break;
/* Make sure we can break out of loop if stuck... */ /* Make sure we can break out of loop if stuck... */
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "libpq/pqcomm.h" #include "libpq/pqcomm.h"
#include "portability/instr_time.h" #include "portability/instr_time.h"
#include "postmaster/pgarch.h" #include "postmaster/pgarch.h"
#include "storage/barrier.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/relcache.h" #include "utils/relcache.h"
...@@ -714,6 +715,12 @@ typedef struct PgBackendStatus ...@@ -714,6 +715,12 @@ typedef struct PgBackendStatus
* st_changecount again. If the value hasn't changed, and if it's even, * st_changecount again. If the value hasn't changed, and if it's even,
* the copy is valid; otherwise start over. This makes updates cheap * the copy is valid; otherwise start over. This makes updates cheap
* while reads are potentially expensive, but that's the tradeoff we want. * while reads are potentially expensive, but that's the tradeoff we want.
*
* The above protocol needs memory barriers to ensure that the
* apparent order of execution is the intended one. Otherwise,
* on a machine with weak memory ordering, the CPU might, for example,
* rearrange the code so that st_changecount is incremented twice
* before the modification. This surprising result can lead to bugs.
*/ */
int st_changecount; int st_changecount;
...@@ -745,6 +752,43 @@ typedef struct PgBackendStatus ...@@ -745,6 +752,43 @@ typedef struct PgBackendStatus
char *st_activity; char *st_activity;
} PgBackendStatus; } PgBackendStatus;
/*
* Macros to load and store st_changecount with the memory barriers.
*
* pgstat_increment_changecount_before() and
* pgstat_increment_changecount_after() need to be called before and after
* PgBackendStatus entries are modified, respectively. This makes sure that
* st_changecount is incremented around the modification.
*
* Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
* need to be called before and after PgBackendStatus entries are copied into
* private memory, respectively.
*/
/*
 * pgstat_increment_changecount_before
 *
 * Call immediately before modifying a PgBackendStatus entry.  Increments
 * the entry's st_changecount and then issues a write barrier, so that the
 * increment is not reordered after the modifications that follow.
 *
 * beentry: pointer expression designating the entry.  It is parenthesized
 * in the expansion so that expressions such as "&array[i]" or "base + i"
 * bind correctly to the "->" operator.
 */
#define pgstat_increment_changecount_before(beentry)	\
	do {	\
		(beentry)->st_changecount++;	\
		pg_write_barrier();	\
	} while (0)
/*
 * pgstat_increment_changecount_after
 *
 * Call immediately after modifying a PgBackendStatus entry.  Issues a
 * write barrier so the preceding modifications are not reordered after the
 * increment, then increments st_changecount and asserts that the resulting
 * count is even.
 *
 * beentry: pointer expression designating the entry.  It is parenthesized
 * in the expansion so that expressions such as "&array[i]" bind correctly.
 * Note that the argument is evaluated more than once, so it must be free
 * of side effects.
 */
#define pgstat_increment_changecount_after(beentry)	\
	do {	\
		pg_write_barrier();	\
		(beentry)->st_changecount++;	\
		Assert(((beentry)->st_changecount & 1) == 0);	\
	} while (0)
/*
 * pgstat_save_changecount_before
 *
 * Call immediately before copying a PgBackendStatus entry into private
 * memory.  Stores the entry's current st_changecount into save_changecount
 * and then issues a read barrier, so that the reads performed by the copy
 * are not reordered before the load of the counter.
 *
 * beentry: pointer expression designating the entry.
 * save_changecount: lvalue that receives the counter value.
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions (e.g. "&array[i]") can be passed safely.
 */
#define pgstat_save_changecount_before(beentry, save_changecount)	\
	do {	\
		(save_changecount) = (beentry)->st_changecount;	\
		pg_read_barrier();	\
	} while (0)
/*
 * pgstat_save_changecount_after
 *
 * Call immediately after copying a PgBackendStatus entry into private
 * memory.  Issues a read barrier so that the reads performed by the copy
 * are not reordered after the load of the counter, then stores the entry's
 * current st_changecount into save_changecount.
 *
 * beentry: pointer expression designating the entry.
 * save_changecount: lvalue that receives the counter value.
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions (e.g. "&array[i]") can be passed safely.
 */
#define pgstat_save_changecount_after(beentry, save_changecount)	\
	do {	\
		pg_read_barrier();	\
		(save_changecount) = (beentry)->st_changecount;	\
	} while (0)
/* ---------- /* ----------
* LocalPgBackendStatus * LocalPgBackendStatus
* *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment