]> granicus.if.org Git - postgresql/commitdiff
Add memory barriers for PgBackendStatus.st_changecount protocol.
authorFujii Masao <fujii@postgresql.org>
Thu, 18 Dec 2014 14:07:51 +0000 (23:07 +0900)
committerFujii Masao <fujii@postgresql.org>
Thu, 18 Dec 2014 14:07:51 +0000 (23:07 +0900)
st_changecount protocol needs the memory barriers to ensure that
the apparent order of execution is as it desires. Otherwise,
for example, the CPU might rearrange the code so that st_changecount
is incremented twice before the modification on a machine with
weak memory ordering. This surprising result can lead to bugs.

This commit introduces the macros to load and store st_changecount
with the memory barriers. These are called before and after
PgBackendStatus entries are modified or copied into private memory,
in order to prevent CPU from reordering PgBackendStatus access.

Per discussion on pgsql-hackers, we decided not to back-patch this
to 9.4 or before until we get an actual bug report about this.

Patch by me. Review by Robert Haas.

src/backend/postmaster/pgstat.c
src/include/pgstat.h

index f71fdeb1422337657817e29c0ec7120d4ff77298..6672b8bae51688df7e740a6e8df6267fc9f8f13d 100644 (file)
@@ -2563,7 +2563,7 @@ pgstat_bestart(void)
        beentry = MyBEEntry;
        do
        {
-               beentry->st_changecount++;
+               pgstat_increment_changecount_before(beentry);
        } while ((beentry->st_changecount & 1) == 0);
 
        beentry->st_procpid = MyProcPid;
@@ -2588,8 +2588,7 @@ pgstat_bestart(void)
        beentry->st_appname[NAMEDATALEN - 1] = '\0';
        beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
 
-       beentry->st_changecount++;
-       Assert((beentry->st_changecount & 1) == 0);
+       pgstat_increment_changecount_after(beentry);
 
        /* Update app name to current GUC setting */
        if (application_name)
@@ -2624,12 +2623,11 @@ pgstat_beshutdown_hook(int code, Datum arg)
         * before and after.  We use a volatile pointer here to ensure the
         * compiler doesn't try to get cute.
         */
-       beentry->st_changecount++;
+       pgstat_increment_changecount_before(beentry);
 
        beentry->st_procpid = 0;        /* mark invalid */
 
-       beentry->st_changecount++;
-       Assert((beentry->st_changecount & 1) == 0);
+       pgstat_increment_changecount_after(beentry);
 }
 
 
@@ -2666,7 +2664,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
                         * non-disabled state.  As our final update, change the state and
                         * clear fields we will not be updating anymore.
                         */
-                       beentry->st_changecount++;
+                       pgstat_increment_changecount_before(beentry);
                        beentry->st_state = STATE_DISABLED;
                        beentry->st_state_start_timestamp = 0;
                        beentry->st_activity[0] = '\0';
@@ -2674,8 +2672,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
                        /* st_xact_start_timestamp and st_waiting are also disabled */
                        beentry->st_xact_start_timestamp = 0;
                        beentry->st_waiting = false;
-                       beentry->st_changecount++;
-                       Assert((beentry->st_changecount & 1) == 0);
+                       pgstat_increment_changecount_after(beentry);
                }
                return;
        }
@@ -2695,7 +2692,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
        /*
         * Now update the status entry
         */
-       beentry->st_changecount++;
+       pgstat_increment_changecount_before(beentry);
 
        beentry->st_state = state;
        beentry->st_state_start_timestamp = current_timestamp;
@@ -2707,8 +2704,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
                beentry->st_activity_start_timestamp = start_timestamp;
        }
 
-       beentry->st_changecount++;
-       Assert((beentry->st_changecount & 1) == 0);
+       pgstat_increment_changecount_after(beentry);
 }
 
 /* ----------
@@ -2734,13 +2730,12 @@ pgstat_report_appname(const char *appname)
         * st_changecount before and after.  We use a volatile pointer here to
         * ensure the compiler doesn't try to get cute.
         */
-       beentry->st_changecount++;
+       pgstat_increment_changecount_before(beentry);
 
        memcpy((char *) beentry->st_appname, appname, len);
        beentry->st_appname[len] = '\0';
 
-       beentry->st_changecount++;
-       Assert((beentry->st_changecount & 1) == 0);
+       pgstat_increment_changecount_after(beentry);
 }
 
 /*
@@ -2760,10 +2755,9 @@ pgstat_report_xact_timestamp(TimestampTz tstamp)
         * st_changecount before and after.  We use a volatile pointer here to
         * ensure the compiler doesn't try to get cute.
         */
-       beentry->st_changecount++;
+       pgstat_increment_changecount_before(beentry);
        beentry->st_xact_start_timestamp = tstamp;
-       beentry->st_changecount++;
-       Assert((beentry->st_changecount & 1) == 0);
+       pgstat_increment_changecount_after(beentry);
 }
 
 /* ----------
@@ -2839,7 +2833,10 @@ pgstat_read_current_status(void)
                 */
                for (;;)
                {
-                       int                     save_changecount = beentry->st_changecount;
+                       int                     before_changecount;
+                       int                     after_changecount;
+
+                       pgstat_save_changecount_before(beentry, before_changecount);
 
                        localentry->backendStatus.st_procpid = beentry->st_procpid;
                        if (localentry->backendStatus.st_procpid > 0)
@@ -2856,8 +2853,9 @@ pgstat_read_current_status(void)
                                localentry->backendStatus.st_activity = localactivity;
                        }
 
-                       if (save_changecount == beentry->st_changecount &&
-                               (save_changecount & 1) == 0)
+                       pgstat_save_changecount_after(beentry, after_changecount);
+                       if (before_changecount == after_changecount &&
+                               (before_changecount & 1) == 0)
                                break;
 
                        /* Make sure we can break out of loop if stuck... */
@@ -2927,12 +2925,17 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
 
                for (;;)
                {
-                       int                     save_changecount = vbeentry->st_changecount;
+                       int                     before_changecount;
+                       int                     after_changecount;
+
+                       pgstat_save_changecount_before(vbeentry, before_changecount);
 
                        found = (vbeentry->st_procpid == pid);
 
-                       if (save_changecount == vbeentry->st_changecount &&
-                               (save_changecount & 1) == 0)
+                       pgstat_save_changecount_after(vbeentry, after_changecount);
+
+                       if (before_changecount == after_changecount &&
+                               (before_changecount & 1) == 0)
                                break;
 
                        /* Make sure we can break out of loop if stuck... */
index 08925336d181c16759fedd4af939564c9d109155..7285f3ee167202dc067e73f88eeafcf93af1672a 100644 (file)
@@ -16,6 +16,7 @@
 #include "libpq/pqcomm.h"
 #include "portability/instr_time.h"
 #include "postmaster/pgarch.h"
+#include "storage/barrier.h"
 #include "utils/hsearch.h"
 #include "utils/relcache.h"
 
@@ -714,6 +715,12 @@ typedef struct PgBackendStatus
         * st_changecount again.  If the value hasn't changed, and if it's even,
         * the copy is valid; otherwise start over.  This makes updates cheap
         * while reads are potentially expensive, but that's the tradeoff we want.
+        *
+        * The above protocol needs the memory barriers to ensure that
+        * the apparent order of execution is as it desires. Otherwise,
+        * for example, the CPU might rearrange the code so that st_changecount
+        * is incremented twice before the modification on a machine with
+        * weak memory ordering. This surprising result can lead to bugs.
         */
        int                     st_changecount;
 
@@ -745,6 +752,43 @@ typedef struct PgBackendStatus
        char       *st_activity;
 } PgBackendStatus;
 
+/*
+ * Macros to load and store st_changecount with the memory barriers.
+ *
+ * pgstat_increment_changecount_before() and
+ * pgstat_increment_changecount_after() need to be called before and after
+ * PgBackendStatus entries are modified, respectively. This makes sure that
+ * st_changecount is incremented around the modification.
+ *
+ * Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
+ * need to be called before and after PgBackendStatus entries are copied into
+ * private memory, respectively.
+ */
+#define pgstat_increment_changecount_before(beentry)   \
+       do {    \
+               beentry->st_changecount++;      \
+               pg_write_barrier();     \
+       } while (0)
+
+#define pgstat_increment_changecount_after(beentry)    \
+       do {    \
+               pg_write_barrier();     \
+               beentry->st_changecount++;      \
+               Assert((beentry->st_changecount & 1) == 0);     \
+       } while (0)
+
+#define pgstat_save_changecount_before(beentry, save_changecount)      \
+       do {    \
+               save_changecount = beentry->st_changecount;     \
+               pg_read_barrier();      \
+       } while (0)
+
+#define pgstat_save_changecount_after(beentry, save_changecount)       \
+       do {    \
+               pg_read_barrier();      \
+               save_changecount = beentry->st_changecount;     \
+       } while (0)
+
 /* ----------
  * LocalPgBackendStatus
  *