]> granicus.if.org Git - postgresql/commitdiff
Background worker processes
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 6 Dec 2012 17:57:52 +0000 (14:57 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 6 Dec 2012 20:47:30 +0000 (17:47 -0300)
Background workers are postmaster subprocesses that run arbitrary
user-specified code.  They can request shared memory access as well as
backend database connections; or they can just use plain libpq frontend
database connections.

Modules listed in shared_preload_libraries can register background
workers in their _PG_init() function; this is early enough that it's not
necessary to provide an extra GUC option, because the necessary extra
resources can be allocated early on.  Modules can install more than one
bgworker, if necessary.

Care is taken that these extra processes do not interfere with other
postmaster tasks: only one such process is started on each ServerLoop
iteration.  This means a large number of them could be waiting to be
started up and postmaster is still able to quickly service external
connection requests.  Also, shutdown sequence should not be impacted by
a worker process that's reasonably well behaved (i.e. promptly responds
to termination signals.)

The current implementation lets worker processes specify their start
time, i.e. at what point in the server startup process they are to be
started: right after postmaster start (in which case they mustn't ask
for shared memory access), when consistent state has been reached
(useful during recovery in a HOT standby server), or when recovery has
terminated (i.e. when normal backends are allowed).

In case of a bgworker crash, actions to take depend on registration
data: if shared memory was requested, then all other connections are
taken down (as well as other bgworkers), just like it were a regular
backend crashing.  The bgworker itself is restarted, too, within a
configurable timeframe (which can be configured to be never).

More features to add to this framework can be imagined without much
effort, and have been discussed, but this seems good enough as a useful
unit already.

An elementary sample module is supplied.

Author: Álvaro Herrera

This patch is loosely based on prior patches submitted by KaiGai Kohei,
and unsubmitted code by Simon Riggs.

Reviewed by: KaiGai Kohei, Markus Wanner, Andres Freund,
Heikki Linnakangas, Simon Riggs, Amit Kapila

16 files changed:
contrib/Makefile
contrib/worker_spi/Makefile [new file with mode: 0644]
contrib/worker_spi/worker_spi.c [new file with mode: 0644]
doc/src/sgml/bgworker.sgml [new file with mode: 0644]
doc/src/sgml/filelist.sgml
doc/src/sgml/postgres.sgml
src/backend/postmaster/postmaster.c
src/backend/storage/lmgr/proc.c
src/backend/utils/init/globals.c
src/backend/utils/init/miscinit.c
src/backend/utils/init/postinit.c
src/backend/utils/misc/guc.c
src/include/miscadmin.h
src/include/postmaster/bgworker.h [new file with mode: 0644]
src/include/postmaster/postmaster.h
src/include/storage/proc.h

index d230451a1297e793d39f542b4ee4a3454644ff24..fcd7c1e0330212d317f8793922ac3263ba14fac8 100644 (file)
@@ -50,7 +50,8 @@ SUBDIRS = \
                test_parser     \
                tsearch2        \
                unaccent        \
-               vacuumlo
+               vacuumlo        \
+               worker_spi
 
 ifeq ($(with_openssl),yes)
 SUBDIRS += sslinfo
diff --git a/contrib/worker_spi/Makefile b/contrib/worker_spi/Makefile
new file mode 100644 (file)
index 0000000..edf4105
--- /dev/null
@@ -0,0 +1,14 @@
+# contrib/worker_spi/Makefile
+
+MODULES = worker_spi
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/worker_spi
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/worker_spi/worker_spi.c b/contrib/worker_spi/worker_spi.c
new file mode 100644 (file)
index 0000000..6da747b
--- /dev/null
@@ -0,0 +1,263 @@
+/* -------------------------------------------------------------------------
+ *
+ * worker_spi.c
+ *             Sample background worker code that demonstrates usage of a database
+ *             connection.
+ *
+ * This code connects to a database, create a schema and table, and summarizes
+ * the numbers contained therein.  To see it working, insert an initial value
+ * with "total" type and some initial value; then insert some other rows with
+ * "delta" type.  Delta rows will be deleted by this worker and their values
+ * aggregated into the total.
+ *
+ * Copyright (C) 2012, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             contrib/worker_spi/worker_spi.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+/* These are always necessary for a bgworker */
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+
+/* these headers are used by this particular worker's code */
+#include "access/xact.h"
+#include "executor/spi.h"
+#include "fmgr.h"
+#include "lib/stringinfo.h"
+#include "utils/builtins.h"
+#include "utils/snapmgr.h"
+
+PG_MODULE_MAGIC;
+
+void   _PG_init(void);
+
+static bool    got_sigterm = false;
+
+
+typedef struct worktable
+{
+       const char         *schema;
+       const char         *name;
+} worktable;
+
+static void
+worker_spi_sigterm(SIGNAL_ARGS)
+{
+       int                     save_errno = errno;
+
+       got_sigterm = true;
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
+}
+
+static void
+worker_spi_sighup(SIGNAL_ARGS)
+{
+       elog(LOG, "got sighup!");
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+}
+
+static void
+initialize_worker_spi(worktable *table)
+{
+       int             ret;
+       int             ntup;
+       bool    isnull;
+       StringInfoData  buf;
+
+       StartTransactionCommand();
+       SPI_connect();
+       PushActiveSnapshot(GetTransactionSnapshot());
+
+       initStringInfo(&buf);
+       appendStringInfo(&buf, "select count(*) from pg_namespace where nspname = '%s'",
+                                        table->schema);
+
+       ret = SPI_execute(buf.data, true, 0);
+       if (ret != SPI_OK_SELECT)
+               elog(FATAL, "SPI_execute failed: error code %d", ret);
+
+       if (SPI_processed != 1)
+               elog(FATAL, "not a singleton result");
+
+       ntup = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
+                                                                          SPI_tuptable->tupdesc,
+                                                                          1, &isnull));
+       if (isnull)
+               elog(FATAL, "null result");
+
+       if (ntup == 0)
+       {
+               resetStringInfo(&buf);
+               appendStringInfo(&buf,
+                                                "CREATE SCHEMA \"%s\" "
+                                                "CREATE TABLE \"%s\" ("
+                                                "              type text CHECK (type IN ('total', 'delta')), "
+                                                "              value   integer)"
+                                                "CREATE UNIQUE INDEX \"%s_unique_total\" ON \"%s\" (type) "
+                                                "WHERE type = 'total'",
+                                                table->schema, table->name, table->name, table->name);
+
+               ret = SPI_execute(buf.data, false, 0);
+
+               if (ret != SPI_OK_UTILITY)
+                       elog(FATAL, "failed to create my schema");
+       }
+
+       SPI_finish();
+       PopActiveSnapshot();
+       CommitTransactionCommand();
+}
+
+static void
+worker_spi_main(void *main_arg)
+{
+       worktable          *table = (worktable *) main_arg;
+       StringInfoData  buf;
+
+       /* We're now ready to receive signals */
+       BackgroundWorkerUnblockSignals();
+
+       /* Connect to our database */
+       BackgroundWorkerInitializeConnection("postgres", NULL);
+
+       elog(LOG, "%s initialized with %s.%s",
+                MyBgworkerEntry->bgw_name, table->schema, table->name);
+       initialize_worker_spi(table);
+
+       /*
+        * Quote identifiers passed to us.  Note that this must be done after
+        * initialize_worker_spi, because that routine assumes the names are not
+        * quoted.
+        *
+        * Note some memory might be leaked here.
+        */
+       table->schema = quote_identifier(table->schema);
+       table->name = quote_identifier(table->name);
+
+       initStringInfo(&buf);
+       appendStringInfo(&buf,
+                                        "WITH deleted AS (DELETE "
+                                        "FROM %s.%s "
+                                        "WHERE type = 'delta' RETURNING value), "
+                                        "total AS (SELECT coalesce(sum(value), 0) as sum "
+                                        "FROM deleted) "
+                                        "UPDATE %s.%s "
+                                        "SET value = %s.value + total.sum "
+                                        "FROM total WHERE type = 'total' "
+                                        "RETURNING %s.value",
+                                        table->schema, table->name,
+                                        table->schema, table->name,
+                                        table->name,
+                                        table->name);
+
+       while (!got_sigterm)
+       {
+               int             ret;
+               int             rc;
+
+               /*
+                * Background workers mustn't call usleep() or any direct equivalent:
+                * instead, they may wait on their process latch, which sleeps as
+                * necessary, but is awakened if postmaster dies.  That way the
+                * background process goes away immediately in an emergency.
+                */
+               rc = WaitLatch(&MyProc->procLatch,
+                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                          1000L);
+               ResetLatch(&MyProc->procLatch);
+
+               /* emergency bailout if postmaster has died */
+               if (rc & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               StartTransactionCommand();
+               SPI_connect();
+               PushActiveSnapshot(GetTransactionSnapshot());
+
+               ret = SPI_execute(buf.data, false, 0);
+
+               if (ret != SPI_OK_UPDATE_RETURNING)
+                       elog(FATAL, "cannot select from table %s.%s: error code %d",
+                                table->schema, table->name, ret);
+
+               if (SPI_processed > 0)
+               {
+                       bool    isnull;
+                       int32   val;
+
+                       val = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
+                                                                                          SPI_tuptable->tupdesc,
+                                                                                          1, &isnull));
+                       if (!isnull)
+                               elog(LOG, "%s: count in %s.%s is now %d",
+                                        MyBgworkerEntry->bgw_name,
+                                        table->schema, table->name, val);
+               }
+
+               SPI_finish();
+               PopActiveSnapshot();
+               CommitTransactionCommand();
+       }
+
+       proc_exit(0);
+}
+
+/*
+ * Entrypoint of this module.
+ *
+ * We register two worker processes here, to demonstrate how that can be done.
+ */
+void
+_PG_init(void)
+{
+       BackgroundWorker        worker;
+       worktable                  *table;
+
+       /* register the worker processes.  These values are common for both */
+       worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
+               BGWORKER_BACKEND_DATABASE_CONNECTION;
+       worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
+       worker.bgw_main = worker_spi_main;
+       worker.bgw_sighup = worker_spi_sighup;
+       worker.bgw_sigterm = worker_spi_sigterm;
+
+       /*
+        * These values are used for the first worker.
+        *
+        * Note these are palloc'd.  The reason this works after starting a new
+        * worker process is that if we only fork, they point to valid allocated
+        * memory in the child process; and if we fork and then exec, the exec'd
+        * process will run this code again, and so the memory is also valid there.
+        */
+       table = palloc(sizeof(worktable));
+       table->schema = pstrdup("schema1");
+       table->name = pstrdup("counted");
+
+       worker.bgw_name = "SPI worker 1";
+       worker.bgw_restart_time = BGW_NEVER_RESTART;
+       worker.bgw_main_arg = (void *) table;
+       RegisterBackgroundWorker(&worker);
+
+       /* Values for the second worker */
+       table = palloc(sizeof(worktable));
+       table->schema = pstrdup("our schema2");
+       table->name = pstrdup("counted rows");
+
+       worker.bgw_name = "SPI worker 2";
+       worker.bgw_restart_time = 2;
+       worker.bgw_main_arg = (void *) table;
+       RegisterBackgroundWorker(&worker);
+}
diff --git a/doc/src/sgml/bgworker.sgml b/doc/src/sgml/bgworker.sgml
new file mode 100644 (file)
index 0000000..912c7de
--- /dev/null
@@ -0,0 +1,146 @@
+<!-- doc/src/sgml/bgworker.sgml -->
+
+<chapter id="bgworker">
+ <title>Background Worker Processes</title>
+
+ <indexterm zone="bgworker">
+  <primary>Background workers</primary>
+ </indexterm>
+
+ <para>
+  PostgreSQL can be extended to run user-supplied code in separate processes.
+  Such processes are started, stopped and monitored by <command>postgres</command>,
+  which permits them to have a lifetime closely linked to the server's status.
+  These processes have the option to attach to <productname>PostgreSQL</>'s
+  shared memory area and to connect to databases internally; they can also run
+  multiple transactions serially, just like a regular client-connected server
+  process.  Also, by linking to <application>libpq</> they can connect to the
+  server and behave like a regular client application.
+ </para>
+
+ <warning>
+  <para>
+   There are considerable robustness and security risks in using background
+   worker processes because, being written in the <literal>C</> language,
+   they have unrestricted access to data.  Administrators wishing to enable
+   modules that include background worker process should exercise extreme
+   caution.  Only carefully audited modules should be permitted to run
+   background worker processes.
+  </para>
+ </warning>
+
+ <para>
+  Only modules listed in <varname>shared_preload_libraries</> can run
+  background workers.  A module wishing to run a background worker needs
+  to register it by calling
+  <function>RegisterBackgroundWorker(<type>BackgroundWorker *worker</type>)</function>
+  from its <function>_PG_init()</>.
+  The structure <structname>BackgroundWorker</structname> is defined thus:
+<programlisting>
+typedef void (*bgworker_main_type)(void *main_arg);
+typedef void (*bgworker_sighdlr_type)(SIGNAL_ARGS);
+typedef struct BackgroundWorker
+{
+    char       *bgw_name;
+    int         bgw_flags;
+    BgWorkerStartTime bgw_start_time;
+    int         bgw_restart_time;       /* in seconds, or BGW_NEVER_RESTART */
+    bgworker_main_type  bgw_main;
+    void       *bgw_main_arg;
+    bgworker_sighdlr_type bgw_sighup;
+    bgworker_sighdlr_type bgw_sigterm;
+} BackgroundWorker;
+</programlisting>
+  </para>
+
+  <para>
+   <structfield>bgw_name</> is a string to be used in log messages, process
+   listings and similar contexts.
+  </para>
+
+  <para>
+   <structfield>bgw_flags</> is a bitwise-or'd bitmask indicating the
+   capabilities that the module wants.  Possible values are
+   <literal>BGWORKER_SHMEM_ACCESS</literal> (requesting shared memory access)
+   and <literal>BGWORKER_BACKEND_DATABASE_CONNECTION</literal> (requesting the
+   ability to establish a database connection, through which it can later run
+   transactions and queries).
+  </para>
+
+  <para>
+   <structfield>bgw_start_time</structfield> is the server state during which
+   <command>postgres</> should start the process; it can be one of
+   <literal>BgWorkerStart_PostmasterStart</> (start as soon as
+   <command>postgres</> itself has finished its own initialization; processes
+   requesting this are not eligible for database connections),
+   <literal>BgWorkerStart_ConsistentState</> (start as soon as a consistent state
+   has been reached in a HOT standby, allowing processes to connect to
+   databases and run read-only queries), and
+   <literal>BgWorkerStart_RecoveryFinished</> (start as soon as the system has
+   entered normal read-write state).  Note the last two values are equivalent
+   in a server that's not a HOT standby.  Note that this setting only indicates
+   when the processes are to be started; they do not stop when a different state
+   is reached.
+  </para>
+
+  <para>
+   <structfield>bgw_restart_time</structfield> is the interval, in seconds, that
+   <command>postgres</command> should wait before restarting the process, in
+   case it crashes.  It can be any positive value,
+   or <literal>BGW_NEVER_RESTART</literal>, indicating not to restart the
+   process in case of a crash.
+  </para>
+
+  <para>
+   <structfield>bgw_main</structfield> is a pointer to the function to run when
+   the process is started.  This function must take a single argument of type
+   <type>void *</> and return <type>void</>.
+   <structfield>bgw_main_arg</structfield> will be passed to it as its only
+   argument.  Note that the global variable <literal>MyBgworkerEntry</literal>
+   points to a copy of the <structname>BackgroundWorker</structname> structure
+   passed at registration time.
+  </para>
+
+  <para>
+   <structfield>bgw_sighup</structfield> and <structfield>bgw_sigterm</> are
+   pointers to functions that will be installed as signal handlers for the new
+   process.  If <structfield>bgw_sighup</> is NULL, then <literal>SIG_IGN</>
+   is used; if <structfield>bgw_sigterm</> is NULL, a handler is installed that
+   will terminate the process after logging a suitable message.
+  </para>
+
+  <para>Once running, the process can connect to a database by calling
+   <function>BackgroundWorkerInitializeConnection(<parameter>char *dbname</parameter>, <parameter>char *username</parameter>)</function>.
+   This allows the process to run transactions and queries using the
+   <literal>SPI</literal> interface.  If <varname>dbname</> is NULL,
+   the session is not connected to any particular database, but shared catalogs
+   can be accessed.  If <varname>username</> is NULL, the process will run as
+   the superuser created during <command>initdb</>.
+   BackgroundWorkerInitializeConnection can only be called once per background
+   process, it is not possible to switch databases.
+  </para>
+
+  <para>
+   Signals are initially blocked when control reaches the
+   <structfield>bgw_main</> function, and must be unblocked by it; this is to
+   allow the process to further customize its signal handlers, if necessary.
+   Signals can be unblocked in the new process by calling
+   <function>BackgroundWorkerUnblockSignals</> and blocked by calling
+   <function>BackgroundWorkerBlockSignals</>.
+  </para>
+
+  <para>
+   Background workers are expected to be continuously running; if they exit
+   cleanly, <command>postgres</> will restart them immediately.  Consider doing
+   interruptible sleep when they have nothing to do; this can be achieved by
+   calling <function>WaitLatch()</function>.  Make sure the
+   <literal>WL_POSTMASTER_DEATH</> flag is set when calling that function, and
+   verify the return code for a prompt exit in the emergency case that
+   <command>postgres</> itself has terminated.
+  </para>
+
+  <para>
+   The <filename>worker_spi</> contrib module contains a working example,
+   which demonstrates some useful techniques.
+  </para>
+</chapter>
index db4cc3a3fbad35de5ccda5b28f1a997eca87cc51..368f9321c88138517bc30c9f2192bc0812d5d5bd 100644 (file)
@@ -50,6 +50,7 @@
 <!ENTITY wal           SYSTEM "wal.sgml">
 
 <!-- programmer's guide -->
+<!ENTITY bgworker   SYSTEM "bgworker.sgml">
 <!ENTITY dfunc      SYSTEM "dfunc.sgml">
 <!ENTITY ecpg       SYSTEM "ecpg.sgml">
 <!ENTITY extend     SYSTEM "extend.sgml">
index 4ef1fc1a6e6725d3b1c9502fe91cabb7dcc886cd..15e4ef641e7fb81d4031a91b8d82cd02048333c6 100644 (file)
   &plpython;
 
   &spi;
+  &bgworker;
 
  </part>
 
index 6f93d93fa3f7577fb9157f0bea805c427e3605dd..a492c60b46a925a3c2cd980689623541fe3c1e8c 100644 (file)
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/bgworker.h"
 #include "postmaster/fork_process.h"
 #include "postmaster/pgarch.h"
 #include "postmaster/postmaster.h"
 #endif
 
 
+/*
+ * Possible types of a backend. Beyond being the possible bkend_type values in
+ * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
+ * and CountChildren().
+ */
+#define BACKEND_TYPE_NORMAL            0x0001  /* normal backend */
+#define BACKEND_TYPE_AUTOVAC   0x0002  /* autovacuum worker process */
+#define BACKEND_TYPE_WALSND            0x0004  /* walsender process */
+#define BACKEND_TYPE_BGWORKER  0x0008  /* bgworker process */
+#define BACKEND_TYPE_ALL               0x000F  /* OR of all the above */
+
+#define BACKEND_TYPE_WORKER            (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
+
 /*
  * List of active backends (or child processes anyway; we don't actually
  * know whether a given child has become a backend or is still in the
  * children we have and send them appropriate signals when necessary.
  *
  * "Special" children such as the startup, bgwriter and autovacuum launcher
- * tasks are not in this list. Autovacuum worker and walsender processes are
- * in it. Also, "dead_end" children are in it: these are children launched just
- * for the purpose of sending a friendly rejection message to a would-be
- * client.     We must track them because they are attached to shared memory,
- * but we know they will never become live backends.  dead_end children are
- * not assigned a PMChildSlot.
+ * tasks are not in this list. Autovacuum worker and walsender are in it.
+ * Also, "dead_end" children are in it: these are children launched just for
+ * the purpose of sending a friendly rejection message to a would-be client.
+ * We must track them because they are attached to shared memory, but we know
+ * they will never become live backends.  dead_end children are not assigned a
+ * PMChildSlot.
+ *
+ * Background workers that request shared memory access during registration are
+ * in this list, too.
  */
 typedef struct bkend
 {
        pid_t           pid;                    /* process id of backend */
        long            cancel_key;             /* cancel key for cancels for this backend */
        int                     child_slot;             /* PMChildSlot for this backend, if any */
-       bool            is_autovacuum;  /* is it an autovacuum process? */
+
+       /*
+        * Flavor of backend or auxiliary process.      Note that BACKEND_TYPE_WALSND
+        * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
+        * bkend_type is normal, you should check for a recent transition.
+        */
+       int                     bkend_type;
        bool            dead_end;               /* is it going to send an error and quit? */
        dlist_node      elem;                   /* list link in BackendList */
 } Backend;
@@ -155,6 +178,33 @@ static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
 static Backend *ShmemBackendArray;
 #endif
 
+
+/*
+ * List of background workers.
+ *
+ * A worker that requests a database connection during registration will have
+ * rw_backend set, and will be present in BackendList.  Note: do not rely on
+ * rw_backend being non-NULL for shmem-connected workers!
+ */
+typedef struct RegisteredBgWorker
+{
+       BackgroundWorker rw_worker; /* its registry entry */
+       Backend    *rw_backend;         /* its BackendList entry, or NULL */
+       pid_t           rw_pid;                 /* 0 if not running */
+       int                     rw_child_slot;
+       TimestampTz rw_crashed_at;      /* if not 0, time it last crashed */
+#ifdef EXEC_BACKEND
+       int                     rw_cookie;
+#endif
+       slist_node      rw_lnode;               /* list link */
+}      RegisteredBgWorker;
+
+static slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
+
+BackgroundWorker *MyBgworkerEntry = NULL;
+
+
+
 /* The socket number we are listening for connections on */
 int                    PostPortNumber;
 /* The directory names for Unix socket(s) */
@@ -306,6 +356,10 @@ static volatile sig_atomic_t start_autovac_launcher = false;
 /* the launcher needs to be signalled to communicate some condition */
 static volatile bool avlauncher_needs_signal = false;
 
+/* set when there's a worker that needs to be started up */
+static volatile bool StartWorkerNeeded = true;
+static volatile bool HaveCrashedWorker = false;
+
 /*
  * State for assigning random salts and cancel keys.
  * Also, the global MyCancelKey passes the cancel key assigned to a given
@@ -341,8 +395,11 @@ static void reaper(SIGNAL_ARGS);
 static void sigusr1_handler(SIGNAL_ARGS);
 static void startup_die(SIGNAL_ARGS);
 static void dummy_handler(SIGNAL_ARGS);
+static int     GetNumRegisteredBackgroundWorkers(int flags);
 static void StartupPacketTimeoutHandler(void);
 static void CleanupBackend(int pid, int exitstatus);
+static bool CleanupBackgroundWorker(int pid, int exitstatus);
+static void do_start_bgworker(void);
 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
 static void LogChildExit(int lev, const char *procname,
                         int pid, int exitstatus);
@@ -361,19 +418,13 @@ static long PostmasterRandom(void);
 static void RandomSalt(char *md5Salt);
 static void signal_child(pid_t pid, int signal);
 static bool SignalSomeChildren(int signal, int targets);
+static bool SignalUnconnectedWorkers(int signal);
 
 #define SignalChildren(sig)                       SignalSomeChildren(sig, BACKEND_TYPE_ALL)
 
-/*
- * Possible types of a backend. These are OR-able request flag bits
- * for SignalSomeChildren() and CountChildren().
- */
-#define BACKEND_TYPE_NORMAL            0x0001  /* normal backend */
-#define BACKEND_TYPE_AUTOVAC   0x0002  /* autovacuum worker process */
-#define BACKEND_TYPE_WALSND            0x0004  /* walsender process */
-#define BACKEND_TYPE_ALL               0x0007  /* OR of all the above */
-
 static int     CountChildren(int target);
+static int     CountUnconnectedWorkers(void);
+static void StartOneBackgroundWorker(void);
 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 static pid_t StartChildProcess(AuxProcType type);
 static void StartAutovacuumWorker(void);
@@ -473,6 +524,8 @@ static bool save_backend_variables(BackendParameters *param, Port *port,
 
 static void ShmemBackendArrayAdd(Backend *bn);
 static void ShmemBackendArrayRemove(Backend *bn);
+
+static BackgroundWorker *find_bgworker_entry(int cookie);
 #endif   /* EXEC_BACKEND */
 
 #define StartupDataBase()              StartChildProcess(StartupProcess)
@@ -843,6 +896,17 @@ PostmasterMain(int argc, char *argv[])
         */
        process_shared_preload_libraries();
 
+       /*
+        * If loadable modules have added background workers, MaxBackends needs to
+        * be updated.  Do so now by forcing a no-op update of max_connections.
+        * XXX This is a pretty ugly way to do it, but it doesn't seem worth
+        * introducing a new entry point in guc.c to do it in a cleaner fashion.
+        */
+       if (GetNumShmemAttachedBgworkers() > 0)
+               SetConfigOption("max_connections",
+                                               GetConfigOption("max_connections", false, false),
+                                               PGC_POSTMASTER, PGC_S_OVERRIDE);
+
        /*
         * Establish input sockets.
         */
@@ -1087,7 +1151,8 @@ PostmasterMain(int argc, char *argv[])
         * handling setup of child processes.  See tcop/postgres.c,
         * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
         * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
-        * postmaster/syslogger.c and postmaster/checkpointer.c.
+        * postmaster/syslogger.c, postmaster/bgworker.c and
+        * postmaster/checkpointer.c.
         */
        pqinitmask();
        PG_SETMASK(&BlockSig);
@@ -1177,6 +1242,9 @@ PostmasterMain(int argc, char *argv[])
        Assert(StartupPID != 0);
        pmState = PM_STARTUP;
 
+       /* Some workers may be scheduled to start now */
+       StartOneBackgroundWorker();
+
        status = ServerLoop();
 
        /*
@@ -1341,6 +1409,90 @@ checkDataDir(void)
        FreeFile(fp);
 }
 
+/*
+ * Determine how long should we let ServerLoop sleep.
+ *
+ * In normal conditions we wait at most one minute, to ensure that the other
+ * background tasks handled by ServerLoop get done even when no requests are
+ * arriving.  However, if there are background workers waiting to be started,
+ * we don't actually sleep so that they are quickly serviced.
+ */
+static void
+DetermineSleepTime(struct timeval *timeout)
+{
+       TimestampTz next_wakeup = 0;
+
+       /*
+        * Normal case: either there are no background workers at all, or we're in
+        * a shutdown sequence (during which we ignore bgworkers altogether).
+        */
+       if (Shutdown > NoShutdown ||
+               (!StartWorkerNeeded && !HaveCrashedWorker))
+       {
+               timeout->tv_sec = 60;
+               timeout->tv_usec = 0;
+               return;
+       }
+
+       if (StartWorkerNeeded)
+       {
+               timeout->tv_sec = 0;
+               timeout->tv_usec = 0;
+               return;
+       }
+
+       if (HaveCrashedWorker)
+       {
+               slist_iter      siter;
+
+               /*
+                * When there are crashed bgworkers, we sleep just long enough that
+                * they are restarted when they request to be.  Scan the list to
+                * determine the minimum of all wakeup times according to most recent
+                * crash time and requested restart interval.
+                */
+               slist_foreach(siter, &BackgroundWorkerList)
+               {
+                       RegisteredBgWorker *rw;
+                       TimestampTz this_wakeup;
+
+                       rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+
+                       if (rw->rw_crashed_at == 0)
+                               continue;
+
+                       if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
+                               continue;
+
+                       this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
+                                                                        1000L * rw->rw_worker.bgw_restart_time);
+                       if (next_wakeup == 0 || this_wakeup < next_wakeup)
+                               next_wakeup = this_wakeup;
+               }
+       }
+
+       if (next_wakeup != 0)
+       {
+               int                     microsecs;
+
+               TimestampDifference(GetCurrentTimestamp(), next_wakeup,
+                                                       &timeout->tv_sec, &microsecs);
+               timeout->tv_usec = microsecs;
+
+               /* Ensure we don't exceed one minute */
+               if (timeout->tv_sec > 60)
+               {
+                       timeout->tv_sec = 60;
+                       timeout->tv_usec = 0;
+               }
+       }
+       else
+       {
+               timeout->tv_sec = 60;
+               timeout->tv_usec = 0;
+       }
+}
+
 /*
  * Main idle loop of postmaster
  */
@@ -1364,9 +1516,6 @@ ServerLoop(void)
                /*
                 * Wait for a connection request to arrive.
                 *
-                * We wait at most one minute, to ensure that the other background
-                * tasks handled below get done even when no requests are arriving.
-                *
                 * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
                 * any new connections, so we don't call select() at all; just sleep
                 * for a little bit with signals unblocked.
@@ -1385,8 +1534,7 @@ ServerLoop(void)
                        /* must set timeout each time; some OSes change it! */
                        struct timeval timeout;
 
-                       timeout.tv_sec = 60;
-                       timeout.tv_usec = 0;
+                       DetermineSleepTime(&timeout);
 
                        selres = select(nSockets, &rmask, NULL, NULL, &timeout);
                }
@@ -1498,6 +1646,10 @@ ServerLoop(void)
                                kill(AutoVacPID, SIGUSR2);
                }
 
+               /* Get other worker processes running, if needed */
+               if (StartWorkerNeeded || HaveCrashedWorker)
+                       StartOneBackgroundWorker();
+
                /*
                 * Touch Unix socket and lock files every 58 minutes, to ensure that
                 * they are not removed by overzealous /tmp-cleaning tasks.  We assume
@@ -1513,7 +1665,6 @@ ServerLoop(void)
        }
 }
 
-
 /*
  * Initialise the masks for select() for the ports we are listening on.
  * Return the number of sockets to listen on.
@@ -1867,7 +2018,7 @@ processCancelRequest(Port *port, void *pkt)
        Backend    *bp;
 
 #ifndef EXEC_BACKEND
-       dlist_iter  iter;
+       dlist_iter      iter;
 #else
        int                     i;
 #endif
@@ -2205,8 +2356,11 @@ pmdie(SIGNAL_ARGS)
                        if (pmState == PM_RUN || pmState == PM_RECOVERY ||
                                pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
                        {
-                               /* autovacuum workers are told to shut down immediately */
-                               SignalSomeChildren(SIGTERM, BACKEND_TYPE_AUTOVAC);
+                               /* autovac workers are told to shut down immediately */
+                               /* and bgworkers too; does this need tweaking? */
+                               SignalSomeChildren(SIGTERM,
+                                                          BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER);
+                               SignalUnconnectedWorkers(SIGTERM);
                                /* and the autovac launcher too */
                                if (AutoVacPID != 0)
                                        signal_child(AutoVacPID, SIGTERM);
@@ -2258,12 +2412,14 @@ pmdie(SIGNAL_ARGS)
                                signal_child(BgWriterPID, SIGTERM);
                        if (WalReceiverPID != 0)
                                signal_child(WalReceiverPID, SIGTERM);
+                       SignalUnconnectedWorkers(SIGTERM);
                        if (pmState == PM_RECOVERY)
                        {
                                /*
-                                * Only startup, bgwriter, walreceiver, and/or checkpointer
-                                * should be active in this state; we just signaled the first
-                                * three, and we don't want to kill checkpointer yet.
+                                * Only startup, bgwriter, walreceiver, unconnected bgworkers,
+                                * and/or checkpointer should be active in this state; we just
+                                * signaled the first four, and we don't want to kill
+                                * checkpointer yet.
                                 */
                                pmState = PM_WAIT_BACKENDS;
                        }
@@ -2275,9 +2431,10 @@ pmdie(SIGNAL_ARGS)
                        {
                                ereport(LOG,
                                                (errmsg("aborting any active transactions")));
-                               /* shut down all backends and autovac workers */
+                               /* shut down all backends and workers */
                                SignalSomeChildren(SIGTERM,
-                                                                BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC);
+                                                                BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC |
+                                                                  BACKEND_TYPE_BGWORKER);
                                /* and the autovac launcher too */
                                if (AutoVacPID != 0)
                                        signal_child(AutoVacPID, SIGTERM);
@@ -2321,6 +2478,7 @@ pmdie(SIGNAL_ARGS)
                                signal_child(PgArchPID, SIGQUIT);
                        if (PgStatPID != 0)
                                signal_child(PgStatPID, SIGQUIT);
+                       SignalUnconnectedWorkers(SIGQUIT);
                        ExitPostmaster(0);
                        break;
        }
@@ -2449,6 +2607,9 @@ reaper(SIGNAL_ARGS)
                        if (PgStatPID == 0)
                                PgStatPID = pgstat_start();
 
+                       /* some workers may be scheduled to start now */
+                       StartOneBackgroundWorker();
+
                        /* at this point we are really open for business */
                        ereport(LOG,
                                 (errmsg("database system is ready to accept connections")));
@@ -2615,6 +2776,14 @@ reaper(SIGNAL_ARGS)
                        continue;
                }
 
+               /* Was it one of our background workers? */
+               if (CleanupBackgroundWorker(pid, exitstatus))
+               {
+                       /* have it be restarted */
+                       HaveCrashedWorker = true;
+                       continue;
+               }
+
                /*
                 * Else do standard backend child cleanup.
                 */
@@ -2633,11 +2802,100 @@ reaper(SIGNAL_ARGS)
        errno = save_errno;
 }
 
+/*
+ * Scan the bgworkers list and see if the given PID (which has just stopped
+ * or crashed) is in it.  Handle its shutdown if so, and return true.  If not a
+ * bgworker, return false.
+ *
+ * This is heavily based on CleanupBackend.  One important difference is that
+ * we don't know yet that the dying process is a bgworker, so we must be silent
+ * until we're sure it is.
+ */
+static bool
+CleanupBackgroundWorker(int pid,
+                                               int exitstatus) /* child's exit status */
+{
+       char            namebuf[MAXPGPATH];
+       slist_iter      iter;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+               if (rw->rw_pid != pid)
+                       continue;
+
+#ifdef WIN32
+               /* see CleanupBackend */
+               if (exitstatus == ERROR_WAIT_NO_CHILDREN)
+                       exitstatus = 0;
+#endif
+
+               snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"),
+                                rw->rw_worker.bgw_name);
+
+               /* Delay restarting any bgworker that exits with a nonzero status. */
+               if (!EXIT_STATUS_0(exitstatus))
+                       rw->rw_crashed_at = GetCurrentTimestamp();
+               else
+                       rw->rw_crashed_at = 0;
+
+               /*
+                * Additionally, for shared-memory-connected workers, just like a
+                * backend, any exit status other than 0 or 1 is considered a crash
+                * and causes a system-wide restart.
+                */
+               if (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS)
+               {
+                       if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+                       {
+                               rw->rw_crashed_at = GetCurrentTimestamp();
+                               HandleChildCrash(pid, exitstatus, namebuf);
+                               return true;
+                       }
+               }
+
+               if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
+               {
+                       /*
+                        * Uh-oh, the child failed to clean itself up.  Treat as a crash
+                        * after all.
+                        */
+                       rw->rw_crashed_at = GetCurrentTimestamp();
+                       HandleChildCrash(pid, exitstatus, namebuf);
+                       return true;
+               }
+
+               /* Get it out of the BackendList and clear out remaining data */
+               if (rw->rw_backend)
+               {
+                       Assert(rw->rw_worker.bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION);
+                       dlist_delete(&rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+                       ShmemBackendArrayRemove(rw->rw_backend);
+#endif
+                       free(rw->rw_backend);
+                       rw->rw_backend = NULL;
+               }
+               rw->rw_pid = 0;
+               rw->rw_child_slot = 0;
+
+               LogChildExit(LOG, namebuf, pid, exitstatus);
+
+               return true;
+       }
+
+       return false;
+}
 
 /*
  * CleanupBackend -- cleanup after terminated backend.
  *
  * Remove all local state associated with backend.
+ *
+ * If you change this, see also CleanupBackgroundWorker.
  */
 static void
 CleanupBackend(int pid,
@@ -2705,7 +2963,7 @@ CleanupBackend(int pid,
 
 /*
  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
- * walwriter or autovacuum.
+ * walwriter, autovacuum, or background worker.
  *
  * The objectives here are to clean up our local state about the child
  * process, and to signal all other remaining children to quickdie.
@@ -2714,6 +2972,7 @@ static void
 HandleChildCrash(int pid, int exitstatus, const char *procname)
 {
        dlist_mutable_iter iter;
+       slist_iter      siter;
        Backend    *bp;
 
        /*
@@ -2727,6 +2986,56 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
                                (errmsg("terminating any other active server processes")));
        }
 
+       /* Process background workers. */
+       slist_foreach(siter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+               if (rw->rw_pid == 0)
+                       continue;               /* not running */
+               if (rw->rw_pid == pid)
+               {
+                       /*
+                        * Found entry for freshly-dead worker, so remove it.
+                        */
+                       (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
+                       if (rw->rw_backend)
+                       {
+                               dlist_delete(&rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+                               ShmemBackendArrayRemove(rw->rw_backend);
+#endif
+                               free(rw->rw_backend);
+                               rw->rw_backend = NULL;
+                       }
+                       rw->rw_pid = 0;
+                       rw->rw_child_slot = 0;
+                       /* don't reset crashed_at */
+                       /* Keep looping so we can signal remaining workers */
+               }
+               else
+               {
+                       /*
+                        * This worker is still alive.  Unless we did so already, tell it
+                        * to commit hara-kiri.
+                        *
+                        * SIGQUIT is the special signal that says exit without proc_exit
+                        * and let the user know what's going on. But if SendStop is set
+                        * (-s on command line), then we send SIGSTOP instead, so that we
+                        * can get core dumps from all backends by hand.
+                        */
+                       if (!FatalError)
+                       {
+                               ereport(DEBUG2,
+                                               (errmsg_internal("sending %s to process %d",
+                                                                                (SendStop ? "SIGSTOP" : "SIGQUIT"),
+                                                                                (int) rw->rw_pid)));
+                               signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT));
+                       }
+               }
+       }
+
        /* Process regular backends */
        dlist_foreach_modify(iter, &BackendList)
        {
@@ -2761,7 +3070,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
                         *
                         * We could exclude dead_end children here, but at least in the
                         * SIGSTOP case it seems better to include them.
+                        *
+                        * Background workers were already processed above; ignore them
+                        * here.
                         */
+                       if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
+                               continue;
+
                        if (!FatalError)
                        {
                                ereport(DEBUG2,
@@ -3005,16 +3320,17 @@ PostmasterStateMachine(void)
        {
                /*
                 * PM_WAIT_BACKENDS state ends when we have no regular backends
-                * (including autovac workers) and no walwriter, autovac launcher or
-                * bgwriter.  If we are doing crash recovery then we expect the
-                * checkpointer to exit as well, otherwise not. The archiver, stats,
-                * and syslogger processes are disregarded since they are not
-                * connected to shared memory; we also disregard dead_end children
-                * here. Walsenders are also disregarded, they will be terminated
-                * later after writing the checkpoint record, like the archiver
-                * process.
+                * (including autovac workers), no bgworkers (including unconnected
+                * ones), and no walwriter, autovac launcher or bgwriter.  If we are
+                * doing crash recovery then we expect the checkpointer to exit as
+                * well, otherwise not. The archiver, stats, and syslogger processes
+                * are disregarded since they are not connected to shared memory; we
+                * also disregard dead_end children here. Walsenders are also
+                * disregarded, they will be terminated later after writing the
+                * checkpoint record, like the archiver process.
                 */
-               if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
+               if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
+                       CountUnconnectedWorkers() == 0 &&
                        StartupPID == 0 &&
                        WalReceiverPID == 0 &&
                        BgWriterPID == 0 &&
@@ -3226,6 +3542,39 @@ signal_child(pid_t pid, int signal)
 #endif
 }
 
+/*
+ * Send a signal to bgworkers that did not request backend connections
+ *
+ * The reason this is interesting is that workers that did request connections
+ * are considered by SignalChildren; this function complements that one.
+ */
+static bool
+SignalUnconnectedWorkers(int signal)
+{
+       slist_iter      iter;
+       bool            signaled = false;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+               if (rw->rw_pid == 0)
+                       continue;
+               /* ignore connected workers */
+               if (rw->rw_backend != NULL)
+                       continue;
+
+               ereport(DEBUG4,
+                               (errmsg_internal("sending signal %d to process %d",
+                                                                signal, (int) rw->rw_pid)));
+               signal_child(rw->rw_pid, signal);
+               signaled = true;
+       }
+       return signaled;
+}
+
 /*
  * Send a signal to the targeted children (but NOT special children;
  * dead_end children are never signaled, either).
@@ -3233,7 +3582,7 @@ signal_child(pid_t pid, int signal)
 static bool
 SignalSomeChildren(int signal, int target)
 {
-       dlist_iter  iter;
+       dlist_iter      iter;
        bool            signaled = false;
 
        dlist_foreach(iter, &BackendList)
@@ -3249,15 +3598,15 @@ SignalSomeChildren(int signal, int target)
                 */
                if (target != BACKEND_TYPE_ALL)
                {
-                       int                     child;
+                       /*
+                        * Assign bkend_type for any recently announced WAL Sender
+                        * processes.
+                        */
+                       if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
+                               IsPostmasterChildWalSender(bp->child_slot))
+                               bp->bkend_type = BACKEND_TYPE_WALSND;
 
-                       if (bp->is_autovacuum)
-                               child = BACKEND_TYPE_AUTOVAC;
-                       else if (IsPostmasterChildWalSender(bp->child_slot))
-                               child = BACKEND_TYPE_WALSND;
-                       else
-                               child = BACKEND_TYPE_NORMAL;
-                       if (!(target & child))
+                       if (!(target & bp->bkend_type))
                                continue;
                }
 
@@ -3375,7 +3724,7 @@ BackendStartup(Port *port)
         * of backends.
         */
        bn->pid = pid;
-       bn->is_autovacuum = false;
+       bn->bkend_type = BACKEND_TYPE_NORMAL;           /* Can change later to WALSND */
        dlist_push_head(&BackendList, &bn->elem);
 
 #ifdef EXEC_BACKEND
@@ -3744,7 +4093,10 @@ internal_forkexec(int argc, char *argv[], Port *port)
        fp = AllocateFile(tmpfilename, PG_BINARY_W);
        if (!fp)
        {
-               /* As in OpenTemporaryFile, try to make the temp-file directory */
+               /*
+                * As in OpenTemporaryFileInTablespace, try to make the temp-file
+                * directory
+                */
                mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
 
                fp = AllocateFile(tmpfilename, PG_BINARY_W);
@@ -4078,7 +4430,8 @@ SubPostmasterMain(int argc, char *argv[])
        if (strcmp(argv[1], "--forkbackend") == 0 ||
                strcmp(argv[1], "--forkavlauncher") == 0 ||
                strcmp(argv[1], "--forkavworker") == 0 ||
-               strcmp(argv[1], "--forkboot") == 0)
+               strcmp(argv[1], "--forkboot") == 0 ||
+               strncmp(argv[1], "--forkbgworker=", 15) == 0)
                PGSharedMemoryReAttach();
 
        /* autovacuum needs this set before calling InitProcess */
@@ -4213,6 +4566,26 @@ SubPostmasterMain(int argc, char *argv[])
 
                AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
        }
+       if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
+       {
+               int                     cookie;
+
+               /* Close the postmaster's sockets */
+               ClosePostmasterPorts(false);
+
+               /* Restore basic shared memory pointers */
+               InitShmemAccess(UsedShmemSegAddr);
+
+               /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
+               InitProcess();
+
+               /* Attach process to shared data structures */
+               CreateSharedMemoryAndSemaphores(false, 0);
+
+               cookie = atoi(argv[1] + 15);
+               MyBgworkerEntry = find_bgworker_entry(cookie);
+               do_start_bgworker();
+       }
        if (strcmp(argv[1], "--forkarch") == 0)
        {
                /* Close the postmaster's sockets */
@@ -4312,6 +4685,9 @@ sigusr1_handler(SIGNAL_ARGS)
                (errmsg("database system is ready to accept read only connections")));
 
                pmState = PM_HOT_STANDBY;
+
+               /* Some workers may be scheduled to start now */
+               StartOneBackgroundWorker();
        }
 
        if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
@@ -4481,6 +4857,33 @@ PostmasterRandom(void)
        return random();
 }
 
+/*
+ * Count up number of worker processes that did not request backend connections
+ * See SignalUnconnectedWorkers for why this is interesting.
+ */
+static int
+CountUnconnectedWorkers(void)
+{
+       slist_iter      iter;
+       int                     cnt = 0;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+               if (rw->rw_pid == 0)
+                       continue;
+               /* ignore connected workers */
+               if (rw->rw_backend != NULL)
+                       continue;
+
+               cnt++;
+       }
+       return cnt;
+}
+
 /*
  * Count up number of child processes of specified types (dead_end chidren
  * are always excluded).
@@ -4488,7 +4891,7 @@ PostmasterRandom(void)
 static int
 CountChildren(int target)
 {
-       dlist_iter  iter;
+       dlist_iter      iter;
        int                     cnt = 0;
 
        dlist_foreach(iter, &BackendList)
@@ -4504,15 +4907,15 @@ CountChildren(int target)
                 */
                if (target != BACKEND_TYPE_ALL)
                {
-                       int                     child;
+                       /*
+                        * Assign bkend_type for any recently announced WAL Sender
+                        * processes.
+                        */
+                       if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
+                               IsPostmasterChildWalSender(bp->child_slot))
+                               bp->bkend_type = BACKEND_TYPE_WALSND;
 
-                       if (bp->is_autovacuum)
-                               child = BACKEND_TYPE_AUTOVAC;
-                       else if (IsPostmasterChildWalSender(bp->child_slot))
-                               child = BACKEND_TYPE_WALSND;
-                       else
-                               child = BACKEND_TYPE_NORMAL;
-                       if (!(target & child))
+                       if (!(target & bp->bkend_type))
                                continue;
                }
 
@@ -4671,7 +5074,7 @@ StartAutovacuumWorker(void)
                        bn->pid = StartAutoVacWorker();
                        if (bn->pid > 0)
                        {
-                               bn->is_autovacuum = true;
+                               bn->bkend_type = BACKEND_TYPE_AUTOVAC;
                                dlist_push_head(&BackendList, &bn->elem);
 #ifdef EXEC_BACKEND
                                ShmemBackendArrayAdd(bn);
@@ -4746,18 +5149,642 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname)
  *
  * This reports the number of entries needed in per-child-process arrays
  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
- * These arrays include regular backends, autovac workers and walsenders,
- * but not special children nor dead_end children.     This allows the arrays
- * to have a fixed maximum size, to wit the same too-many-children limit
- * enforced by canAcceptConnections(). The exact value isn't too critical
- * as long as it's more than MaxBackends.
+ * These arrays include regular backends, autovac workers, walsenders
+ * and background workers, but not special children nor dead_end children.
+ * This allows the arrays to have a fixed maximum size, to wit the same
+ * too-many-children limit enforced by canAcceptConnections(). The exact value
+ * isn't too critical as long as it's more than MaxBackends.
  */
 int
 MaxLivePostmasterChildren(void)
 {
-       return 2 * MaxBackends;
+       return 2 * (MaxConnections + autovacuum_max_workers + 1 +
+                               GetNumRegisteredBackgroundWorkers(0));
 }
 
+/*
+ * Register a new background worker.
+ *
+ * This can only be called in the _PG_init function of a module library
+ * that's loaded by shared_preload_libraries; otherwise it has no effect.
+ */
+void
+RegisterBackgroundWorker(BackgroundWorker *worker)
+{
+       RegisteredBgWorker *rw;
+       int                     namelen = strlen(worker->bgw_name);
+
+#ifdef EXEC_BACKEND
+
+       /*
+        * Use 1 here, not 0, to avoid confusing a possible bogus cookie read by
+        * atoi() in SubPostmasterMain.
+        */
+       static int      BackgroundWorkerCookie = 1;
+#endif
+
+       if (!IsUnderPostmaster)
+               ereport(LOG,
+                       (errmsg("registering background worker: %s", worker->bgw_name)));
+
+       if (!process_shared_preload_libraries_in_progress)
+       {
+               if (!IsUnderPostmaster)
+                       ereport(LOG,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
+                                                       worker->bgw_name)));
+               return;
+       }
+
+       /* sanity check for flags */
+       if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+       {
+               if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
+               {
+                       if (!IsUnderPostmaster)
+                               ereport(LOG,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection",
+                                                               worker->bgw_name)));
+                       return;
+               }
+
+               if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
+               {
+                       if (!IsUnderPostmaster)
+                               ereport(LOG,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
+                                                               worker->bgw_name)));
+                       return;
+               }
+
+               /* XXX other checks? */
+       }
+
+       if ((worker->bgw_restart_time < 0 &&
+                worker->bgw_restart_time != BGW_NEVER_RESTART) ||
+               (worker->bgw_restart_time > USECS_PER_DAY / 1000))
+       {
+               if (!IsUnderPostmaster)
+                       ereport(LOG,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("background worker \"%s\": invalid restart interval",
+                                               worker->bgw_name)));
+               return;
+       }
+
+       /*
+        * Copy the registration data into the registered workers list.
+        */
+       rw = malloc(sizeof(RegisteredBgWorker) + namelen + 1);
+       if (rw == NULL)
+       {
+               ereport(LOG,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+               return;
+       }
+
+       rw->rw_worker = *worker;
+       rw->rw_worker.bgw_name = ((char *) rw) + sizeof(RegisteredBgWorker);
+       strlcpy(rw->rw_worker.bgw_name, worker->bgw_name, namelen + 1);
+
+       rw->rw_backend = NULL;
+       rw->rw_pid = 0;
+       rw->rw_child_slot = 0;
+       rw->rw_crashed_at = 0;
+#ifdef EXEC_BACKEND
+       rw->rw_cookie = BackgroundWorkerCookie++;
+#endif
+
+       slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
+}
+
+/*
+ * Connect background worker to a database.
+ */
+void
+BackgroundWorkerInitializeConnection(char *dbname, char *username)
+{
+       BackgroundWorker *worker = MyBgworkerEntry;
+
+       /* XXX is this the right errcode? */
+       if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
+               ereport(FATAL,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("database connection requirement not indicated during registration")));
+
+       InitPostgres(dbname, InvalidOid, username, NULL);
+
+       /* it had better not gotten out of "init" mode yet */
+       if (!IsInitProcessingMode())
+               ereport(ERROR,
+                               (errmsg("invalid processing mode in bgworker")));
+       SetProcessingMode(NormalProcessing);
+}
+
+/*
+ * Block/unblock signals in a background worker
+ */
+void
+BackgroundWorkerBlockSignals(void)
+{
+       PG_SETMASK(&BlockSig);
+}
+
+void
+BackgroundWorkerUnblockSignals(void)
+{
+       PG_SETMASK(&UnBlockSig);
+}
+
+#ifdef EXEC_BACKEND
+static BackgroundWorker *
+find_bgworker_entry(int cookie)
+{
+       slist_iter      iter;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+               if (rw->rw_cookie == cookie)
+                       return &rw->rw_worker;
+       }
+
+       return NULL;
+}
+#endif
+
+static void
+bgworker_quickdie(SIGNAL_ARGS)
+{
+       sigaddset(&BlockSig, SIGQUIT);          /* prevent nested calls */
+       PG_SETMASK(&BlockSig);
+
+       /*
+        * We DO NOT want to run proc_exit() callbacks -- we're here because
+        * shared memory may be corrupted, so we don't want to try to clean up our
+        * transaction.  Just nail the windows shut and get out of town.  Now that
+        * there's an atexit callback to prevent third-party code from breaking
+        * things by calling exit() directly, we have to reset the callbacks
+        * explicitly to make this work as intended.
+        */
+       on_exit_reset();
+
+       /*
+        * Note we do exit(0) here, not exit(2) like quickdie.  The reason is that
+        * we don't want to be seen this worker as independently crashed, because
+        * then postmaster would delay restarting it again afterwards.  If some
+        * idiot DBA manually sends SIGQUIT to a random bgworker, the "dead man
+        * switch" will ensure that postmaster sees this as a crash.
+        */
+       exit(0);
+}
+
+/*
+ * Standard SIGTERM handler for background workers
+ */
+static void
+bgworker_die(SIGNAL_ARGS)
+{
+       PG_SETMASK(&BlockSig);
+
+       ereport(FATAL,
+                       (errcode(ERRCODE_ADMIN_SHUTDOWN),
+                        errmsg("terminating background worker \"%s\" due to administrator command",
+                                       MyBgworkerEntry->bgw_name)));
+}
+
+static void
+do_start_bgworker(void)
+{
+       sigjmp_buf      local_sigjmp_buf;
+       char            buf[MAXPGPATH];
+       BackgroundWorker *worker = MyBgworkerEntry;
+
+       if (worker == NULL)
+               elog(FATAL, "unable to find bgworker entry");
+
+       /* we are a postmaster subprocess now */
+       IsUnderPostmaster = true;
+       IsBackgroundWorker = true;
+
+       /* reset MyProcPid */
+       MyProcPid = getpid();
+
+       /* record Start Time for logging */
+       MyStartTime = time(NULL);
+
+       /* Identify myself via ps */
+       snprintf(buf, MAXPGPATH, "bgworker: %s", worker->bgw_name);
+       init_ps_display(buf, "", "", "");
+
+       SetProcessingMode(InitProcessing);
+
+       /* Apply PostAuthDelay */
+       if (PostAuthDelay > 0)
+               pg_usleep(PostAuthDelay * 1000000L);
+
+       /*
+        * If possible, make this process a group leader, so that the postmaster
+        * can signal any child processes too.
+        */
+#ifdef HAVE_SETSID
+       if (setsid() < 0)
+               elog(FATAL, "setsid() failed: %m");
+#endif
+
+       /*
+        * Set up signal handlers.
+        */
+       if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+       {
+               /*
+                * SIGINT is used to signal canceling the current action
+                */
+               pqsignal(SIGINT, StatementCancelHandler);
+               pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+               pqsignal(SIGFPE, FloatExceptionHandler);
+
+               /* XXX Any other handlers needed here? */
+       }
+       else
+       {
+               pqsignal(SIGINT, SIG_IGN);
+               pqsignal(SIGUSR1, SIG_IGN);
+               pqsignal(SIGFPE, SIG_IGN);
+       }
+
+       /* SIGTERM and SIGHUP are configurable */
+       if (worker->bgw_sigterm)
+               pqsignal(SIGTERM, worker->bgw_sigterm);
+       else
+               pqsignal(SIGTERM, bgworker_die);
+
+       if (worker->bgw_sighup)
+               pqsignal(SIGHUP, worker->bgw_sighup);
+       else
+               pqsignal(SIGHUP, SIG_IGN);
+
+       pqsignal(SIGQUIT, bgworker_quickdie);
+       InitializeTimeouts();           /* establishes SIGALRM handler */
+
+       pqsignal(SIGPIPE, SIG_IGN);
+       pqsignal(SIGUSR2, SIG_IGN);
+       pqsignal(SIGCHLD, SIG_DFL);
+
+       /*
+        * If an exception is encountered, processing resumes here.
+        *
+        * See notes in postgres.c about the design of this coding.
+        */
+       if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+       {
+               /* Since not using PG_TRY, must reset error stack by hand */
+               error_context_stack = NULL;
+
+               /* Prevent interrupts while cleaning up */
+               HOLD_INTERRUPTS();
+
+               /* Report the error to the server log */
+               EmitErrorReport();
+
+               /*
+                * Do we need more cleanup here?  For shmem-connected bgworkers, we
+                * will call InitProcess below, which will install ProcKill as exit
+                * callback.  That will take care of releasing locks, etc.
+                */
+
+               /* and go away */
+               proc_exit(1);
+       }
+
+       /* We can now handle ereport(ERROR) */
+       PG_exception_stack = &local_sigjmp_buf;
+
+       /* Early initialization */
+       BaseInit();
+
+       /*
+        * If necessary, create a per-backend PGPROC struct in shared memory,
+        * except in the EXEC_BACKEND case where this was done in
+        * SubPostmasterMain. We must do this before we can use LWLocks (and in
+        * the EXEC_BACKEND case we already had to do some stuff with LWLocks).
+        */
+#ifndef EXEC_BACKEND
+       if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS)
+               InitProcess();
+#endif
+
+       /*
+        * Note that in normal processes, we would call InitPostgres here.      For a
+        * worker, however, we don't know what database to connect to, yet; so we
+        * need to wait until the user code does it via
+        * BackgroundWorkerInitializeConnection().
+        */
+
+       /*
+        * Now invoke the user-defined worker code
+        */
+       worker->bgw_main(worker->bgw_main_arg);
+
+       /* ... and if it returns, we're done */
+       proc_exit(0);
+}
+
+/*
+ * Return the number of background workers registered that have at least
+ * one of the passed flag bits set.
+ */
+static int
+GetNumRegisteredBackgroundWorkers(int flags)
+{
+       slist_iter      iter;
+       int                     count = 0;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+               if (flags != 0 &&
+                       !(rw->rw_worker.bgw_flags & flags))
+                       continue;
+
+               count++;
+       }
+
+       return count;
+}
+
+/*
+ * Return the number of bgworkers that need to have PGPROC entries.
+ */
+int
+GetNumShmemAttachedBgworkers(void)
+{
+       return GetNumRegisteredBackgroundWorkers(BGWORKER_SHMEM_ACCESS);
+}
+
+#ifdef EXEC_BACKEND
+static pid_t
+bgworker_forkexec(int cookie)
+{
+       char       *av[10];
+       int                     ac = 0;
+       char            forkav[MAXPGPATH];
+
+       snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", cookie);
+
+       av[ac++] = "postgres";
+       av[ac++] = forkav;
+       av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
+       av[ac] = NULL;
+
+       Assert(ac < lengthof(av));
+
+       return postmaster_forkexec(ac, av);
+}
+#endif
+
+/*
+ * Start a new bgworker.
+ * Starting time conditions must have been checked already.
+ *
+ * This code is heavily based on autovacuum.c, q.v.
+ */
+static void
+start_bgworker(RegisteredBgWorker *rw)
+{
+       pid_t           worker_pid;
+
+       ereport(LOG,
+                       (errmsg("starting background worker process \"%s\"",
+                                       rw->rw_worker.bgw_name)));
+
+#ifdef EXEC_BACKEND
+       switch ((worker_pid = bgworker_forkexec(rw->rw_cookie)))
+#else
+       switch ((worker_pid = fork_process()))
+#endif
+       {
+               case -1:
+                       ereport(LOG,
+                                       (errmsg("could not fork worker process: %m")));
+                       return;
+
+#ifndef EXEC_BACKEND
+               case 0:
+                       /* in postmaster child ... */
+                       /* Close the postmaster's sockets */
+                       ClosePostmasterPorts(false);
+
+                       /* Lose the postmaster's on-exit routines */
+                       on_exit_reset();
+
+                       /* Do NOT release postmaster's working memory context */
+
+                       MyBgworkerEntry = &rw->rw_worker;
+                       do_start_bgworker();
+                       break;
+#endif
+               default:
+                       rw->rw_pid = worker_pid;
+                       if (rw->rw_backend)
+                               rw->rw_backend->pid = rw->rw_pid;
+       }
+}
+
+/*
+ * Does the current postmaster state require starting a worker with the
+ * specified start_time?
+ */
+static bool
+bgworker_should_start_now(BgWorkerStartTime start_time)
+{
+       switch (pmState)
+       {
+               case PM_NO_CHILDREN:
+               case PM_WAIT_DEAD_END:
+               case PM_SHUTDOWN_2:
+               case PM_SHUTDOWN:
+               case PM_WAIT_BACKENDS:
+               case PM_WAIT_READONLY:
+               case PM_WAIT_BACKUP:
+                       break;
+
+               case PM_RUN:
+                       if (start_time == BgWorkerStart_RecoveryFinished)
+                               return true;
+                       /* fall through */
+
+               case PM_HOT_STANDBY:
+                       if (start_time == BgWorkerStart_ConsistentState)
+                               return true;
+                       /* fall through */
+
+               case PM_RECOVERY:
+               case PM_STARTUP:
+               case PM_INIT:
+                       if (start_time == BgWorkerStart_PostmasterStart)
+                               return true;
+                       /* fall through */
+
+       }
+
+       return false;
+}
+
+/*
+ * Allocate the Backend struct for a connected background worker, but don't
+ * add it to the list of backends just yet.
+ *
+ * Some info from the Backend is copied into the passed rw.
+ */
+static bool
+assign_backendlist_entry(RegisteredBgWorker *rw)
+{
+       Backend    *bn = malloc(sizeof(Backend));
+
+       if (bn == NULL)
+       {
+               ereport(LOG,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+               /*
+                * The worker didn't really crash, but setting this nonzero makes
+                * postmaster wait a bit before attempting to start it again; if it
+                * tried again right away, most likely it'd find itself under the same
+                * memory pressure.
+                */
+               rw->rw_crashed_at = GetCurrentTimestamp();
+               return false;
+       }
+
+       /*
+        * Compute the cancel key that will be assigned to this session. We
+        * probably don't need cancel keys for background workers, but we'd better
+        * have something random in the field to prevent unfriendly people from
+        * sending cancels to them.
+        */
+       MyCancelKey = PostmasterRandom();
+       bn->cancel_key = MyCancelKey;
+
+       bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
+       bn->bkend_type = BACKEND_TYPE_BGWORKER;
+       bn->dead_end = false;
+
+       rw->rw_backend = bn;
+       rw->rw_child_slot = bn->child_slot;
+
+       return true;
+}
+
+/*
+ * If the time is right, start one background worker.
+ *
+ * As a side effect, the bgworker control variables are set or reset whenever
+ * there are more workers to start after this one, and whenever the overall
+ * system state requires it.
+ */
+static void
+StartOneBackgroundWorker(void)
+{
+       slist_iter      iter;
+       TimestampTz now = 0;
+
+       if (FatalError)
+       {
+               StartWorkerNeeded = false;
+               HaveCrashedWorker = false;
+               return;                                 /* not yet */
+       }
+
+       HaveCrashedWorker = false;
+
+       slist_foreach(iter, &BackgroundWorkerList)
+       {
+               RegisteredBgWorker *rw;
+
+               rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+               /* already running? */
+               if (rw->rw_pid != 0)
+                       continue;
+
+               /*
+                * If this worker has crashed previously, maybe it needs to be
+                * restarted (unless on registration it specified it doesn't want to
+                * be restarted at all).  Check how long ago did a crash last happen.
+                * If the last crash is too recent, don't start it right away; let it
+                * be restarted once enough time has passed.
+                */
+               if (rw->rw_crashed_at != 0)
+               {
+                       if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
+                               continue;
+
+                       if (now == 0)
+                               now = GetCurrentTimestamp();
+
+                       if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
+                                                                         rw->rw_worker.bgw_restart_time * 1000))
+                       {
+                               HaveCrashedWorker = true;
+                               continue;
+                       }
+               }
+
+               if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
+               {
+                       /* reset crash time before calling assign_backendlist_entry */
+                       rw->rw_crashed_at = 0;
+
+                       /*
+                        * If necessary, allocate and assign the Backend element.  Note we
+                        * must do this before forking, so that we can handle out of
+                        * memory properly.
+                        *
+                        * If not connected, we don't need a Backend element, but we still
+                        * need a PMChildSlot.
+                        */
+                       if (rw->rw_worker.bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+                       {
+                               if (!assign_backendlist_entry(rw))
+                                       return;
+                       }
+                       else
+                               rw->rw_child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
+
+                       start_bgworker(rw); /* sets rw->rw_pid */
+
+                       if (rw->rw_backend)
+                       {
+                               dlist_push_head(&BackendList, &rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+                               ShmemBackendArrayAdd(rw->rw_backend);
+#endif
+                       }
+
+                       /*
+                        * Have ServerLoop call us again.  Note that there might not
+                        * actually *be* another runnable worker, but we don't care all
+                        * that much; we will find out the next time we run.
+                        */
+                       StartWorkerNeeded = true;
+                       return;
+               }
+       }
+
+       /* no runnable worker found */
+       StartWorkerNeeded = false;
+}
 
 #ifdef EXEC_BACKEND
 
index 41af7924c0d9e5f38e381d13fccc87c8a66acb48..a56cb825fc50267f62f6915759f88fdcbda8e6fe 100644 (file)
@@ -140,7 +140,9 @@ ProcGlobalSemas(void)
  *       So, now we grab enough semaphores to support the desired max number
  *       of backends immediately at initialization --- if the sysadmin has set
  *       MaxConnections or autovacuum_max_workers higher than his kernel will
- *       support, he'll find out sooner rather than later.
+ *       support, he'll find out sooner rather than later.  (The number of
+ *       background worker processes registered by loadable modules is also taken
+ *       into consideration.)
  *
  *       Another reason for creating semaphores here is that the semaphore
  *       implementation typically requires us to create semaphores in the
@@ -171,6 +173,7 @@ InitProcGlobal(void)
        ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
        ProcGlobal->freeProcs = NULL;
        ProcGlobal->autovacFreeProcs = NULL;
+       ProcGlobal->bgworkerFreeProcs = NULL;
        ProcGlobal->startupProc = NULL;
        ProcGlobal->startupProcPid = 0;
        ProcGlobal->startupBufferPinWaitBufId = -1;
@@ -179,10 +182,11 @@ InitProcGlobal(void)
 
        /*
         * Create and initialize all the PGPROC structures we'll need.  There are
-        * four separate consumers: (1) normal backends, (2) autovacuum workers
-        * and the autovacuum launcher, (3) auxiliary processes, and (4) prepared
-        * transactions.  Each PGPROC structure is dedicated to exactly one of
-        * these purposes, and they do not move between groups.
+        * five separate consumers: (1) normal backends, (2) autovacuum workers
+        * and the autovacuum launcher, (3) background workers, (4) auxiliary
+        * processes, and (5) prepared transactions.  Each PGPROC structure is
+        * dedicated to exactly one of these purposes, and they do not move between
+        * groups.
         */
        procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
        ProcGlobal->allProcs = procs;
@@ -223,12 +227,12 @@ InitProcGlobal(void)
                procs[i].pgprocno = i;
 
                /*
-                * Newly created PGPROCs for normal backends or for autovacuum must be
-                * queued up on the appropriate free list.      Because there can only
-                * ever be a small, fixed number of auxiliary processes, no free list
-                * is used in that case; InitAuxiliaryProcess() instead uses a linear
-                * search.      PGPROCs for prepared transactions are added to a free list
-                * by TwoPhaseShmemInit().
+                * Newly created PGPROCs for normal backends, autovacuum and bgworkers
+                * must be queued up on the appropriate free list.      Because there can
+                * only ever be a small, fixed number of auxiliary processes, no free
+                * list is used in that case; InitAuxiliaryProcess() instead uses a
+                * linear search.       PGPROCs for prepared transactions are added to a
+                * free list by TwoPhaseShmemInit().
                 */
                if (i < MaxConnections)
                {
@@ -236,12 +240,18 @@ InitProcGlobal(void)
                        procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs;
                        ProcGlobal->freeProcs = &procs[i];
                }
-               else if (i < MaxBackends)
+               else if (i < MaxConnections + autovacuum_max_workers + 1)
                {
                        /* PGPROC for AV launcher/worker, add to autovacFreeProcs list */
                        procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs;
                        ProcGlobal->autovacFreeProcs = &procs[i];
                }
+               else if (i < MaxBackends)
+               {
+                       /* PGPROC for bgworker, add to bgworkerFreeProcs list */
+                       procs[i].links.next = (SHM_QUEUE *) ProcGlobal->bgworkerFreeProcs;
+                       ProcGlobal->bgworkerFreeProcs = &procs[i];
+               }
 
                /* Initialize myProcLocks[] shared memory queues. */
                for (j = 0; j < NUM_LOCK_PARTITIONS; j++)
@@ -299,6 +309,8 @@ InitProcess(void)
 
        if (IsAnyAutoVacuumProcess())
                MyProc = procglobal->autovacFreeProcs;
+       else if (IsBackgroundWorker)
+               MyProc = procglobal->bgworkerFreeProcs;
        else
                MyProc = procglobal->freeProcs;
 
@@ -306,6 +318,8 @@ InitProcess(void)
        {
                if (IsAnyAutoVacuumProcess())
                        procglobal->autovacFreeProcs = (PGPROC *) MyProc->links.next;
+               else if (IsBackgroundWorker)
+                       procglobal->bgworkerFreeProcs = (PGPROC *) MyProc->links.next;
                else
                        procglobal->freeProcs = (PGPROC *) MyProc->links.next;
                SpinLockRelease(ProcStructLock);
@@ -782,6 +796,11 @@ ProcKill(int code, Datum arg)
                MyProc->links.next = (SHM_QUEUE *) procglobal->autovacFreeProcs;
                procglobal->autovacFreeProcs = MyProc;
        }
+       else if (IsBackgroundWorker)
+       {
+               MyProc->links.next = (SHM_QUEUE *) procglobal->bgworkerFreeProcs;
+               procglobal->bgworkerFreeProcs = MyProc;
+       }
        else
        {
                MyProc->links.next = (SHM_QUEUE *) procglobal->freeProcs;
index 4b66bd3e358bb8aea34b165f6ab47477e022305d..8dd2b4b1234a39a2305dca89514bf6ef74b392c6 100644 (file)
@@ -87,6 +87,7 @@ pid_t         PostmasterPid = 0;
 bool           IsPostmasterEnvironment = false;
 bool           IsUnderPostmaster = false;
 bool           IsBinaryUpgrade = false;
+bool           IsBackgroundWorker = false;
 
 bool           ExitOnAnyError = false;
 
index 5288aa77ee104fd271d8895e85d5354bb43bb68d..c518c2133cdbb68620282b68bee1147849ab0c21 100644 (file)
@@ -498,10 +498,10 @@ void
 InitializeSessionUserIdStandalone(void)
 {
        /*
-        * This function should only be called in single-user mode and in
-        * autovacuum workers.
+        * This function should only be called in single-user mode, in
+        * autovacuum workers, and in background workers.
         */
-       AssertState(!IsUnderPostmaster || IsAutoVacuumWorkerProcess());
+       AssertState(!IsUnderPostmaster || IsAutoVacuumWorkerProcess() || IsBackgroundWorker);
 
        /* call only once */
        AssertState(!OidIsValid(AuthenticatedUserId));
index 2eb456df45e1a5a9f55689d935e6bec320cb33bd..b87ec6c482253c5651449077f2b9546e9a4e0b78 100644 (file)
@@ -627,6 +627,19 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
                                         errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.",
                                                         username)));
        }
+       else if (IsBackgroundWorker)
+       {
+               if (username == NULL)
+               {
+                       InitializeSessionUserIdStandalone();
+                       am_superuser = true;
+               }
+               else
+               {
+                       InitializeSessionUserId(username);
+                       am_superuser = superuser();
+               }
+       }
        else
        {
                /* normal multiuser case */
index 81cf136937c1feb5a41f705b8a5c3f412876b585..2cf34cea5a4882516255cfb856e65c24a3d17e6f 100644 (file)
@@ -52,6 +52,7 @@
 #include "parser/scansup.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/bgworker.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
  * removed, we still could not exceed INT_MAX/4 because some places compute
  * 4*MaxBackends without any overflow check.  This is rechecked in
  * check_maxconnections, since MaxBackends is computed as MaxConnections
- * plus autovacuum_max_workers plus one (for the autovacuum launcher).
+ * plus the number of bgworkers plus autovacuum_max_workers plus one (for the
+ * autovacuum launcher).
  */
 #define MAX_BACKENDS   0x7fffff
 
@@ -8628,7 +8630,8 @@ show_tcp_keepalives_count(void)
 static bool
 check_maxconnections(int *newval, void **extra, GucSource source)
 {
-       if (*newval + autovacuum_max_workers + 1 > MAX_BACKENDS)
+       if (*newval + GetNumShmemAttachedBgworkers() + autovacuum_max_workers + 1 >
+               MAX_BACKENDS)
                return false;
        return true;
 }
@@ -8636,13 +8639,15 @@ check_maxconnections(int *newval, void **extra, GucSource source)
 static void
 assign_maxconnections(int newval, void *extra)
 {
-       MaxBackends = newval + autovacuum_max_workers + 1;
+       MaxBackends = newval + autovacuum_max_workers + 1 +
+               GetNumShmemAttachedBgworkers();
 }
 
 static bool
 check_autovacuum_max_workers(int *newval, void **extra, GucSource source)
 {
-       if (MaxConnections + *newval + 1 > MAX_BACKENDS)
+       if (MaxConnections + *newval + 1 + GetNumShmemAttachedBgworkers() >
+               MAX_BACKENDS)
                return false;
        return true;
 }
@@ -8650,7 +8655,7 @@ check_autovacuum_max_workers(int *newval, void **extra, GucSource source)
 static void
 assign_autovacuum_max_workers(int newval, void *extra)
 {
-       MaxBackends = MaxConnections + newval + 1;
+       MaxBackends = MaxConnections + newval + 1 + GetNumShmemAttachedBgworkers();
 }
 
 static bool
index 3ea349315c770f2e1cd2c55028501913c0ddfede..b41227fafbd881584bd16b6ac29bee9dc06610a1 100644 (file)
@@ -131,6 +131,7 @@ do { \
 extern pid_t PostmasterPid;
 extern bool IsPostmasterEnvironment;
 extern PGDLLIMPORT bool IsUnderPostmaster;
+extern bool IsBackgroundWorker;
 extern bool IsBinaryUpgrade;
 
 extern bool ExitOnAnyError;
diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h
new file mode 100644 (file)
index 0000000..737728b
--- /dev/null
@@ -0,0 +1,104 @@
+/*--------------------------------------------------------------------
+ * bgworker.h
+ *             POSTGRES pluggable background workers interface
+ *
+ * A background worker is a process able to run arbitrary, user-supplied code,
+ * including normal transactions.
+ *
+ * Any external module loaded via shared_preload_libraries can register a
+ * worker.  Then, at the appropriate time, the worker process is forked from
+ * the postmaster and runs the user-supplied "main" function.  This code may
+ * connect to a database and run transactions.  Once started, it stays active
+ * until shutdown or crash.  The process should sleep during periods of
+ * inactivity.
+ *
+ * If the fork() call fails in the postmaster, it will try again later.  Note
+ * that the failure can only be transient (fork failure due to high load,
+ * memory pressure, too many processes, etc); more permanent problems, like
+ * failure to connect to a database, are detected later in the worker and dealt
+ * with just by having the worker exit normally.  Postmaster will launch a new
+ * worker again later.
+ *
+ * Note that there might be more than one worker in a database concurrently,
+ * and the same module may request more than one worker running the same (or
+ * different) code.
+ *
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *             src/include/postmaster/bgworker.h
+ *--------------------------------------------------------------------
+ */
+#ifndef BGWORKER_H
+#define BGWORKER_H
+
+/*---------------------------------------------------------------------
+ * External module API.
+ *---------------------------------------------------------------------
+ */
+
+/*
+ * Pass this flag to have your worker be able to connect to shared memory.
+ */
+#define BGWORKER_SHMEM_ACCESS                                          0x0001
+
+/*
+ * This flag means the bgworker requires a database connection.  The connection
+ * is not established automatically; the worker must establish it later.
+ * It requires that BGWORKER_SHMEM_ACCESS was passed too.
+ */
+#define BGWORKER_BACKEND_DATABASE_CONNECTION           0x0002
+
+
+typedef void (*bgworker_main_type)(void *main_arg);
+typedef void (*bgworker_sighdlr_type)(SIGNAL_ARGS);
+
+/*
+ * Points in time at which a bgworker can request to be started
+ */
+typedef enum
+{
+       BgWorkerStart_PostmasterStart,
+       BgWorkerStart_ConsistentState,
+       BgWorkerStart_RecoveryFinished
+} BgWorkerStartTime;
+
+#define BGW_DEFAULT_RESTART_INTERVAL   60
+#define BGW_NEVER_RESTART                              -1
+
+typedef struct BackgroundWorker
+{
+       char       *bgw_name;
+       int         bgw_flags;
+       BgWorkerStartTime bgw_start_time;
+       int                     bgw_restart_time;               /* in seconds, or BGW_NEVER_RESTART */
+       bgworker_main_type      bgw_main;
+       void       *bgw_main_arg;
+       bgworker_sighdlr_type bgw_sighup;
+       bgworker_sighdlr_type bgw_sigterm;
+} BackgroundWorker;
+
+/* Register a new bgworker */
+extern void RegisterBackgroundWorker(BackgroundWorker *worker);
+
+/* This is valid in a running worker */
+extern BackgroundWorker *MyBgworkerEntry;
+
+/*
+ * Connect to the specified database, as the specified user.  Only a worker
+ * that passed BGWORKER_BACKEND_DATABASE_CONNECTION during registration may
+ * call this.
+ *
+ * If username is NULL, bootstrapping superuser is used.
+ * If dbname is NULL, connection is made to no specific database;
+ * only shared catalogs can be accessed.
+ */
+extern void BackgroundWorkerInitializeConnection(char *dbname, char *username);
+
+/* Block/unblock signals in a background worker process */
+extern void BackgroundWorkerBlockSignals(void);
+extern void BackgroundWorkerUnblockSignals(void);
+
+#endif /* BGWORKER_H */
index 0fe7ec26db7646472fca248dee6982dc5c595c6c..44221cc545793b7377fc219c4d50518471fee8c8 100644 (file)
@@ -51,6 +51,8 @@ extern void ClosePostmasterPorts(bool am_syslogger);
 
 extern int     MaxLivePostmasterChildren(void);
 
+extern int GetNumShmemAttachedBgworkers(void);
+
 #ifdef EXEC_BACKEND
 extern pid_t postmaster_forkexec(int argc, char *argv[]);
 extern void    SubPostmasterMain(int argc, char *argv[]) __attribute__((noreturn));
index 686ac486574054a3ada70cd01ee89e48f779c05d..7600007b09294de814ee6da04548c9ee3a36d352 100644 (file)
@@ -189,6 +189,8 @@ typedef struct PROC_HDR
        PGPROC     *freeProcs;
        /* Head of list of autovacuum's free PGPROC structures */
        PGPROC     *autovacFreeProcs;
+       /* Head of list of bgworker free PGPROC structures */
+       PGPROC     *bgworkerFreeProcs;
        /* WALWriter process's latch */
        Latch      *walwriterLatch;
        /* Checkpointer process's latch */