1 /*-------------------------------------------------------------------------
4 * Infrastructure for launching parallel workers
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/parallel.c
12 *-------------------------------------------------------------------------
17 #include "access/xact.h"
18 #include "access/xlog.h"
19 #include "access/parallel.h"
20 #include "commands/async.h"
21 #include "libpq/libpq.h"
22 #include "libpq/pqformat.h"
23 #include "libpq/pqmq.h"
24 #include "miscadmin.h"
25 #include "optimizer/planmain.h"
26 #include "storage/ipc.h"
27 #include "storage/sinval.h"
28 #include "storage/spin.h"
29 #include "tcop/tcopprot.h"
30 #include "utils/combocid.h"
31 #include "utils/guc.h"
32 #include "utils/inval.h"
33 #include "utils/memutils.h"
34 #include "utils/resowner.h"
35 #include "utils/snapmgr.h"
38 * We don't want to waste a lot of memory on an error queue which, most of
39 * the time, will process only a handful of small messages. However, it is
40 * desirable to make it large enough that a typical ErrorResponse can be sent
41 * without blocking. That way, a worker that errors out can write the whole
42 * message into the queue and terminate without waiting for the user backend.
/* Size in bytes of one worker's error queue; all queues sit back to back in a single TOC chunk. */
44 #define PARALLEL_ERROR_QUEUE_SIZE 16384
46 /* Magic number for parallel context TOC. */
47 #define PARALLEL_MAGIC 0x50477c7c
50 * Magic numbers for parallel state sharing. Higher-level code should use
51 * smaller values, leaving these very large ones for use by this module.
/* Each key below names one chunk that InitializeParallelDSM inserts into the TOC. */
53 #define PARALLEL_KEY_FIXED UINT64CONST(0xFFFFFFFFFFFF0001)
54 #define PARALLEL_KEY_ERROR_QUEUE UINT64CONST(0xFFFFFFFFFFFF0002)
55 #define PARALLEL_KEY_LIBRARY UINT64CONST(0xFFFFFFFFFFFF0003)
56 #define PARALLEL_KEY_GUC UINT64CONST(0xFFFFFFFFFFFF0004)
57 #define PARALLEL_KEY_COMBO_CID UINT64CONST(0xFFFFFFFFFFFF0005)
58 #define PARALLEL_KEY_TRANSACTION_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0006)
59 #define PARALLEL_KEY_ACTIVE_SNAPSHOT UINT64CONST(0xFFFFFFFFFFFF0007)
60 #define PARALLEL_KEY_TRANSACTION_STATE UINT64CONST(0xFFFFFFFFFFFF0008)
/* Only inserted when the context was created with a library name (extension entrypoint). */
61 #define PARALLEL_KEY_EXTENSION_TRAMPOLINE UINT64CONST(0xFFFFFFFFFFFF0009)
63 /* Fixed-size parallel state. */
/*
 * One instance is stored in the TOC under PARALLEL_KEY_FIXED.  The launching
 * backend fills it in InitializeParallelDSM and each worker reads it in
 * ParallelWorkerMain.
 *
 * NOTE(review): code elsewhere in this file also uses fps->database_id,
 * fps->current_user_id, fps->sec_context and fps->mutex; the declarations of
 * those members are not visible in this listing.
 */
64 typedef struct FixedParallelState
66 /* Fixed-size state that workers must restore. */
68 Oid authenticated_user_id;
/* Identity of the launching backend (MyProc, MyProcPid, MyBackendId at setup time). */
71 PGPROC *parallel_master_pgproc;
72 pid_t parallel_master_pid;
73 BackendId parallel_master_backend_id;
75 /* Entrypoint for parallel workers. */
/* Copied from pcxt->entrypoint; ParallelWorkerMain calls through it. */
76 parallel_worker_main_type entrypoint;
78 /* Mutex protects remaining fields. */
81 /* Maximum XactLastRecEnd of any worker. */
/* Raised by ParallelWorkerReportLastRecEnd, consumed by WaitForParallelWorkersToFinish. */
82 XLogRecPtr last_xlog_end;
86 * Our parallel worker number. We initialize this to -1, meaning that we are
87 * not a parallel worker. In parallel workers, it will be set to a value >= 0
88 * and < the number of workers before any user code is invoked; each parallel
89 * worker will get a different parallel worker number.
91 int ParallelWorkerNumber = -1;
93 /* Is there a parallel message pending which we need to receive? */
/* Set by HandleParallelMessageInterrupt, cleared at the start of HandleParallelMessages. */
94 bool ParallelMessagePending = false;
96 /* Are we initializing a parallel worker? */
/* True from the start of ParallelWorkerMain until just before it enters parallel mode. */
97 bool InitializingParallelWorker = false;
99 /* Pointer to our fixed parallel state. */
/* Assigned in ParallelWorkerMain; ParallelWorkerReportLastRecEnd asserts it is non-NULL. */
100 static FixedParallelState *MyFixedParallelState;
102 /* List of active parallel contexts. */
/* CreateParallelContext pushes onto this list; DestroyParallelContext unlinks from it. */
103 static dlist_head pcxt_list = DLIST_STATIC_INIT(pcxt_list);
105 /* Private functions. */
/* The int argument of HandleParallelMessage is the worker's index into pcxt->worker[]. */
106 static void HandleParallelMessage(ParallelContext *, int, StringInfo msg);
107 static void ParallelErrorContext(void *arg);
108 static void ParallelExtensionTrampoline(dsm_segment *seg, shm_toc *toc);
109 static void ParallelWorkerMain(Datum main_arg);
110 static void WaitForParallelWorkersToExit(ParallelContext *pcxt);
113 * Establish a new parallel context. This should be done after entering
114 * parallel mode, and (unless there is an error) the context should be
115 * destroyed before exiting the current subtransaction.
/*
 * Allocates a zero-filled ParallelContext while TopTransactionContext is the
 * current memory context, records the caller's request in it, and links it
 * at the head of pcxt_list.  No shared memory is created and no workers are
 * started here; see InitializeParallelDSM and LaunchParallelWorkers.
 *
 * entrypoint: function the workers are to run; stored in pcxt->entrypoint.
 * nworkers:   requested number of workers; asserted to be >= 0.
 */
118 CreateParallelContext(parallel_worker_main_type entrypoint, int nworkers)
120 MemoryContext oldcontext;
121 ParallelContext *pcxt;
123 /* It is unsafe to create a parallel context if not in parallel mode. */
124 Assert(IsInParallelMode());
126 /* Number of workers should be non-negative. */
127 Assert(nworkers >= 0);
130 * If dynamic shared memory is not available, we won't be able to use
131 * background workers.
/* NOTE(review): the statement controlled by this test is not visible in this listing. */
133 if (dynamic_shared_memory_type == DSM_IMPL_NONE)
137 * If we are running under serializable isolation, we can't use parallel
138 * workers, at least not until somebody enhances that mechanism to be
/* NOTE(review): the statement controlled by this test is not visible in this listing. */
141 if (IsolationIsSerializable())
144 /* We might be running in a short-lived memory context. */
145 oldcontext = MemoryContextSwitchTo(TopTransactionContext);
147 /* Initialize a new ParallelContext. */
148 pcxt = palloc0(sizeof(ParallelContext));
/* The subtransaction id is what AtEOSubXact_Parallel compares against. */
149 pcxt->subid = GetCurrentSubTransactionId();
150 pcxt->nworkers = nworkers;
151 pcxt->entrypoint = entrypoint;
/* Remembered so HandleParallelMessage can rethrow worker errors beneath the callbacks active now. */
152 pcxt->error_context_stack = error_context_stack;
153 shm_toc_initialize_estimator(&pcxt->estimator);
154 dlist_push_head(&pcxt_list, &pcxt->node);
156 /* Restore previous memory context. */
157 MemoryContextSwitchTo(oldcontext);
163 * Establish a new parallel context that calls a function provided by an
164 * extension. This works around the fact that the library might get mapped
165 * at a different address in each backend.
/*
 * Wraps CreateParallelContext, passing ParallelExtensionTrampoline as the
 * entrypoint, and stores copies of the library and function names on the
 * context.  InitializeParallelDSM later writes both names into the TOC, where
 * the trampoline reads them inside the worker.
 *
 * NOTE(review): the function_name and nworkers parameters used below are
 * declared on lines that are not visible in this listing.
 */
168 CreateParallelContextForExternalFunction(char *library_name,
172 MemoryContext oldcontext;
173 ParallelContext *pcxt;
175 /* We might be running in a very short-lived memory context. */
176 oldcontext = MemoryContextSwitchTo(TopTransactionContext);
178 /* Create the context. */
179 pcxt = CreateParallelContext(ParallelExtensionTrampoline, nworkers);
/* Private copies, made while TopTransactionContext is current, so the caller's strings need not outlive this call. */
180 pcxt->library_name = pstrdup(library_name);
181 pcxt->function_name = pstrdup(function_name);
183 /* Restore previous memory context. */
184 MemoryContextSwitchTo(oldcontext);
190 * Establish the dynamic shared memory segment for a parallel context and
191 * copy into it the state and other bookkeeping information that parallel workers will need.
/*
 * Works in two passes over the same set of chunks.  First, every chunk's
 * size and key is registered with pcxt->estimator.  Then the segment (or
 * backend-private memory) is created and each chunk is allocated, filled and
 * inserted into the TOC.  The fixed state is always stored; everything else
 * is stored only when pcxt->nworkers > 0.
 *
 * NOTE(review): guc_len, tsnaplen, asnaplen, tstatelen, segsize, the loop
 * index and several local pointers are used below, but their declarations
 * are not visible in this listing.
 */
195 InitializeParallelDSM(ParallelContext *pcxt)
197 MemoryContext oldcontext;
198 Size library_len = 0;
200 Size combocidlen = 0;
206 FixedParallelState *fps;
/* Both snapshots are captured once here and reused for estimating and serializing. */
207 Snapshot transaction_snapshot = GetTransactionSnapshot();
208 Snapshot active_snapshot = GetActiveSnapshot();
210 /* We might be running in a very short-lived memory context. */
211 oldcontext = MemoryContextSwitchTo(TopTransactionContext);
213 /* Allow space to store the fixed-size parallel state. */
214 shm_toc_estimate_chunk(&pcxt->estimator, sizeof(FixedParallelState));
215 shm_toc_estimate_keys(&pcxt->estimator, 1);
218 * Normally, the user will have requested at least one worker process, but
219 * if by chance they have not, we can skip a bunch of things here.
221 if (pcxt->nworkers > 0)
223 /* Estimate space for various kinds of state sharing. */
224 library_len = EstimateLibraryStateSpace();
225 shm_toc_estimate_chunk(&pcxt->estimator, library_len);
226 guc_len = EstimateGUCStateSpace();
227 shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
228 combocidlen = EstimateComboCIDStateSpace();
229 shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
230 tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
231 shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
232 asnaplen = EstimateSnapshotSpace(active_snapshot);
233 shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
234 tstatelen = EstimateTransactionStateSpace();
235 shm_toc_estimate_chunk(&pcxt->estimator, tstatelen);
236 /* If you add more chunks here, you probably need to add keys. */
/* Six keys: library, GUC, combo CID, transaction snapshot, active snapshot, transaction state. */
237 shm_toc_estimate_keys(&pcxt->estimator, 6);
239 /* Estimate space needed for error queues. */
240 StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
241 PARALLEL_ERROR_QUEUE_SIZE,
242 "parallel error queue size not buffer-aligned");
/* All error queues share one chunk and one key. */
243 shm_toc_estimate_chunk(&pcxt->estimator,
244 mul_size(PARALLEL_ERROR_QUEUE_SIZE,
246 shm_toc_estimate_keys(&pcxt->estimator, 1);
248 /* Estimate how much we'll need for extension entrypoint info. */
249 if (pcxt->library_name != NULL)
251 Assert(pcxt->entrypoint == ParallelExtensionTrampoline);
252 Assert(pcxt->function_name != NULL);
/* Two NUL-terminated strings are stored back to back, hence the + 2. */
253 shm_toc_estimate_chunk(&pcxt->estimator, strlen(pcxt->library_name)
254 + strlen(pcxt->function_name) + 2);
255 shm_toc_estimate_keys(&pcxt->estimator, 1);
260 * Create DSM and initialize with new table of contents. But if the user
261 * didn't request any workers, then don't bother creating a dynamic shared
262 * memory segment; instead, just use backend-private memory.
264 * Also, if we can't create a dynamic shared memory segment because the
265 * maximum number of segments have already been created, then fall back to
266 * backend-private memory, and plan not to use any workers. We hope this
267 * won't happen very often, but it's better to abandon the use of
268 * parallelism than to fail outright.
270 segsize = shm_toc_estimate(&pcxt->estimator);
/* NOTE(review): the branch structure joining the two shm_toc_create calls below is not fully visible in this listing. */
271 if (pcxt->nworkers != 0)
272 pcxt->seg = dsm_create(segsize, DSM_CREATE_NULL_IF_MAXSEGMENTS);
273 if (pcxt->seg != NULL)
274 pcxt->toc = shm_toc_create(PARALLEL_MAGIC,
275 dsm_segment_address(pcxt->seg),
/* The private fallback is taken from TopMemoryContext; DestroyParallelContext pfrees it. */
280 pcxt->private_memory = MemoryContextAlloc(TopMemoryContext, segsize);
281 pcxt->toc = shm_toc_create(PARALLEL_MAGIC, pcxt->private_memory,
285 /* Initialize fixed-size state in shared memory. */
286 fps = (FixedParallelState *)
287 shm_toc_allocate(pcxt->toc, sizeof(FixedParallelState));
288 fps->database_id = MyDatabaseId;
289 fps->authenticated_user_id = GetAuthenticatedUserId();
290 GetUserIdAndSecContext(&fps->current_user_id, &fps->sec_context);
291 fps->parallel_master_pgproc = MyProc;
292 fps->parallel_master_pid = MyProcPid;
293 fps->parallel_master_backend_id = MyBackendId;
294 fps->entrypoint = pcxt->entrypoint;
295 SpinLockInit(&fps->mutex);
/* Starts at 0; workers raise it through ParallelWorkerReportLastRecEnd. */
296 fps->last_xlog_end = 0;
297 shm_toc_insert(pcxt->toc, PARALLEL_KEY_FIXED, fps);
299 /* We can skip the rest of this if we're not budgeting for any workers. */
300 if (pcxt->nworkers > 0)
308 char *error_queue_space;
310 /* Serialize shared libraries we have loaded. */
311 libraryspace = shm_toc_allocate(pcxt->toc, library_len);
312 SerializeLibraryState(library_len, libraryspace);
313 shm_toc_insert(pcxt->toc, PARALLEL_KEY_LIBRARY, libraryspace);
315 /* Serialize GUC settings. */
316 gucspace = shm_toc_allocate(pcxt->toc, guc_len);
317 SerializeGUCState(guc_len, gucspace);
318 shm_toc_insert(pcxt->toc, PARALLEL_KEY_GUC, gucspace);
320 /* Serialize combo CID state. */
321 combocidspace = shm_toc_allocate(pcxt->toc, combocidlen);
322 SerializeComboCIDState(combocidlen, combocidspace);
323 shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);
325 /* Serialize transaction snapshot and active snapshot. */
326 tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
327 SerializeSnapshot(transaction_snapshot, tsnapspace);
328 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT,
330 asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
331 SerializeSnapshot(active_snapshot, asnapspace);
332 shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);
334 /* Serialize transaction state. */
335 tstatespace = shm_toc_allocate(pcxt->toc, tstatelen);
336 SerializeTransactionState(tstatelen, tstatespace);
337 shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_STATE, tstatespace);
339 /* Allocate space for worker information. */
/* One zeroed ParallelWorkerInfo per requested worker, indexed by worker number. */
340 pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
343 * Establish error queues in dynamic shared memory.
345 * These queues should be used only for transmitting ErrorResponse,
346 * NoticeResponse, and NotifyResponse protocol messages. Tuple data
347 * should be transmitted via separate (possibly larger?) queues.
350 shm_toc_allocate(pcxt->toc,
351 mul_size(PARALLEL_ERROR_QUEUE_SIZE,
/* Queue i occupies the i'th PARALLEL_ERROR_QUEUE_SIZE-byte slice of the chunk, with this backend as its receiver. */
353 for (i = 0; i < pcxt->nworkers; ++i)
358 start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
359 mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
360 shm_mq_set_receiver(mq, MyProc);
361 pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
363 shm_toc_insert(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, error_queue_space);
365 /* Serialize extension entrypoint information. */
366 if (pcxt->library_name != NULL)
368 Size lnamelen = strlen(pcxt->library_name);
369 char *extensionstate;
371 extensionstate = shm_toc_allocate(pcxt->toc, lnamelen
372 + strlen(pcxt->function_name) + 2);
/* Layout: library_name, NUL, function_name, NUL; ParallelExtensionTrampoline splits it the same way. */
373 strcpy(extensionstate, pcxt->library_name);
374 strcpy(extensionstate + lnamelen + 1, pcxt->function_name);
375 shm_toc_insert(pcxt->toc, PARALLEL_KEY_EXTENSION_TRAMPOLINE,
380 /* Restore previous memory context. */
381 MemoryContextSwitchTo(oldcontext);
385 * Reinitialize the dynamic shared memory segment for a parallel context such
386 * that we could launch workers for it again.
/*
 * Prepares an already-initialized context for another launch: waits for the
 * previous workers to finish and exit, clears last_xlog_end, rebuilds every
 * error queue in place over the existing PARALLEL_KEY_ERROR_QUEUE chunk, and
 * zeroes nworkers_launched.  The other TOC chunks are left untouched.
 */
389 ReinitializeParallelDSM(ParallelContext *pcxt)
391 FixedParallelState *fps;
392 char *error_queue_space;
/* NOTE(review): the statement controlled by this test is not visible in this listing. */
395 if (pcxt->nworkers_launched == 0)
398 WaitForParallelWorkersToFinish(pcxt);
399 WaitForParallelWorkersToExit(pcxt);
401 /* Reset a few bits of fixed parallel state to a clean state. */
402 fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
403 fps->last_xlog_end = 0;
405 /* Recreate error queues. */
407 shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE);
/* Same slicing and receiver setup as in InitializeParallelDSM; each worker slot gets a fresh handle. */
408 for (i = 0; i < pcxt->nworkers; ++i)
413 start = error_queue_space + i * PARALLEL_ERROR_QUEUE_SIZE;
414 mq = shm_mq_create(start, PARALLEL_ERROR_QUEUE_SIZE);
415 shm_mq_set_receiver(mq, MyProc);
416 pcxt->worker[i].error_mqh = shm_mq_attach(mq, pcxt->seg, NULL);
419 /* Reset number of workers launched. */
420 pcxt->nworkers_launched = 0;
424 * Launch parallel workers.
/*
 * Registers up to pcxt->nworkers dynamic background workers that all run
 * ParallelWorkerMain, and counts the successful registrations in
 * pcxt->nworkers_launched.  Slots that could not be registered have their
 * bgwhandle and error_mqh set to NULL.
 */
427 LaunchParallelWorkers(ParallelContext *pcxt)
429 MemoryContext oldcontext;
/*
 * NOTE(review): no whole-struct initialization of `worker` is visible in
 * this listing; confirm that every field RegisterDynamicBackgroundWorker
 * reads is assigned before the loop.
 */
430 BackgroundWorker worker;
432 bool any_registrations_failed = false;
434 /* Skip this if we have no workers. */
435 if (pcxt->nworkers == 0)
438 /* We need to be a lock group leader. */
439 BecomeLockGroupLeader();
441 /* If we do have workers, we'd better have a DSM segment. */
442 Assert(pcxt->seg != NULL);
444 /* We might be running in a short-lived memory context. */
445 oldcontext = MemoryContextSwitchTo(TopTransactionContext);
447 /* Configure a worker. */
448 snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
451 BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
452 worker.bgw_start_time = BgWorkerStart_ConsistentState;
453 worker.bgw_restart_time = BGW_NEVER_RESTART;
454 worker.bgw_main = ParallelWorkerMain;
/* The DSM segment handle is the worker's main argument; ParallelWorkerMain passes it to dsm_attach. */
455 worker.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(pcxt->seg));
456 worker.bgw_notify_pid = MyProcPid;
457 memset(&worker.bgw_extra, 0, BGW_EXTRALEN);
462 * The caller must be able to tolerate ending up with fewer workers than
463 * expected, so there is no need to throw an error here if registration
464 * fails. It wouldn't help much anyway, because registering the worker in
465 * no way guarantees that it will start up and initialize successfully.
467 for (i = 0; i < pcxt->nworkers; ++i)
/* bgw_extra carries the worker's index; ParallelWorkerMain copies it into ParallelWorkerNumber. */
469 memcpy(worker.bgw_extra, &i, sizeof(int));
/* After the first failure the && short-circuits, so no further registrations are attempted. */
470 if (!any_registrations_failed &&
471 RegisterDynamicBackgroundWorker(&worker,
472 &pcxt->worker[i].bgwhandle))
474 shm_mq_set_handle(pcxt->worker[i].error_mqh,
475 pcxt->worker[i].bgwhandle);
476 pcxt->nworkers_launched++;
481 * If we weren't able to register the worker, then we've bumped up
482 * against the max_worker_processes limit, and future
483 * registrations will probably fail too, so arrange to skip them.
484 * But we still have to execute this code for the remaining slots
485 * to make sure that we forget about the error queues we budgeted
486 * for those workers. Otherwise, we'll wait for them to start,
487 * but they never will.
489 any_registrations_failed = true;
490 pcxt->worker[i].bgwhandle = NULL;
491 pcxt->worker[i].error_mqh = NULL;
495 /* Restore previous memory context. */
496 MemoryContextSwitchTo(oldcontext);
500 * Wait for all workers to finish computing.
502 * Even if the parallel operation seems to have completed successfully, it's
503 * important to call this function afterwards. We must not miss any errors
504 * the workers may have thrown during the parallel operation, or any that they
505 * may yet throw while shutting down.
507 * Also, we want to update our notion of XactLastRecEnd based on worker
/*
 * Inspects each launched worker's error_mqh and sleeps on this backend's
 * latch between checks; afterwards it folds the workers' reported WAL end
 * position into XactLastRecEnd.
 *
 * NOTE(review): the enclosing loop and the statements that set and test
 * anyone_alive are not visible in this listing.
 */
511 WaitForParallelWorkersToFinish(ParallelContext *pcxt)
515 bool anyone_alive = false;
519 * This will process any parallel messages that are pending, which may
520 * change the outcome of the loop that follows. It may also throw an
521 * error propagated from a worker.
523 CHECK_FOR_INTERRUPTS();
525 for (i = 0; i < pcxt->nworkers_launched; ++i)
/* error_mqh is set to NULL by HandleParallelMessage when the worker's 'X' (Terminate) message arrives. */
527 if (pcxt->worker[i].error_mqh != NULL)
/* Timeout -1 with WL_LATCH_SET only: wakes solely when the latch is set. */
537 WaitLatch(&MyProc->procLatch, WL_LATCH_SET, -1);
538 ResetLatch(&MyProc->procLatch);
541 if (pcxt->toc != NULL)
543 FixedParallelState *fps;
545 fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
/*
 * Only ever moves XactLastRecEnd forward.
 * NOTE(review): last_xlog_end is read here without taking fps->mutex;
 * presumably safe because the workers have finished by now -- confirm.
 */
546 if (fps->last_xlog_end > XactLastRecEnd)
547 XactLastRecEnd = fps->last_xlog_end;
552 * Wait for all workers to exit.
554 * This function ensures that workers have been completely shut down. The
555 * difference between WaitForParallelWorkersToFinish and this function is
556 * that the former only ensures that each worker's last message has been
557 * received by the master backend, whereas this one waits for full shutdown.
/*
 * Calls WaitForBackgroundWorkerShutdown on every launched worker that still
 * has a handle, then frees and clears that handle.
 */
560 WaitForParallelWorkersToExit(ParallelContext *pcxt)
564 /* Wait until the workers actually die. */
565 for (i = 0; i < pcxt->nworkers_launched; ++i)
567 BgwHandleStatus status;
/* NOTE(review): the statement controlled by this test is not visible here; presumably it skips the slot. */
569 if (pcxt->worker == NULL || pcxt->worker[i].bgwhandle == NULL)
572 status = WaitForBackgroundWorkerShutdown(pcxt->worker[i].bgwhandle);
575 * If the postmaster kicked the bucket, we have no chance of cleaning
576 * up safely -- we won't be able to tell when our workers are actually
577 * dead. This doesn't necessitate a PANIC since they will all abort
578 * eventually, but we can't safely continue this session.
/* NOTE(review): the ereport call and its severity level are on a line not visible in this listing. */
580 if (status == BGWH_POSTMASTER_DIED)
582 (errcode(ERRCODE_ADMIN_SHUTDOWN),
583 errmsg("postmaster exited during a parallel transaction")));
585 /* Release memory. */
/* Clearing the handle lets the NULL test above recognize this slot on a later call. */
586 pfree(pcxt->worker[i].bgwhandle);
587 pcxt->worker[i].bgwhandle = NULL;
592 * Destroy a parallel context.
594 * If expecting a clean exit, you should use WaitForParallelWorkersToFinish()
595 * first, before calling this function. When this function is invoked, any
596 * remaining workers are forcibly killed; the dynamic shared memory segment
597 * is unmapped; and we then wait (uninterruptibly) for the workers to exit.
/*
 * Tears a context down in a fixed order: unlink it from pcxt_list, terminate
 * workers that still have an error queue, detach the DSM segment, free any
 * backend-private substitute memory, and finally wait for the workers to
 * exit.
 */
600 DestroyParallelContext(ParallelContext *pcxt)
605 * Be careful about order of operations here! We remove the parallel
606 * context from the list before we do anything else; otherwise, if an
607 * error occurs during a subsequent step, we might try to nuke it again
608 * from AtEOXact_Parallel or AtEOSubXact_Parallel.
610 dlist_delete(&pcxt->node);
612 /* Kill each worker in turn, and forget their error queues. */
613 if (pcxt->worker != NULL)
615 for (i = 0; i < pcxt->nworkers_launched; ++i)
/* A non-NULL error_mqh means no 'X' (Terminate) message has been handled for this worker yet. */
617 if (pcxt->worker[i].error_mqh != NULL)
619 TerminateBackgroundWorker(pcxt->worker[i].bgwhandle);
621 pfree(pcxt->worker[i].error_mqh);
622 pcxt->worker[i].error_mqh = NULL;
628 * If we have allocated a shared memory segment, detach it. This will
629 * implicitly detach the error queues, and any other shared memory queues,
632 if (pcxt->seg != NULL)
634 dsm_detach(pcxt->seg);
639 * If this parallel context is actually in backend-private memory rather
640 * than shared memory, free that memory instead.
642 if (pcxt->private_memory != NULL)
644 pfree(pcxt->private_memory);
645 pcxt->private_memory = NULL;
649 * We can't finish transaction commit or abort until all of the workers
650 * have exited. This means, in particular, that we can't respond to
651 * interrupts at this stage.
654 WaitForParallelWorkersToExit(pcxt);
657 /* Free the worker array itself. */
/* NOTE(review): the statements controlled by this test, and any freeing of pcxt itself, are not visible in this listing. */
658 if (pcxt->worker != NULL)
669 * Are there any parallel contexts currently active?
/* True when pcxt_list holds at least one context, i.e. one created and not yet destroyed. */
672 ParallelContextActive(void)
674 return !dlist_is_empty(&pcxt_list);
678 * Handle receipt of an interrupt indicating a parallel worker message.
/*
 * Only raises flags in the visible lines; the messages themselves are read
 * later by HandleParallelMessages.
 *
 * NOTE(review): save_errno is captured here, but the code that uses it
 * (presumably to restore errno before returning) is not visible in this
 * listing.
 */
681 HandleParallelMessageInterrupt(void)
683 int save_errno = errno;
685 InterruptPending = true;
686 ParallelMessagePending = true;
693 * Handle any queued protocol messages received from parallel workers.
/*
 * Walks every context in pcxt_list and, for each launched worker, reads
 * messages from its error queue and passes each one to
 * HandleParallelMessage.
 */
696 HandleParallelMessages(void)
/* Cleared before the queues are read. */
700 ParallelMessagePending = false;
702 dlist_foreach(iter, &pcxt_list)
704 ParallelContext *pcxt;
709 pcxt = dlist_container(ParallelContext, node, iter.cur);
/* NOTE(review): the statement controlled by this test is not visible here; presumably it skips contexts without a worker array. */
710 if (pcxt->worker == NULL)
713 for (i = 0; i < pcxt->nworkers_launched; ++i)
716 * Read as many messages as we can from each worker, but stop when
717 * either (1) the error queue goes away, which can happen if we
718 * receive a Terminate message from the worker; or (2) no more
719 * messages can be read from the worker without blocking.
721 while (pcxt->worker[i].error_mqh != NULL)
725 res = shm_mq_receive(pcxt->worker[i].error_mqh, &nbytes,
727 if (res == SHM_MQ_WOULD_BLOCK)
729 else if (res == SHM_MQ_SUCCESS)
/* The received bytes are copied into a local StringInfo before being dispatched. */
733 initStringInfo(&msg);
734 appendBinaryStringInfo(&msg, data, nbytes);
735 HandleParallelMessage(pcxt, i, &msg);
740 (errcode(ERRCODE_INTERNAL_ERROR), /* XXX: wrong errcode? */
741 errmsg("lost connection to parallel worker")));
743 /* This might make the error queue go away. */
/*
 * NOTE(review): HandleParallelMessageInterrupt sets InterruptPending along
 * with ParallelMessagePending, so if this function is itself reached from
 * interrupt processing this call may re-enter it -- confirm against the
 * caller.
 */
744 CHECK_FOR_INTERRUPTS();
751 * Handle a single protocol message received from a single parallel worker.
/*
 * Dispatches on the first byte of msg, which is the protocol message type.
 *
 * pcxt: context that owns the worker.
 * i:    index of the sending worker in pcxt->worker[].
 * msg:  the complete message; parsing consumes it through the pq_getmsg* calls.
 */
754 HandleParallelMessage(ParallelContext *pcxt, int i, StringInfo msg)
758 msgtype = pq_getmsgbyte(msg);
762 case 'K': /* BackendKeyData */
764 int32 pid = pq_getmsgint(msg, 4);
766 (void) pq_getmsgint(msg, 4); /* discard cancel key */
767 (void) pq_getmsgend(msg);
/* Stored so ParallelErrorContext can report which worker a later error came from. */
768 pcxt->worker[i].pid = pid;
772 case 'E': /* ErrorResponse */
773 case 'N': /* NoticeResponse */
776 ErrorContextCallback errctx;
777 ErrorContextCallback *save_error_context_stack;
780 * Rethrow the error using the error context callbacks that
781 * were in effect when the context was created, not the
784 save_error_context_stack = error_context_stack;
785 errctx.callback = ParallelErrorContext;
786 errctx.arg = &pcxt->worker[i].pid;
/* Chains onto the stack that CreateParallelContext saved. */
787 errctx.previous = pcxt->error_context_stack;
788 error_context_stack = &errctx;
790 /* Parse ErrorResponse or NoticeResponse. */
791 pq_parse_errornotice(msg, &edata);
793 /* Death of a worker isn't enough justification for suicide. */
/* Caps the rethrown severity at ERROR. */
794 edata.elevel = Min(edata.elevel, ERROR);
796 /* Rethrow error or notice. */
797 ThrowErrorData(&edata);
799 /* Restore previous context. */
800 error_context_stack = save_error_context_stack;
805 case 'A': /* NotifyResponse */
807 /* Propagate NotifyResponse. */
/* Forwarded unchanged: the first byte is the type, the remaining len - 1 bytes are the payload. */
808 pq_putmessage(msg->data[0], &msg->data[1], msg->len - 1);
812 case 'X': /* Terminate, indicating clean exit */
/* A NULL error_mqh is how the rest of this file recognizes a worker that has finished. */
814 pfree(pcxt->worker[i].error_mqh);
815 pcxt->worker[i].error_mqh = NULL;
821 elog(ERROR, "unknown message type: %c (%d bytes)",
828 * End-of-subtransaction cleanup for parallel contexts.
830 * Currently, it's forbidden to enter or leave a subtransaction while
831 * parallel mode is in effect, so we could just blow away everything. But
832 * we may want to relax that restriction in the future, so this code
833 * contemplates that there may be multiple subtransaction IDs in pcxt_list.
/*
 * Destroys contexts from the head of pcxt_list.  Each DestroyParallelContext
 * call unlinks the head element, so the loop makes progress on every pass.
 *
 * NOTE(review): isCommit is not referenced in the visible lines, and the
 * statement controlled by the subid test is not visible either; presumably
 * the warning is conditional on isCommit and the subid test stops the loop
 * -- confirm.
 */
836 AtEOSubXact_Parallel(bool isCommit, SubTransactionId mySubId)
838 while (!dlist_is_empty(&pcxt_list))
840 ParallelContext *pcxt;
842 pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
843 if (pcxt->subid != mySubId)
846 elog(WARNING, "leaked parallel context");
847 DestroyParallelContext(pcxt);
852 * End-of-transaction cleanup for parallel contexts.
/*
 * Destroys every context still in pcxt_list.  Each DestroyParallelContext
 * call unlinks the head element, so the loop ends once the list is empty.
 *
 * NOTE(review): isCommit is not referenced in the visible lines; presumably
 * it guards the warning on a line not shown -- confirm.
 */
855 AtEOXact_Parallel(bool isCommit)
857 while (!dlist_is_empty(&pcxt_list))
859 ParallelContext *pcxt;
861 pcxt = dlist_head_element(ParallelContext, node, &pcxt_list);
863 elog(WARNING, "leaked parallel context");
864 DestroyParallelContext(pcxt);
869 * Main entrypoint for parallel workers.
/*
 * Background-worker entry function installed by LaunchParallelWorkers.
 * main_arg is the DSM segment handle.  The worker attaches to the segment,
 * hooks up its error queue, restores the state serialized by
 * InitializeParallelDSM, runs fps->entrypoint, and finally sends an 'X'
 * message.
 *
 * NOTE(review): declarations of seg, toc, mq, mqh and the *space pointers
 * used below are not visible in this listing.
 */
872 ParallelWorkerMain(Datum main_arg)
876 FixedParallelState *fps;
877 char *error_queue_space;
886 StringInfoData msgbuf;
888 /* Set flag to indicate that we're initializing a parallel worker. */
889 InitializingParallelWorker = true;
891 /* Establish signal handlers. */
892 pqsignal(SIGTERM, die);
893 BackgroundWorkerUnblockSignals();
895 /* Determine and set our parallel worker number. */
896 Assert(ParallelWorkerNumber == -1);
/* The launching backend stored the worker index in bgw_extra with a matching memcpy. */
897 memcpy(&ParallelWorkerNumber, MyBgworkerEntry->bgw_extra, sizeof(int));
899 /* Set up a memory context and resource owner. */
900 Assert(CurrentResourceOwner == NULL);
901 CurrentResourceOwner = ResourceOwnerCreate(NULL, "parallel toplevel");
902 CurrentMemoryContext = AllocSetContextCreate(TopMemoryContext,
904 ALLOCSET_DEFAULT_MINSIZE,
905 ALLOCSET_DEFAULT_INITSIZE,
906 ALLOCSET_DEFAULT_MAXSIZE);
909 * Now that we have a resource owner, we can attach to the dynamic shared
910 * memory segment and read the table of contents.
/* NOTE(review): the tests that guard the two error reports below are not visible in this listing. */
912 seg = dsm_attach(DatumGetUInt32(main_arg));
915 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
916 errmsg("could not map dynamic shared memory segment")));
917 toc = shm_toc_attach(PARALLEL_MAGIC, dsm_segment_address(seg));
920 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
921 errmsg("invalid magic number in dynamic shared memory segment")));
923 /* Look up fixed parallel state. */
924 fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED);
/* Kept in a file-level static for ParallelWorkerReportLastRecEnd. */
926 MyFixedParallelState = fps;
929 * Now that we have a worker number, we can find and attach to the error
930 * queue provided for us. That's good, because until we do that, any
931 * errors that happen here will not be reported back to the process that
932 * requested that this worker be launched.
934 error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE);
/* Same slicing the launching backend used: this worker's queue is the ParallelWorkerNumber'th slice. */
935 mq = (shm_mq *) (error_queue_space +
936 ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
937 shm_mq_set_sender(mq, MyProc);
938 mqh = shm_mq_attach(mq, seg, NULL);
939 pq_redirect_to_shm_mq(seg, mqh);
940 pq_set_parallel_master(fps->parallel_master_pid,
941 fps->parallel_master_backend_id);
944 * Send a BackendKeyData message to the process that initiated parallelism
945 * so that it has access to our PID before it receives any other messages
946 * from us. Our cancel key is sent, too, since that's the way the
947 * protocol message is defined, but it won't actually be used for anything
/* Matches the 'K' case of HandleParallelMessage: a 4-byte pid followed by a 4-byte cancel key. */
950 pq_beginmessage(&msgbuf, 'K');
951 pq_sendint(&msgbuf, (int32) MyProcPid, sizeof(int32));
952 pq_sendint(&msgbuf, (int32) MyCancelKey, sizeof(int32));
953 pq_endmessage(&msgbuf);
956 * Hooray! Primary initialization is complete. Now, we need to set up our
957 * backend-local state to match the original backend.
961 * Join locking group. We must do this before anything that could try to
962 * acquire a heavyweight lock, because any heavyweight locks acquired to
963 * this point could block either directly against the parallel group
964 * leader or against some process which in turn waits for a lock that
965 * conflicts with the parallel group leader, causing an undetected
966 * deadlock. (If we can't join the lock group, the leader has gone away,
967 * so just exit quietly.)
/* NOTE(review): the statement controlled by this test is not visible in this listing. */
969 if (!BecomeLockGroupMember(fps->parallel_master_pgproc,
970 fps->parallel_master_pid))
974 * Load libraries that were loaded by original backend. We want to do
975 * this before restoring GUCs, because the libraries might define custom
978 libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY);
979 Assert(libraryspace != NULL);
980 RestoreLibraryState(libraryspace);
982 /* Restore database connection. */
983 BackgroundWorkerInitializeConnectionByOid(fps->database_id,
984 fps->authenticated_user_id);
986 /* Restore GUC values from launching backend. */
987 gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC);
988 Assert(gucspace != NULL);
/* RestoreGUCState is run inside its own transaction, started and committed right here. */
989 StartTransactionCommand();
990 RestoreGUCState(gucspace);
991 CommitTransactionCommand();
993 /* Crank up a transaction state appropriate to a parallel worker. */
994 tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE);
995 StartParallelWorkerTransaction(tstatespace);
997 /* Restore combo CID state. */
998 combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID);
999 Assert(combocidspace != NULL);
1000 RestoreComboCIDState(combocidspace);
1002 /* Restore transaction snapshot. */
1003 tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT);
1004 Assert(tsnapspace != NULL);
1005 RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
1006 fps->parallel_master_pgproc);
1008 /* Restore active snapshot. */
1009 asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT);
1010 Assert(asnapspace != NULL);
1011 PushActiveSnapshot(RestoreSnapshot(asnapspace));
1014 * We've changed which tuples we can see, and must therefore invalidate
1017 InvalidateSystemCaches();
1019 /* Restore user ID and security context. */
1020 SetUserIdAndSecContext(fps->current_user_id, fps->sec_context);
1023 * We've initialized all of our state now; nothing should change
1026 InitializingParallelWorker = false;
1027 EnterParallelMode();
1030 * Time to do the real work: invoke the caller-supplied code.
1032 * If you get a crash at this line, see the comments for
1033 * ParallelExtensionTrampoline.
/* fps->entrypoint is the function pointer the launching backend copied from pcxt->entrypoint. */
1035 fps->entrypoint(seg, toc);
1037 /* Must exit parallel mode to pop active snapshot. */
/* NOTE(review): the statement this comment describes is not visible in this listing. */
1040 /* Must pop active snapshot so resowner.c doesn't complain. */
1041 PopActiveSnapshot();
1043 /* Shut down the parallel-worker transaction. */
1044 EndParallelWorkerTransaction();
1046 /* Report success. */
/* 'X' is the message type HandleParallelMessage treats as a clean exit. */
1047 pq_putmessage('X', NULL, 0);
1051 * It's unsafe for the entrypoint invoked by ParallelWorkerMain to be a
1052 * function living in a dynamically loaded module, because the module might
1053 * not be loaded in every process, or might be loaded but not at the same
1054 * address. To work around that problem, CreateParallelContextForExternalFunction()
1055 * arranges to call this function rather than calling the extension-provided
1056 * function directly; and this function then looks up the real entrypoint and
/*
 * Reads the library and function names stored under
 * PARALLEL_KEY_EXTENSION_TRAMPOLINE and resolves them to a function pointer
 * with load_external_function.
 *
 * NOTE(review): the declaration of library_name and the call through entrypt
 * are not visible in this listing.
 */
1060 ParallelExtensionTrampoline(dsm_segment *seg, shm_toc *toc)
1062 char *extensionstate;
1064 char *function_name;
1065 parallel_worker_main_type entrypt;
1067 extensionstate = shm_toc_lookup(toc, PARALLEL_KEY_EXTENSION_TRAMPOLINE);
1068 Assert(extensionstate != NULL);
/* Chunk layout written by InitializeParallelDSM: library_name, NUL, function_name, NUL. */
1069 library_name = extensionstate;
1070 function_name = extensionstate + strlen(library_name) + 1;
1072 entrypt = (parallel_worker_main_type)
1073 load_external_function(library_name, function_name, true, NULL);
1078 * Give the user a hint that this is a message propagated from a parallel
1079 * worker. Otherwise, it can sometimes be confusing to understand what
1080 * actually happened.
/*
 * Error-context callback installed by HandleParallelMessage.  arg points at
 * the sending worker's pid field in pcxt->worker[].  No context line is
 * added when force_parallel_mode is FORCE_PARALLEL_REGRESS.
 */
1083 ParallelErrorContext(void *arg)
1085 if (force_parallel_mode != FORCE_PARALLEL_REGRESS)
1086 errcontext("parallel worker, PID %d", *(int32 *) arg);
1090 * Update shared memory with the ending location of the last WAL record we
1091 * wrote, if it's greater than the value already stored there.
/*
 * Raises fps->last_xlog_end to last_xlog_end if the new value is larger; the
 * compare-and-store is done while holding fps->mutex.  Requires
 * MyFixedParallelState to have been set, which ParallelWorkerMain does.
 */
1094 ParallelWorkerReportLastRecEnd(XLogRecPtr last_xlog_end)
1096 FixedParallelState *fps = MyFixedParallelState;
1098 Assert(fps != NULL);
1099 SpinLockAcquire(&fps->mutex);
/* The stored value never decreases, so it ends up as the maximum over all reports. */
1100 if (fps->last_xlog_end < last_xlog_end)
1101 fps->last_xlog_end = last_xlog_end;
1102 SpinLockRelease(&fps->mutex);