Don't be so trusting that shm_toc_lookup() will always succeed.
author    Tom Lane <tgl@sss.pgh.pa.us>
          Mon, 5 Jun 2017 16:05:42 +0000 (12:05 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
          Mon, 5 Jun 2017 16:05:42 +0000 (12:05 -0400)
Given the possibility of race conditions and so on, it seems entirely
unsafe to just assume that shm_toc_lookup() always finds the key it's
looking for --- but that was exactly what all but one call site was
doing.  To fix, add a "bool noError" argument, similarly to what we
have in many other functions, and throw an error on an unexpected
lookup failure.  Remove now-redundant Asserts that a rather random
subset of call sites had.

I doubt this will throw any light on buildfarm member lorikeet's
recent failures, because if an unnoticed lookup failure were involved,
you'd kind of expect a null-pointer-dereference crash rather than the
observed symptom.  But you never know ... and this is better coding
practice even if it never catches anything.

Discussion: https://postgr.es/m/9697.1496675981@sss.pgh.pa.us
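
The resulting convention for callers, in brief: pass noError = false for
entries that must exist (a missing key is now reported with elog(ERROR)
inside the lookup), and noError = true for genuinely optional entries,
testing the result for NULL.  A minimal sketch of both styles, using only
names that appear in the diff below:

	FixedParallelState *fps;
	SharedExecutorInstrumentation *instrumentation;

	/* Mandatory entry: a missing key throws elog(ERROR) in the lookup. */
	fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);

	/* Optional entry: may legitimately be absent, so ask for NULL. */
	instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
	if (instrumentation != NULL)
		instrument_options = instrumentation->instrument_options;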

src/backend/access/transam/parallel.c
src/backend/executor/execParallel.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/executor/nodeCustom.c
src/backend/executor/nodeForeignscan.c
src/backend/executor/nodeIndexonlyscan.c
src/backend/executor/nodeIndexscan.c
src/backend/executor/nodeSeqscan.c
src/backend/storage/ipc/shm_toc.c
src/include/storage/shm_toc.h
src/test/modules/test_shm_mq/worker.c

src/backend/access/transam/parallel.c
index 2dad3e8a655e1e15cb1d6826d419e369a7d7029e..cb22174270677ce2339bfcfef15d013ffbacf3ce 100644
@@ -392,12 +392,12 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
        }
 
        /* Reset a few bits of fixed parallel state to a clean state. */
-       fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
+       fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
        fps->last_xlog_end = 0;
 
        /* Recreate error queues. */
        error_queue_space =
-               shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE);
+               shm_toc_lookup(pcxt->toc, PARALLEL_KEY_ERROR_QUEUE, false);
        for (i = 0; i < pcxt->nworkers; ++i)
        {
                char       *start;
@@ -536,7 +536,7 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt)
        {
                FixedParallelState *fps;
 
-               fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED);
+               fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
                if (fps->last_xlog_end > XactLastRecEnd)
                        XactLastRecEnd = fps->last_xlog_end;
        }
@@ -973,8 +973,7 @@ ParallelWorkerMain(Datum main_arg)
                   errmsg("invalid magic number in dynamic shared memory segment")));
 
        /* Look up fixed parallel state. */
-       fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED);
-       Assert(fps != NULL);
+       fps = shm_toc_lookup(toc, PARALLEL_KEY_FIXED, false);
        MyFixedParallelState = fps;
 
        /*
@@ -983,7 +982,7 @@ ParallelWorkerMain(Datum main_arg)
         * errors that happen here will not be reported back to the process that
         * requested that this worker be launched.
         */
-       error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE);
+       error_queue_space = shm_toc_lookup(toc, PARALLEL_KEY_ERROR_QUEUE, false);
        mq = (shm_mq *) (error_queue_space +
                                         ParallelWorkerNumber * PARALLEL_ERROR_QUEUE_SIZE);
        shm_mq_set_sender(mq, MyProc);
@@ -1027,8 +1026,7 @@ ParallelWorkerMain(Datum main_arg)
         * this before restoring GUCs, because the libraries might define custom
         * variables.
         */
-       libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY);
-       Assert(libraryspace != NULL);
+       libraryspace = shm_toc_lookup(toc, PARALLEL_KEY_LIBRARY, false);
        RestoreLibraryState(libraryspace);
 
        /*
@@ -1036,8 +1034,7 @@ ParallelWorkerMain(Datum main_arg)
         * loading an additional library, though most likely the entry point is in
         * the core backend or in a library we just loaded.
         */
-       entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT);
-       Assert(entrypointstate != NULL);
+       entrypointstate = shm_toc_lookup(toc, PARALLEL_KEY_ENTRYPOINT, false);
        library_name = entrypointstate;
        function_name = entrypointstate + strlen(library_name) + 1;
 
@@ -1054,30 +1051,26 @@ ParallelWorkerMain(Datum main_arg)
        SetClientEncoding(GetDatabaseEncoding());
 
        /* Restore GUC values from launching backend. */
-       gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC);
-       Assert(gucspace != NULL);
+       gucspace = shm_toc_lookup(toc, PARALLEL_KEY_GUC, false);
        StartTransactionCommand();
        RestoreGUCState(gucspace);
        CommitTransactionCommand();
 
        /* Crank up a transaction state appropriate to a parallel worker. */
-       tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE);
+       tstatespace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_STATE, false);
        StartParallelWorkerTransaction(tstatespace);
 
        /* Restore combo CID state. */
-       combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID);
-       Assert(combocidspace != NULL);
+       combocidspace = shm_toc_lookup(toc, PARALLEL_KEY_COMBO_CID, false);
        RestoreComboCIDState(combocidspace);
 
        /* Restore transaction snapshot. */
-       tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT);
-       Assert(tsnapspace != NULL);
+       tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
        RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
                                                           fps->parallel_master_pgproc);
 
        /* Restore active snapshot. */
-       asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT);
-       Assert(asnapspace != NULL);
+       asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
        PushActiveSnapshot(RestoreSnapshot(asnapspace));
 
        /*
src/backend/executor/execParallel.c
index 061018001602413b74e8267a3e5c6cf3ac971701..1c02fa140b0cddc655761543ec1c65860ddd94d2 100644
@@ -341,7 +341,7 @@ ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize)
                                                         mul_size(PARALLEL_TUPLE_QUEUE_SIZE,
                                                                          pcxt->nworkers));
        else
-               tqueuespace = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_TUPLE_QUEUE);
+               tqueuespace = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_TUPLE_QUEUE, false);
 
        /* Create the queues, and become the receiver for each. */
        for (i = 0; i < pcxt->nworkers; ++i)
@@ -684,7 +684,7 @@ ExecParallelGetReceiver(dsm_segment *seg, shm_toc *toc)
        char       *mqspace;
        shm_mq     *mq;
 
-       mqspace = shm_toc_lookup(toc, PARALLEL_KEY_TUPLE_QUEUE);
+       mqspace = shm_toc_lookup(toc, PARALLEL_KEY_TUPLE_QUEUE, false);
        mqspace += ParallelWorkerNumber * PARALLEL_TUPLE_QUEUE_SIZE;
        mq = (shm_mq *) mqspace;
        shm_mq_set_sender(mq, MyProc);
@@ -705,14 +705,14 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
        char       *queryString;
 
        /* Get the query string from shared memory */
-       queryString = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT);
+       queryString = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
 
        /* Reconstruct leader-supplied PlannedStmt. */
-       pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT);
+       pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT, false);
        pstmt = (PlannedStmt *) stringToNode(pstmtspace);
 
        /* Reconstruct ParamListInfo. */
-       paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMS);
+       paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMS, false);
        paramLI = RestoreParamList(&paramspace);
 
        /*
@@ -843,7 +843,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 
        /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
        receiver = ExecParallelGetReceiver(seg, toc);
-       instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION);
+       instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true);
        if (instrumentation != NULL)
                instrument_options = instrumentation->instrument_options;
        queryDesc = ExecParallelGetQueryDesc(toc, receiver, instrument_options);
@@ -858,7 +858,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
        InstrStartParallelQuery();
 
        /* Attach to the dynamic shared memory area. */
-       area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA);
+       area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);
        area = dsa_attach_in_place(area_space, seg);
 
        /* Start up the executor */
@@ -875,7 +875,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
        ExecutorFinish(queryDesc);
 
        /* Report buffer usage during parallel execution. */
-       buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE);
+       buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
        InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber]);
 
        /* Report instrumentation data if any instrumentation options are set. */
src/backend/executor/nodeBitmapHeapscan.c
index c453362230856a6aa2a547557d141edd7eb9f770..77f65db0ca0bf2cb151466e6d730a1cb4ff5c546 100644
@@ -1005,7 +1005,7 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
        ParallelBitmapHeapState *pstate;
        Snapshot        snapshot;
 
-       pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+       pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
        node->pstate = pstate;
 
        snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
src/backend/executor/nodeCustom.c
index 5d309828ef185616b8d8c3afdbac6c954f9ad61d..69e27047f1c667182e079347a73ff4ffa1957ec4 100644
@@ -194,7 +194,7 @@ ExecCustomScanInitializeWorker(CustomScanState *node, shm_toc *toc)
                int                     plan_node_id = node->ss.ps.plan->plan_node_id;
                void       *coordinate;
 
-               coordinate = shm_toc_lookup(toc, plan_node_id);
+               coordinate = shm_toc_lookup(toc, plan_node_id, false);
                methods->InitializeWorkerCustomScan(node, toc, coordinate);
        }
 }
src/backend/executor/nodeForeignscan.c
index 9ae1561404b4ed84db2b26b2bef65d59760a5101..9cde112554b2e1801328182b8802d19fb9b3fd27 100644
@@ -344,7 +344,7 @@ ExecForeignScanInitializeWorker(ForeignScanState *node, shm_toc *toc)
                int                     plan_node_id = node->ss.ps.plan->plan_node_id;
                void       *coordinate;
 
-               coordinate = shm_toc_lookup(toc, plan_node_id);
+               coordinate = shm_toc_lookup(toc, plan_node_id, false);
                fdwroutine->InitializeWorkerForeignScan(node, toc, coordinate);
        }
 }
src/backend/executor/nodeIndexonlyscan.c
index 5550f6c0a4be70bbb3735c2c7a83ef4f5beed2a3..fb3d3bb1218fcf772ccb3449c1171705678d7cd2 100644
@@ -676,7 +676,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc)
 {
        ParallelIndexScanDesc piscan;
 
-       piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+       piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
        node->ioss_ScanDesc =
                index_beginscan_parallel(node->ss.ss_currentRelation,
                                                                 node->ioss_RelationDesc,
src/backend/executor/nodeIndexscan.c
index 5afd02e09ddfad4417af5324e5986e7807ee37a5..0fb3fb5e7ece008bda33b305d556fb1d048b7b98 100644
@@ -1714,7 +1714,7 @@ ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
 {
        ParallelIndexScanDesc piscan;
 
-       piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+       piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
        node->iss_ScanDesc =
                index_beginscan_parallel(node->ss.ss_currentRelation,
                                                                 node->iss_RelationDesc,
src/backend/executor/nodeSeqscan.c
index 5680464fa273d554e0266572811e1ae5941e5677..c0e37dcd834c8459aa30a3377995f56b00f1f458 100644
@@ -332,7 +332,7 @@ ExecSeqScanInitializeWorker(SeqScanState *node, shm_toc *toc)
 {
        ParallelHeapScanDesc pscan;
 
-       pscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
+       pscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
        node->ss.ss_currentScanDesc =
                heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
src/backend/storage/ipc/shm_toc.c
index 9110ffa4a0bff1582351c21b4a1d07f70f278991..5e290df3366b4083924d4f97450d1a8d20013dde 100644
@@ -208,6 +208,9 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
 /*
  * Look up a TOC entry.
  *
+ * If the key is not found, returns NULL if noError is true, otherwise
+ * throws elog(ERROR).
+ *
  * Unlike the other functions in this file, this operation acquires no lock;
  * it uses only barriers.  It probably wouldn't hurt concurrency very much even
  * if it did get a lock, but since it's reasonably likely that a group of
@@ -215,7 +218,7 @@ shm_toc_insert(shm_toc *toc, uint64 key, void *address)
  * right around the same time, there seems to be some value in avoiding it.
  */
 void *
-shm_toc_lookup(shm_toc *toc, uint64 key)
+shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
 {
        uint64          nentry;
        uint64          i;
@@ -226,10 +229,15 @@ shm_toc_lookup(shm_toc *toc, uint64 key)
 
        /* Now search for a matching entry. */
        for (i = 0; i < nentry; ++i)
+       {
                if (toc->toc_entry[i].key == key)
                        return ((char *) toc) + toc->toc_entry[i].offset;
+       }
 
        /* No matching entry was found. */
+       if (!noError)
+               elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p",
+                        key, toc);
        return NULL;
 }
 
src/include/storage/shm_toc.h
index ae0a3878feb9266f974cdb3b3238dfda78a2bde3..0548e309bd0b5f455148c30ada81da1e56552729 100644
@@ -32,7 +32,7 @@ extern shm_toc *shm_toc_attach(uint64 magic, void *address);
 extern void *shm_toc_allocate(shm_toc *toc, Size nbytes);
 extern Size shm_toc_freespace(shm_toc *toc);
 extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address);
-extern void *shm_toc_lookup(shm_toc *toc, uint64 key);
+extern void *shm_toc_lookup(shm_toc *toc, uint64 key, bool noError);
 
 /*
  * Tools for estimating how large a chunk of shared memory will be needed
src/test/modules/test_shm_mq/worker.c
index 3e45c75dc0594313b5368f5bdb04f792c1a2e334..f8aef263f72ac886578442a76b441024366dbb69 100644
@@ -95,7 +95,7 @@ test_shm_mq_main(Datum main_arg)
         * find it.  Our worker number gives our identity: there may be just one
         * worker involved in this parallel operation, or there may be many.
         */
-       hdr = shm_toc_lookup(toc, 0);
+       hdr = shm_toc_lookup(toc, 0, false);
        SpinLockAcquire(&hdr->mutex);
        myworkernumber = ++hdr->workers_attached;
        SpinLockRelease(&hdr->mutex);
@@ -158,10 +158,10 @@ attach_to_queues(dsm_segment *seg, shm_toc *toc, int myworkernumber,
        shm_mq     *inq;
        shm_mq     *outq;
 
-       inq = shm_toc_lookup(toc, myworkernumber);
+       inq = shm_toc_lookup(toc, myworkernumber, false);
        shm_mq_set_receiver(inq, MyProc);
        *inqhp = shm_mq_attach(inq, seg, NULL);
-       outq = shm_toc_lookup(toc, myworkernumber + 1);
+       outq = shm_toc_lookup(toc, myworkernumber + 1, false);
        shm_mq_set_sender(outq, MyProc);
        *outqhp = shm_mq_attach(outq, seg, NULL);
 }