From 85c9d3475e4f680dbca7c04fe096af018f3b8760 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Fri, 3 Aug 2018 11:02:02 +0530
Subject: [PATCH] Fix buffer usage stats for parallel nodes.

Buffer usage stats are accounted only for the execution phase of a node.
For Gather and Gather Merge nodes, such stats are accumulated when the
workers are shut down, which happens after the node's execution, so we
failed to account for them on such nodes.  Fix this by treating a node
as running while we shut it down.

We can also miss the accounting for a Limit node when Gather or Gather
Merge is beneath it, because the Limit node can finish its execution
before those nodes are shut down.  So, allow a Limit node to shut down
the resources before it completes its execution.

In passing, fix the Gather node code to allow workers to shut down as
soon as we find that all the tuples from them have been retrieved.  The
original code used to do that, but it was accidentally removed by commit
01edb5c7fc.

Reported-by: Adrien Nayrat
Author: Amit Kapila and Robert Haas
Reviewed-by: Robert Haas and Andres Freund
Backpatch-through: 9.6, where this code was introduced
Discussion: https://postgr.es/m/86137f17-1dfb-42f9-7421-82fd786b04a1@anayrat.info
---
 src/backend/executor/execProcnode.c | 17 +++++++++++++++++
 src/backend/executor/nodeGather.c   |  3 +++
 src/backend/executor/nodeLimit.c    |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 8b3663b3c9..eaed9fb5a9 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -752,6 +752,19 @@ ExecShutdownNode(PlanState *node)
 
 	planstate_tree_walker(node, ExecShutdownNode, NULL);
 
+	/*
+	 * Treat the node as running while we shut it down, but only if it's run
+	 * at least once already.  We don't expect much CPU consumption during
+	 * node shutdown, but in the case of Gather or Gather Merge, we may shut
+	 * down workers at this stage.  If so, their buffer usage will get
+	 * propagated into pgBufferUsage at this point, and we want to make sure
+	 * that it gets associated with the Gather node.  We skip this if the
+	 * node has never been executed, so as to avoid incorrectly making it
+	 * appear that it has.
+	 */
+	if (node->instrument && node->instrument->running)
+		InstrStartNode(node->instrument);
+
 	switch (nodeTag(node))
 	{
 		case T_GatherState:
@@ -776,6 +789,10 @@ ExecShutdownNode(PlanState *node)
 			break;
 	}
 
+	/* Stop the node if we started it above, reporting 0 tuples. */
+	if (node->instrument && node->instrument->running)
+		InstrStopNode(node->instrument, 0);
+
 	return false;
 }
 
diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index cdc9c51bd1..4a700b7b30 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -324,7 +324,10 @@ gather_readnext(GatherState *gatherstate)
 			Assert(!tup);
 			--gatherstate->nreaders;
 			if (gatherstate->nreaders == 0)
+			{
+				ExecShutdownGatherWorkers(gatherstate);
 				return NULL;
+			}
 			memmove(&gatherstate->reader[gatherstate->nextreader],
 					&gatherstate->reader[gatherstate->nextreader + 1],
 					sizeof(TupleQueueReader *)
diff --git a/src/backend/executor/nodeLimit.c b/src/backend/executor/nodeLimit.c
index 56d98b4490..66ea6aa3c3 100644
--- a/src/backend/executor/nodeLimit.c
+++ b/src/backend/executor/nodeLimit.c
@@ -134,6 +134,8 @@ ExecLimit(PlanState *pstate)
 				node->position - node->offset >= node->count)
 			{
 				node->lstate = LIMIT_WINDOWEND;
+				/* Allow nodes to release or shut down resources. */
+				(void) ExecShutdownNode(outerPlan);
 				return NULL;
 			}
 
-- 
2.40.0
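
For readers who want to see why the start/stop bracketing added to
ExecShutdownNode matters, below is a minimal, self-contained sketch of the
snapshot-and-diff pattern behind InstrStartNode/InstrStopNode.  The struct
fields and function bodies are simplified stand-ins written for this note,
not PostgreSQL source (the real Instrumentation in
src/backend/executor/instrument.c also tracks timing and many more buffer
counters); only the buffer-usage diffing relevant to this fix is modeled.

#include <stdio.h>

/* Simplified stand-ins for the backend's BufferUsage/Instrumentation. */
typedef struct BufferUsage
{
	long		shared_blks_read;
} BufferUsage;

typedef struct Instrumentation
{
	int			running;		/* has the node been started at least once? */
	BufferUsage bufusage_start; /* snapshot taken by InstrStartNode */
	BufferUsage bufusage;		/* usage attributed to this node so far */
} Instrumentation;

/* Global counters, playing the role of pgBufferUsage. */
static BufferUsage pgBufferUsage;

/* Snapshot the global counters when the node (re)starts running. */
static void
InstrStartNode(Instrumentation *instr)
{
	instr->bufusage_start = pgBufferUsage;
}

/* Attribute everything since the last snapshot to this node. */
static void
InstrStopNode(Instrumentation *instr, double nTuples)
{
	(void) nTuples;				/* tuple counting not modeled here */
	instr->bufusage.shared_blks_read +=
		pgBufferUsage.shared_blks_read -
		instr->bufusage_start.shared_blks_read;
	instr->running = 1;
}

int
main(void)
{
	Instrumentation gather = {0};

	/* Execution phase: reads land between a start/stop pair. */
	InstrStartNode(&gather);
	pgBufferUsage.shared_blks_read += 10;	/* work during execution */
	InstrStopNode(&gather, 0);

	/*
	 * Worker shutdown happens after execution.  Without the second
	 * start/stop pair that the patch adds to ExecShutdownNode, the 5
	 * blocks below would bump pgBufferUsage but never be diffed into
	 * the Gather node's totals.
	 */
	InstrStartNode(&gather);
	pgBufferUsage.shared_blks_read += 5;	/* workers' usage propagated */
	InstrStopNode(&gather, 0);

	printf("attributed reads: %ld\n", gather.bufusage.shared_blks_read);
	return 0;
}

This prints "attributed reads: 15".  It also illustrates why both new calls
in the patch are guarded by node->instrument->running: a node that never ran
has no execution-phase snapshot, and starting it for the first time at
shutdown would incorrectly make it appear to have been executed.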