/*-------------------------------------------------------------------------
 *
 * nodeAppend.c
 *	  routines to handle append nodes.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeAppend.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitAppend	- initialize the append node
 *		ExecAppend		- retrieve the next tuple from the node
 *		ExecEndAppend	- shut down the append node
 *		ExecReScanAppend - rescan the append node
 *
 *	 NOTES
 *		Each append node contains a list of one or more subplans which
 *		must be iteratively processed (forwards or backwards).
 *		Tuples are retrieved by executing the 'whichplan'th subplan
 *		until the subplan stops returning tuples, at which point that
 *		plan is shut down and the next started up.
 *
 *		Append nodes don't make use of their left and right
 *		subtrees, rather they maintain a list of subplans so
 *		a typical append node looks like this in the plan tree:
 *
 *				   ...
 *				   /
 *				Append -------+------+------+--- nil
 *				/	\		  |		 |		|
 *			  nil	nil		 ...	...    ...
 *								 subplans
 *
 *		Append nodes are currently used for unions, and to support
 *		inheritance queries, where several relations need to be scanned.
 *		For example, in our standard person/student/employee/student-emp
 *		example, where student and employee inherit from person
 *		and student-emp inherits from student and employee, the
 *		query:
 *
 *				select name from person
 *
 *		generates the plan:
 *
 *				  |
 *				Append -------+-------+--------+--------+
 *				/	\		  |		  |		   |		|
 *			  nil	nil		 Scan	 Scan	  Scan	   Scan
 *							  |		  |		   |		|
 *							person employee student student-emp
 */
#include "postgres.h"

#include "executor/execdebug.h"
#include "executor/nodeAppend.h"
#include "miscadmin.h"
/* Shared state for parallel-aware Append. */
struct ParallelAppendState
{
	LWLock		pa_lock;		/* mutual exclusion to choose next subplan */
	int			pa_next_plan;	/* next plan to choose by any worker */

	/*
	 * pa_finished[i] should be true if no more workers should select subplan
	 * i.  For a non-partial plan, this should be set to true as soon as a
	 * worker selects the plan; for a partial plan, it remains false until
	 * some worker executes the plan to completion.
	 */
	bool		pa_finished[FLEXIBLE_ARRAY_MEMBER];
};
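/*
 * Worked example of the pa_finished protocol (illustrative only): with
 * three subplans of which the first two are non-partial, pa_finished[0]
 * and pa_finished[1] become true as soon as some worker selects them, so
 * no second worker ever runs them; pa_finished[2] stays false while any
 * number of workers cooperate on the partial subplan, and flips to true
 * only when one of them executes it to completion.
 */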
#define INVALID_SUBPLAN_INDEX		-1

static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);
/* ----------------------------------------------------------------
 *		ExecInitAppend
 *
 *		Begin all of the subscans of the append node.
 *
 *	   (This is potentially wasteful, since the entire result of the
 *		append node may not be scanned, but this way all of the
 *		structures get allocated in the executor's top level memory
 *		block instead of that of the call to ExecAppend.)
 * ----------------------------------------------------------------
 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
	AppendState *appendstate = makeNode(AppendState);
	PlanState **appendplanstates;
	int			nplans;
	int			i;
	ListCell   *lc;

	/* check for unsupported flags */
	Assert(!(eflags & EXEC_FLAG_MARK));

	/*
	 * Lock the non-leaf tables in the partition tree controlled by this node.
	 * It's a no-op for non-partitioned parent tables.
	 */
	ExecLockNonLeafAppendTables(node->partitioned_rels, estate);

	/* Set up empty vector of subplan states */
	nplans = list_length(node->appendplans);

	appendplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));

	/* create new AppendState for our append node */
	appendstate->ps.plan = (Plan *) node;
	appendstate->ps.state = estate;
	appendstate->ps.ExecProcNode = ExecAppend;
	appendstate->appendplans = appendplanstates;
	appendstate->as_nplans = nplans;

	/*
	 * Miscellaneous initialization
	 *
	 * Append plans don't have expression contexts because they never call
	 * ExecQual or ExecProject.
	 */

	/*
	 * append nodes still have Result slots, which hold pointers to tuples, so
	 * we have to initialize them.
	 */
	ExecInitResultTupleSlot(estate, &appendstate->ps);

	/*
	 * call ExecInitNode on each of the plans to be executed and save the
	 * results into the array "appendplans".
	 */
	i = 0;
	foreach(lc, node->appendplans)
	{
		Plan	   *initNode = (Plan *) lfirst(lc);

		appendplanstates[i] = ExecInitNode(initNode, estate, eflags);
		i++;
	}

	/* initialize output tuple type */
	ExecAssignResultTypeFromTL(&appendstate->ps);
	appendstate->ps.ps_ProjInfo = NULL;

	/*
	 * Parallel-aware append plans must choose the first subplan to execute by
	 * looking at shared memory, but non-parallel-aware append plans can
	 * always start with the first subplan.
	 */
	appendstate->as_whichplan =
		appendstate->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;

	/* If parallel-aware, this will be overridden later. */
	appendstate->choose_next_subplan = choose_next_subplan_locally;

	return appendstate;
}
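/*
 * Illustrative sketch (not part of this file): callers never invoke
 * ExecInitAppend directly; they reach it via ExecInitNode, and tuples are
 * pulled through the ExecProcNode pointer installed above, which (possibly
 * via an instrumentation wrapper) ends up in ExecAppend.  A hypothetical
 * driver loop, shown only to make the dispatch concrete:
 */
#if 0
	{
		AppendState *as = ExecInitAppend(node, estate, eflags);
		TupleTableSlot *slot;

		/* each call dispatches through as->ps.ExecProcNode, i.e. ExecAppend */
		while (!TupIsNull(slot = ExecProcNode((PlanState *) as)))
			continue;			/* consume tuples */

		ExecEndAppend(as);
	}
#endif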
/* ----------------------------------------------------------------
 *	   ExecAppend
 *
 *		Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecAppend(PlanState *pstate)
{
	AppendState *node = castNode(AppendState, pstate);

	/* If no subplan has been chosen, we must choose one before proceeding. */
	if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
		!node->choose_next_subplan(node))
		return ExecClearTuple(node->ps.ps_ResultTupleSlot);

	for (;;)
	{
		PlanState  *subnode;
		TupleTableSlot *result;

		CHECK_FOR_INTERRUPTS();

		/* figure out which subplan we are currently processing */
		Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
		subnode = node->appendplans[node->as_whichplan];

		/* get a tuple from the subplan */
		result = ExecProcNode(subnode);

		if (!TupIsNull(result))
		{
			/*
			 * If the subplan gave us something then return it as-is. We do
			 * NOT make use of the result slot that was set up in
			 * ExecInitAppend; there's no need for it.
			 */
			return result;
		}

		/* choose new subplan; if none, we're done */
		if (!node->choose_next_subplan(node))
			return ExecClearTuple(node->ps.ps_ResultTupleSlot);
	}
}
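/*
 * Illustrative trace (assumed data; forward, non-parallel scan): with
 * three subplans yielding {t1, t2}, {} and {t3}, successive ExecAppend
 * calls return t1 and t2 from subplan 0, silently skip the exhausted
 * subplan 1, return t3 from subplan 2, and finally return an empty slot
 * once choose_next_subplan_locally reports that no subplans remain.
 */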
/* ----------------------------------------------------------------
 *		ExecEndAppend
 *
 *		Shuts down the subscans of the append node.
 *
 *		Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndAppend(AppendState *node)
{
	PlanState **appendplans;
	int			nplans;
	int			i;

	/* get information from the node */
	appendplans = node->appendplans;
	nplans = node->as_nplans;

	/* shut down each of the subscans */
	for (i = 0; i < nplans; i++)
		ExecEndNode(appendplans[i]);
}
void
ExecReScanAppend(AppendState *node)
{
	int			i;

	for (i = 0; i < node->as_nplans; i++)
	{
		PlanState  *subnode = node->appendplans[i];

		/*
		 * ExecReScan doesn't know about my subplans, so I have to do
		 * changed-parameter signaling myself.
		 */
		if (node->ps.chgParam != NULL)
			UpdateChangedParamSet(subnode, node->ps.chgParam);

		/*
		 * If chgParam of subnode is not null then the plan will be
		 * re-scanned by the first ExecProcNode.
		 */
		if (subnode->chgParam == NULL)
			ExecReScan(subnode);
	}

	node->as_whichplan =
		node->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;
}
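/*
 * Example (illustrative): when this Append is the inner side of a nestloop
 * whose outer tuple supplies a Param, the nestloop rescans us with
 * node->ps.chgParam set; the loop above pushes the changed-parameter set
 * down to each subplan and leaves the actual rescan of those subplans to
 * their first ExecProcNode call.
 */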
/* ----------------------------------------------------------------
 *						Parallel Append Support
 * ----------------------------------------------------------------
 */
/* ----------------------------------------------------------------
 *		ExecAppendEstimate
 *
 *		Compute the amount of space we'll need in the parallel
 *		query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecAppendEstimate(AppendState *node,
				   ParallelContext *pcxt)
{
	node->pstate_len =
		add_size(offsetof(ParallelAppendState, pa_finished),
				 sizeof(bool) * node->as_nplans);

	shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
}
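/*
 * Worked example (illustrative numbers): with as_nplans = 4 and 1-byte
 * bools, pstate_len is offsetof(ParallelAppendState, pa_finished) + 4,
 * and we reserve exactly one TOC key for the chunk, which
 * ExecAppendInitializeDSM later inserts keyed by plan_node_id.
 */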
/* ----------------------------------------------------------------
 *		ExecAppendInitializeDSM
 *
 *		Set up shared state for Parallel Append.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeDSM(AppendState *node,
						ParallelContext *pcxt)
{
	ParallelAppendState *pstate;

	pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
	memset(pstate, 0, node->pstate_len);
	LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
	shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);

	node->as_pstate = pstate;
	node->choose_next_subplan = choose_next_subplan_for_leader;
}
/* ----------------------------------------------------------------
 *		ExecAppendReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
{
	ParallelAppendState *pstate = node->as_pstate;

	pstate->pa_next_plan = 0;
	memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
}
/* ----------------------------------------------------------------
 *		ExecAppendInitializeWorker
 *
 *		Copy relevant information from TOC into planstate, and initialize
 *		whatever is required to choose and execute the optimal subplan.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
{
	node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
	node->choose_next_subplan = choose_next_subplan_for_worker;
}
/* ----------------------------------------------------------------
 *		choose_next_subplan_locally
 *
 *		Choose next subplan for a non-parallel-aware Append,
 *		returning false if there are no more.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_locally(AppendState *node)
{
	int			whichplan = node->as_whichplan;

	/* We should never see INVALID_SUBPLAN_INDEX in this case. */
	Assert(whichplan >= 0 && whichplan < node->as_nplans);

	if (ScanDirectionIsForward(node->ps.state->es_direction))
	{
		if (whichplan >= node->as_nplans - 1)
			return false;
		node->as_whichplan++;
	}
	else
	{
		if (whichplan <= 0)
			return false;
		node->as_whichplan--;
	}

	return true;
}
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_leader
 *
 *		Try to pick a plan which doesn't commit us to doing much
 *		work locally, so that as much work as possible is done in
 *		the workers.  Cheapest subplans are at the end.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;
	Append	   *append = (Append *) node->ps.plan;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
	{
		/* Mark just-completed subplan as finished. */
		node->as_pstate->pa_finished[node->as_whichplan] = true;
	}
	else
	{
		/* Start with last subplan. */
		node->as_whichplan = node->as_nplans - 1;
	}

	/* Loop until we find a subplan to execute. */
	while (pstate->pa_finished[node->as_whichplan])
	{
		if (node->as_whichplan == 0)
		{
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			node->as_whichplan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}
		node->as_whichplan--;
	}

	/* If non-partial, immediately mark as finished. */
	if (node->as_whichplan < append->first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
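/*
 * Worked example (illustrative): with as_nplans = 4 and pa_finished =
 * {false, false, true, true}, a leader arriving with as_whichplan ==
 * INVALID_SUBPLAN_INDEX starts at subplan 3, walks down past the finished
 * subplans 3 and 2, and takes subplan 1; it would give up, setting both
 * indexes to INVALID_SUBPLAN_INDEX, only if subplans 1 and 0 were also
 * finished.
 */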
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_worker
 *
 *		Choose next subplan for a parallel-aware Append, returning
 *		false if there are no more.
 *
 *		We start from the first plan and advance through the list;
 *		when we get back to the end, we loop back to the first
 *		nonpartial plan.  This assigns the non-partial plans first
 *		in order of descending cost and then spreads out the
 *		workers as evenly as possible across the remaining partial
 *		plans.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;
	Append	   *append = (Append *) node->ps.plan;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	/* Mark just-completed subplan as finished. */
	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	/* If all the plans are already done, we have nothing to do */
	if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
	{
		LWLockRelease(&pstate->pa_lock);
		return false;
	}

	/* Loop until we find a subplan to execute. */
	while (pstate->pa_finished[pstate->pa_next_plan])
	{
		if (pstate->pa_next_plan < node->as_nplans - 1)
		{
			/* Advance to next plan. */
			pstate->pa_next_plan++;
		}
		else if (append->first_partial_plan < node->as_nplans)
		{
			/* Loop back to first partial plan. */
			pstate->pa_next_plan = append->first_partial_plan;
		}
		else
		{
			/* At last plan, no partial plans, arrange to bail out. */
			pstate->pa_next_plan = node->as_whichplan;
		}

		if (pstate->pa_next_plan == node->as_whichplan)
		{
			/* We've tried everything! */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}
	}

	/* Pick the plan we found, and advance pa_next_plan one more time. */
	node->as_whichplan = pstate->pa_next_plan++;
	if (pstate->pa_next_plan >= node->as_nplans)
	{
		Assert(append->first_partial_plan < node->as_nplans);
		pstate->pa_next_plan = append->first_partial_plan;
	}

	/* If non-partial, immediately mark as finished. */
	if (node->as_whichplan < append->first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
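/*
 * Worked example (illustrative): say as_nplans = 5 and first_partial_plan
 * = 3, i.e. subplans 0-2 are non-partial and 3-4 are partial.  The first
 * three workers to arrive take subplans 0, 1 and 2, each marked finished
 * immediately; pa_next_plan then reaches 3.  Later arrivals alternate
 * between partial subplans 3 and 4, since pa_next_plan wraps from 5 back
 * to first_partial_plan, until both are executed to completion, after
 * which pa_next_plan is set to INVALID_SUBPLAN_INDEX and every caller
 * bails out.
 */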