1 /*-------------------------------------------------------------------------
4 * routines to handle append nodes.
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/nodeAppend.c
13 *-------------------------------------------------------------------------
16 * ExecInitAppend - initialize the append node
17 * ExecAppend - retrieve the next tuple from the node
18 * ExecEndAppend - shut down the append node
19 * ExecReScanAppend - rescan the append node
22 * Each append node contains a list of one or more subplans which
23 * must be iteratively processed (forwards or backwards).
24 * Tuples are retrieved by executing the 'whichplan'th subplan
25 * until the subplan stops returning tuples, at which point that
26 * plan is shut down and the next started up.
28 * Append nodes don't make use of their left and right
29 * subtrees, rather they maintain a list of subplans so
30 * a typical append node looks like this in the plan tree:
34 * Append -------+------+------+--- nil
39 * Append nodes are currently used for unions, and to support
40 * inheritance queries, where several relations need to be scanned.
41 * For example, in our standard person/student/employee/student-emp
42 * example, where student and employee inherit from person
43 * and student-emp inherits from student and employee, the
46 * select name from person
51 * Append -------+-------+--------+--------+
53 * nil nil Scan Scan Scan Scan
55 * person employee student student-emp
60 #include "executor/execdebug.h"
61 #include "executor/execPartition.h"
62 #include "executor/nodeAppend.h"
63 #include "miscadmin.h"
65 /* Shared state for parallel-aware Append. */
66 struct ParallelAppendState
68 LWLock pa_lock; /* mutual exclusion to choose next subplan */
69 int pa_next_plan; /* next plan to choose by any worker */
72 * pa_finished[i] should be true if no more workers should select subplan
73 * i. for a non-partial plan, this should be set to true as soon as a
74 * worker selects the plan; for a partial plan, it remains false until
75 * some worker executes the plan to completion.
77 bool pa_finished[FLEXIBLE_ARRAY_MEMBER];
80 #define INVALID_SUBPLAN_INDEX -1
81 #define NO_MATCHING_SUBPLANS -2
83 static TupleTableSlot *ExecAppend(PlanState *pstate);
84 static bool choose_next_subplan_locally(AppendState *node);
85 static bool choose_next_subplan_for_leader(AppendState *node);
86 static bool choose_next_subplan_for_worker(AppendState *node);
87 static void mark_invalid_subplans_as_finished(AppendState *node);
89 /* ----------------------------------------------------------------
92 * Begin all of the subscans of the append node.
94 * (This is potentially wasteful, since the entire result of the
95 * append node may not be scanned, but this way all of the
96 * structures get allocated in the executor's top level memory
97 * block instead of that of the call to ExecAppend.)
98 * ----------------------------------------------------------------
101 ExecInitAppend(Append *node, EState *estate, int eflags)
103 AppendState *appendstate = makeNode(AppendState);
104 PlanState **appendplanstates;
105 Bitmapset *validsubplans;
111 /* check for unsupported flags */
112 Assert(!(eflags & EXEC_FLAG_MARK));
115 * Lock the non-leaf tables in the partition tree controlled by this node.
116 * It's a no-op for non-partitioned parent tables.
118 ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
121 * create new AppendState for our append node
123 appendstate->ps.plan = (Plan *) node;
124 appendstate->ps.state = estate;
125 appendstate->ps.ExecProcNode = ExecAppend;
127 /* Let choose_next_subplan_* function handle setting the first subplan */
128 appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
130 /* If run-time partition pruning is enabled, then set that up now */
131 if (node->part_prune_infos != NIL)
133 PartitionPruneState *prunestate;
135 ExecAssignExprContext(estate, &appendstate->ps);
137 prunestate = ExecSetupPartitionPruneState(&appendstate->ps,
138 node->part_prune_infos);
141 * When there are external params matching the partition key we may be
142 * able to prune away Append subplans now.
144 if (!bms_is_empty(prunestate->extparams))
146 /* Determine which subplans match the external params */
147 validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
148 list_length(node->appendplans));
151 * If no subplans match the given parameters then we must handle
152 * this case in a special way. The problem here is that code in
153 * explain.c requires an Append to have at least one subplan in
154 * order for it to properly determine the Vars in that subplan's
155 * targetlist. We sidestep this issue by just initializing the
156 * first subplan and setting as_whichplan to NO_MATCHING_SUBPLANS
157 * to indicate that we don't need to scan any subnodes.
159 if (bms_is_empty(validsubplans))
161 appendstate->as_whichplan = NO_MATCHING_SUBPLANS;
163 /* Mark the first as valid so that it's initialized below */
164 validsubplans = bms_make_singleton(0);
167 nplans = bms_num_members(validsubplans);
171 /* We'll need to initialize all subplans */
172 nplans = list_length(node->appendplans);
173 validsubplans = bms_add_range(NULL, 0, nplans - 1);
177 * If there's no exec params then no further pruning can be done, we
178 * can just set the valid subplans to all remaining subplans.
180 if (bms_is_empty(prunestate->execparams))
181 appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
183 appendstate->as_prune_state = prunestate;
188 nplans = list_length(node->appendplans);
191 * When run-time partition pruning is not enabled we can just mark all
192 * subplans as valid, they must also all be initialized.
194 appendstate->as_valid_subplans = validsubplans =
195 bms_add_range(NULL, 0, nplans - 1);
196 appendstate->as_prune_state = NULL;
200 * Initialize result tuple type and slot.
202 ExecInitResultTupleSlotTL(estate, &appendstate->ps);
204 appendplanstates = (PlanState **) palloc(nplans *
205 sizeof(PlanState *));
208 * call ExecInitNode on each of the valid plans to be executed and save
209 * the results into the appendplanstates array.
212 foreach(lc, node->appendplans)
214 if (bms_is_member(i, validsubplans))
216 Plan *initNode = (Plan *) lfirst(lc);
218 appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
223 appendstate->appendplans = appendplanstates;
224 appendstate->as_nplans = nplans;
227 * Miscellaneous initialization
230 appendstate->ps.ps_ProjInfo = NULL;
232 /* For parallel query, this will be overridden later. */
233 appendstate->choose_next_subplan = choose_next_subplan_locally;
238 /* ----------------------------------------------------------------
241 * Handles iteration over multiple subplans.
242 * ----------------------------------------------------------------
244 static TupleTableSlot *
245 ExecAppend(PlanState *pstate)
247 AppendState *node = castNode(AppendState, pstate);
249 if (node->as_whichplan < 0)
252 * If no subplan has been chosen, we must choose one before
255 if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
256 !node->choose_next_subplan(node))
257 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
259 /* Nothing to do if there are no matching subplans */
260 else if (node->as_whichplan == NO_MATCHING_SUBPLANS)
261 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
267 TupleTableSlot *result;
269 CHECK_FOR_INTERRUPTS();
272 * figure out which subplan we are currently processing
274 Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
275 subnode = node->appendplans[node->as_whichplan];
278 * get a tuple from the subplan
280 result = ExecProcNode(subnode);
282 if (!TupIsNull(result))
285 * If the subplan gave us something then return it as-is. We do
286 * NOT make use of the result slot that was set up in
287 * ExecInitAppend; there's no need for it.
292 /* choose new subplan; if none, we're done */
293 if (!node->choose_next_subplan(node))
294 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
298 /* ----------------------------------------------------------------
301 * Shuts down the subscans of the append node.
303 * Returns nothing of interest.
304 * ----------------------------------------------------------------
307 ExecEndAppend(AppendState *node)
309 PlanState **appendplans;
314 * get information from the node
316 appendplans = node->appendplans;
317 nplans = node->as_nplans;
320 * shut down each of the subscans
322 for (i = 0; i < nplans; i++)
323 ExecEndNode(appendplans[i]);
327 ExecReScanAppend(AppendState *node)
332 * If any of the parameters being used for partition pruning have changed,
333 * then we'd better unset the valid subplans so that they are reselected
334 * for the new parameter values.
336 if (node->as_prune_state &&
337 bms_overlap(node->ps.chgParam,
338 node->as_prune_state->execparams))
340 bms_free(node->as_valid_subplans);
341 node->as_valid_subplans = NULL;
344 for (i = 0; i < node->as_nplans; i++)
346 PlanState *subnode = node->appendplans[i];
349 * ExecReScan doesn't know about my subplans, so I have to do
350 * changed-parameter signaling myself.
352 if (node->ps.chgParam != NULL)
353 UpdateChangedParamSet(subnode, node->ps.chgParam);
356 * If chgParam of subnode is not null then plan will be re-scanned by
357 * first ExecProcNode.
359 if (subnode->chgParam == NULL)
363 /* Let choose_next_subplan_* function handle setting the first subplan */
364 node->as_whichplan = INVALID_SUBPLAN_INDEX;
367 /* ----------------------------------------------------------------
368 * Parallel Append Support
369 * ----------------------------------------------------------------
372 /* ----------------------------------------------------------------
375 * Compute the amount of space we'll need in the parallel
376 * query DSM, and inform pcxt->estimator about our needs.
377 * ----------------------------------------------------------------
380 ExecAppendEstimate(AppendState *node,
381 ParallelContext *pcxt)
384 add_size(offsetof(ParallelAppendState, pa_finished),
385 sizeof(bool) * node->as_nplans);
387 shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
388 shm_toc_estimate_keys(&pcxt->estimator, 1);
392 /* ----------------------------------------------------------------
393 * ExecAppendInitializeDSM
395 * Set up shared state for Parallel Append.
396 * ----------------------------------------------------------------
399 ExecAppendInitializeDSM(AppendState *node,
400 ParallelContext *pcxt)
402 ParallelAppendState *pstate;
404 pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
405 memset(pstate, 0, node->pstate_len);
406 LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
407 shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
409 node->as_pstate = pstate;
410 node->choose_next_subplan = choose_next_subplan_for_leader;
413 /* ----------------------------------------------------------------
414 * ExecAppendReInitializeDSM
416 * Reset shared state before beginning a fresh scan.
417 * ----------------------------------------------------------------
420 ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
422 ParallelAppendState *pstate = node->as_pstate;
424 pstate->pa_next_plan = 0;
425 memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
428 /* ----------------------------------------------------------------
429 * ExecAppendInitializeWorker
431 * Copy relevant information from TOC into planstate, and initialize
432 * whatever is required to choose and execute the optimal subplan.
433 * ----------------------------------------------------------------
436 ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
438 node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
439 node->choose_next_subplan = choose_next_subplan_for_worker;
442 /* ----------------------------------------------------------------
443 * choose_next_subplan_locally
445 * Choose next subplan for a non-parallel-aware Append,
446 * returning false if there are no more.
447 * ----------------------------------------------------------------
450 choose_next_subplan_locally(AppendState *node)
452 int whichplan = node->as_whichplan;
455 /* We should never be called when there are no subplans */
456 Assert(whichplan != NO_MATCHING_SUBPLANS);
459 * If first call then have the bms member function choose the first valid
460 * subplan by initializing whichplan to -1. If there happen to be no
461 * valid subplans then the bms member function will handle that by
462 * returning a negative number which will allow us to exit returning a
465 if (whichplan == INVALID_SUBPLAN_INDEX)
467 if (node->as_valid_subplans == NULL)
468 node->as_valid_subplans =
469 ExecFindMatchingSubPlans(node->as_prune_state);
474 /* Ensure whichplan is within the expected range */
475 Assert(whichplan >= -1 && whichplan <= node->as_nplans);
477 if (ScanDirectionIsForward(node->ps.state->es_direction))
478 nextplan = bms_next_member(node->as_valid_subplans, whichplan);
480 nextplan = bms_prev_member(node->as_valid_subplans, whichplan);
485 node->as_whichplan = nextplan;
490 /* ----------------------------------------------------------------
491 * choose_next_subplan_for_leader
493 * Try to pick a plan which doesn't commit us to doing much
494 * work locally, so that as much work as possible is done in
495 * the workers. Cheapest subplans are at the end.
496 * ----------------------------------------------------------------
499 choose_next_subplan_for_leader(AppendState *node)
501 ParallelAppendState *pstate = node->as_pstate;
502 Append *append = (Append *) node->ps.plan;
504 /* Backward scan is not supported by parallel-aware plans */
505 Assert(ScanDirectionIsForward(node->ps.state->es_direction));
507 /* We should never be called when there are no subplans */
508 Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);
510 LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
512 if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
514 /* Mark just-completed subplan as finished. */
515 node->as_pstate->pa_finished[node->as_whichplan] = true;
519 /* Start with last subplan. */
520 node->as_whichplan = node->as_nplans - 1;
523 * If we've yet to determine the valid subplans for these parameters
524 * then do so now. If run-time pruning is disabled then the valid
525 * subplans will always be set to all subplans.
527 if (node->as_valid_subplans == NULL)
529 node->as_valid_subplans =
530 ExecFindMatchingSubPlans(node->as_prune_state);
533 * Mark each invalid plan as finished to allow the loop below to
534 * select the first valid subplan.
536 mark_invalid_subplans_as_finished(node);
540 /* Loop until we find a subplan to execute. */
541 while (pstate->pa_finished[node->as_whichplan])
543 if (node->as_whichplan == 0)
545 pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
546 node->as_whichplan = INVALID_SUBPLAN_INDEX;
547 LWLockRelease(&pstate->pa_lock);
550 node->as_whichplan--;
553 /* If non-partial, immediately mark as finished. */
554 if (node->as_whichplan < append->first_partial_plan)
555 node->as_pstate->pa_finished[node->as_whichplan] = true;
557 LWLockRelease(&pstate->pa_lock);
562 /* ----------------------------------------------------------------
563 * choose_next_subplan_for_worker
565 * Choose next subplan for a parallel-aware Append, returning
566 * false if there are no more.
568 * We start from the first plan and advance through the list;
569 * when we get back to the end, we loop back to the first
570 * partial plan. This assigns the non-partial plans first in
571 * order of descending cost and then spreads out the workers
572 * as evenly as possible across the remaining partial plans.
573 * ----------------------------------------------------------------
576 choose_next_subplan_for_worker(AppendState *node)
578 ParallelAppendState *pstate = node->as_pstate;
579 Append *append = (Append *) node->ps.plan;
581 /* Backward scan is not supported by parallel-aware plans */
582 Assert(ScanDirectionIsForward(node->ps.state->es_direction));
584 /* We should never be called when there are no subplans */
585 Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);
587 LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
589 /* Mark just-completed subplan as finished. */
590 if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
591 node->as_pstate->pa_finished[node->as_whichplan] = true;
594 * If we've yet to determine the valid subplans for these parameters then
595 * do so now. If run-time pruning is disabled then the valid subplans
596 * will always be set to all subplans.
598 else if (node->as_valid_subplans == NULL)
600 node->as_valid_subplans =
601 ExecFindMatchingSubPlans(node->as_prune_state);
602 mark_invalid_subplans_as_finished(node);
605 /* If all the plans are already done, we have nothing to do */
606 if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
608 LWLockRelease(&pstate->pa_lock);
612 /* Save the plan from which we are starting the search. */
613 node->as_whichplan = pstate->pa_next_plan;
615 /* Loop until we find a subplan to execute. */
616 while (pstate->pa_finished[pstate->pa_next_plan])
618 if (pstate->pa_next_plan < node->as_nplans - 1)
620 /* Advance to next plan. */
621 pstate->pa_next_plan++;
623 else if (node->as_whichplan > append->first_partial_plan)
625 /* Loop back to first partial plan. */
626 pstate->pa_next_plan = append->first_partial_plan;
631 * At last plan, and either there are no partial plans or we've
632 * tried them all. Arrange to bail out.
634 pstate->pa_next_plan = node->as_whichplan;
637 if (pstate->pa_next_plan == node->as_whichplan)
639 /* We've tried everything! */
640 pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
641 LWLockRelease(&pstate->pa_lock);
646 /* Pick the plan we found, and advance pa_next_plan one more time. */
647 node->as_whichplan = pstate->pa_next_plan++;
648 if (pstate->pa_next_plan >= node->as_nplans)
650 if (append->first_partial_plan < node->as_nplans)
651 pstate->pa_next_plan = append->first_partial_plan;
655 * We have only non-partial plans, and we already chose the last
656 * one; so arrange for the other workers to immediately bail out.
658 pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
662 /* If non-partial, immediately mark as finished. */
663 if (node->as_whichplan < append->first_partial_plan)
664 node->as_pstate->pa_finished[node->as_whichplan] = true;
666 LWLockRelease(&pstate->pa_lock);
672 * mark_invalid_subplans_as_finished
673 * Marks the ParallelAppendState's pa_finished as true for each invalid
676 * This function should only be called for parallel Append with run-time
680 mark_invalid_subplans_as_finished(AppendState *node)
684 /* Only valid to call this while in parallel Append mode */
685 Assert(node->as_pstate);
687 /* Shouldn't have been called when run-time pruning is not enabled */
688 Assert(node->as_prune_state);
690 /* Nothing to do if all plans are valid */
691 if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
694 /* Mark all non-valid plans as finished */
695 for (i = 0; i < node->as_nplans; i++)
697 if (!bms_is_member(i, node->as_valid_subplans))
698 node->as_pstate->pa_finished[i] = true;