/*-------------------------------------------------------------------------
 *
 * nodeAppend.c
 *      routines to handle append nodes.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      src/backend/executor/nodeAppend.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *      ExecInitAppend      - initialize the append node
 *      ExecAppend          - retrieve the next tuple from the node
 *      ExecEndAppend       - shut down the append node
 *      ExecReScanAppend    - rescan the append node
 *
 *   NOTES
 *      Each append node contains a list of one or more subplans which
 *      must be iteratively processed (forwards or backwards).
 *      Tuples are retrieved by executing the 'whichplan'th subplan
 *      until the subplan stops returning tuples, at which point that
 *      plan is shut down and the next started up.
 *
 *      Append nodes don't make use of their left and right subtrees;
 *      rather, they maintain a list of subplans, so a typical append
 *      node looks like this in the plan tree:
 *
 *                 ...
 *                 /
 *              Append -------+------+------+--- nil
 *              /      \      |      |      |
 *            nil      nil   ...    ...    ...
 *                               subplans
 *
 *      Append nodes are currently used for unions, and to support
 *      inheritance queries, where several relations need to be scanned.
 *      For example, in our standard person/student/employee/student-emp
 *      example, where student and employee inherit from person
 *      and student-emp inherits from student and employee, the
 *      query:
 *
 *              select name from person
 *
 *      generates the plan:
 *
 *                |
 *              Append -------+-------+--------+--------+
 *              /      \      |       |        |        |
 *            nil      nil   Scan    Scan     Scan     Scan
 *                            |       |        |        |
 *                          person employee student student-emp
 */

#include "postgres.h"

#include "executor/execdebug.h"
#include "executor/execPartition.h"
#include "executor/nodeAppend.h"
#include "miscadmin.h"

/* Shared state for parallel-aware Append. */
struct ParallelAppendState
{
    LWLock      pa_lock;        /* mutual exclusion to choose next subplan */
    int         pa_next_plan;   /* next plan to choose by any worker */

    /*
     * pa_finished[i] should be true if no more workers should select subplan
     * i.  For a non-partial plan, this should be set to true as soon as a
     * worker selects the plan; for a partial plan, it remains false until
     * some worker executes the plan to completion.
     */
    bool        pa_finished[FLEXIBLE_ARRAY_MEMBER];
};

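/*
 * Special values for AppendState's as_whichplan: INVALID_SUBPLAN_INDEX
 * means no subplan has been chosen yet (a choose_next_subplan_* function
 * will select one), while NO_MATCHING_SUBPLANS means run-time pruning
 * determined that no subplans need to be scanned at all.
 */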
#define INVALID_SUBPLAN_INDEX       -1
#define NO_MATCHING_SUBPLANS        -2

static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);
static void mark_invalid_subplans_as_finished(AppendState *node);

/* ----------------------------------------------------------------
 *      ExecInitAppend
 *
 *      Begin all of the subscans of the append node.
 *
 *      (This is potentially wasteful, since the entire result of the
 *      append node may not be scanned, but this way all of the
 *      structures get allocated in the executor's top level memory
 *      block instead of that of the call to ExecAppend.)
 * ----------------------------------------------------------------
 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
    AppendState *appendstate = makeNode(AppendState);
    PlanState **appendplanstates;
    Bitmapset  *validsubplans;
    int         nplans;
    int         i,
                j;
    ListCell   *lc;

    /* check for unsupported flags */
    Assert(!(eflags & EXEC_FLAG_MARK));

    /*
     * Lock the non-leaf tables in the partition tree controlled by this
     * node.  It's a no-op for non-partitioned parent tables.
     */
    ExecLockNonLeafAppendTables(node->partitioned_rels, estate);

    /*
     * create new AppendState for our append node
     */
    appendstate->ps.plan = (Plan *) node;
    appendstate->ps.state = estate;
    appendstate->ps.ExecProcNode = ExecAppend;

    /* Let choose_next_subplan_* function handle setting the first subplan */
    appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;

    /* If run-time partition pruning is enabled, then set that up now */
    if (node->part_prune_infos != NIL)
    {
        PartitionPruneState *prunestate;

        ExecAssignExprContext(estate, &appendstate->ps);

        prunestate = ExecSetupPartitionPruneState(&appendstate->ps,
                                                  node->part_prune_infos);

        /*
         * When there are external params matching the partition key, we may
         * be able to prune away Append subplans now.
         */
        if (!bms_is_empty(prunestate->extparams))
        {
            /* Determine which subplans match the external params */
            validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
                                                            list_length(node->appendplans));

            /*
             * If no subplans match the given parameters then we must handle
             * this case in a special way.  The problem here is that code in
             * explain.c requires an Append to have at least one subplan in
             * order for it to properly determine the Vars in that subplan's
             * targetlist.  We sidestep this issue by just initializing the
             * first subplan and setting as_whichplan to NO_MATCHING_SUBPLANS
             * to indicate that we don't need to scan any subnodes.
             */
            if (bms_is_empty(validsubplans))
            {
                appendstate->as_whichplan = NO_MATCHING_SUBPLANS;

                /* Mark the first as valid so that it's initialized below */
                validsubplans = bms_make_singleton(0);
            }

            nplans = bms_num_members(validsubplans);
        }
        else
        {
            /* We'll need to initialize all subplans */
            nplans = list_length(node->appendplans);
            validsubplans = bms_add_range(NULL, 0, nplans - 1);
        }

        /*
         * If there are no exec params then no further pruning can be done;
         * we can just set the valid subplans to all remaining subplans.
         */
        if (bms_is_empty(prunestate->execparams))
            appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);

        appendstate->as_prune_state = prunestate;
    }
    else
    {
        nplans = list_length(node->appendplans);

        /*
         * When run-time partition pruning is not enabled, we can just mark
         * all subplans as valid; they must also all be initialized.
         */
        appendstate->as_valid_subplans = validsubplans =
            bms_add_range(NULL, 0, nplans - 1);
        appendstate->as_prune_state = NULL;
    }

    /*
     * Initialize result tuple type and slot.
     */
    ExecInitResultTupleSlotTL(estate, &appendstate->ps);

    appendplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *));

    /*
     * call ExecInitNode on each of the valid plans to be executed and save
     * the results into the appendplanstates array.
     */
    j = i = 0;
    foreach(lc, node->appendplans)
    {
        if (bms_is_member(i, validsubplans))
        {
            Plan       *initNode = (Plan *) lfirst(lc);

            appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
        }
        i++;
    }

    appendstate->appendplans = appendplanstates;
    appendstate->as_nplans = nplans;

    /*
     * Miscellaneous initialization
     */

    appendstate->ps.ps_ProjInfo = NULL;

    /* For parallel query, this will be overridden later. */
    appendstate->choose_next_subplan = choose_next_subplan_locally;

    return appendstate;
}

/* ----------------------------------------------------------------
 *      ExecAppend
 *
 *      Handles iteration over multiple subplans.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecAppend(PlanState *pstate)
{
    AppendState *node = castNode(AppendState, pstate);

    if (node->as_whichplan < 0)
    {
        /*
         * If no subplan has been chosen, we must choose one before
         * proceeding.
         */
        if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
            !node->choose_next_subplan(node))
            return ExecClearTuple(node->ps.ps_ResultTupleSlot);

        /* Nothing to do if there are no matching subplans */
        else if (node->as_whichplan == NO_MATCHING_SUBPLANS)
            return ExecClearTuple(node->ps.ps_ResultTupleSlot);
    }

    for (;;)
    {
        PlanState  *subnode;
        TupleTableSlot *result;

        CHECK_FOR_INTERRUPTS();

        /*
         * figure out which subplan we are currently processing
         */
        Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
        subnode = node->appendplans[node->as_whichplan];

        /*
         * get a tuple from the subplan
         */
        result = ExecProcNode(subnode);

        if (!TupIsNull(result))
        {
            /*
             * If the subplan gave us something then return it as-is. We do
             * NOT make use of the result slot that was set up in
             * ExecInitAppend; there's no need for it.
             */
            return result;
        }

        /* choose new subplan; if none, we're done */
        if (!node->choose_next_subplan(node))
            return ExecClearTuple(node->ps.ps_ResultTupleSlot);
    }
}

/* ----------------------------------------------------------------
 *      ExecEndAppend
 *
 *      Shuts down the subscans of the append node.
 *
 *      Returns nothing of interest.
 * ----------------------------------------------------------------
 */
void
ExecEndAppend(AppendState *node)
{
    PlanState **appendplans;
    int         nplans;
    int         i;

    /*
     * get information from the node
     */
    appendplans = node->appendplans;
    nplans = node->as_nplans;

    /*
     * shut down each of the subscans
     */
    for (i = 0; i < nplans; i++)
        ExecEndNode(appendplans[i]);
}

void
ExecReScanAppend(AppendState *node)
{
    int         i;

    /*
     * If any of the parameters being used for partition pruning have
     * changed, then we'd better unset the valid subplans so that they are
     * reselected for the new parameter values.
     */
    if (node->as_prune_state &&
        bms_overlap(node->ps.chgParam,
                    node->as_prune_state->execparams))
    {
        bms_free(node->as_valid_subplans);
        node->as_valid_subplans = NULL;
    }

    for (i = 0; i < node->as_nplans; i++)
    {
        PlanState  *subnode = node->appendplans[i];

        /*
         * ExecReScan doesn't know about my subplans, so I have to do
         * changed-parameter signaling myself.
         */
        if (node->ps.chgParam != NULL)
            UpdateChangedParamSet(subnode, node->ps.chgParam);

        /*
         * If chgParam of the subnode is not null, then the plan will be
         * re-scanned by the first ExecProcNode.
         */
        if (subnode->chgParam == NULL)
            ExecReScan(subnode);
    }

    /* Let choose_next_subplan_* function handle setting the first subplan */
    node->as_whichplan = INVALID_SUBPLAN_INDEX;
}

/* ----------------------------------------------------------------
 *                      Parallel Append Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *      ExecAppendEstimate
 *
 *      Compute the amount of space we'll need in the parallel
 *      query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecAppendEstimate(AppendState *node,
                   ParallelContext *pcxt)
{
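    /*
     * The shared state is the fixed-size ParallelAppendState header plus one
     * pa_finished flag per subplan; pa_finished is a flexible array member,
     * hence the offsetof-based sizing below.
     */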
    node->pstate_len =
        add_size(offsetof(ParallelAppendState, pa_finished),
                 sizeof(bool) * node->as_nplans);

    shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *      ExecAppendInitializeDSM
 *
 *      Set up shared state for Parallel Append.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeDSM(AppendState *node,
                        ParallelContext *pcxt)
{
    ParallelAppendState *pstate;

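    /*
     * Allocate and zero the shared state, then publish it in the DSM TOC
     * keyed by this node's plan_node_id so that ExecAppendInitializeWorker
     * can look it up.
     */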
    pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
    memset(pstate, 0, node->pstate_len);
    LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
    shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);

    node->as_pstate = pstate;
    node->choose_next_subplan = choose_next_subplan_for_leader;
}

/* ----------------------------------------------------------------
 *      ExecAppendReInitializeDSM
 *
 *      Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
{
    ParallelAppendState *pstate = node->as_pstate;

    pstate->pa_next_plan = 0;
    memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
}

/* ----------------------------------------------------------------
 *      ExecAppendInitializeWorker
 *
 *      Copy relevant information from TOC into planstate, and initialize
 *      whatever is required to choose and execute the optimal subplan.
 * ----------------------------------------------------------------
 */
void
ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
{
    node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
    node->choose_next_subplan = choose_next_subplan_for_worker;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_locally
 *
 *      Choose next subplan for a non-parallel-aware Append,
 *      returning false if there are no more.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_locally(AppendState *node)
{
    int         whichplan = node->as_whichplan;
    int         nextplan;

    /* We should never be called when there are no subplans */
    Assert(whichplan != NO_MATCHING_SUBPLANS);

    /*
     * If this is the first call, initialize whichplan to -1 so that the bms
     * member function below selects the first valid subplan.  If there
     * happen to be no valid subplans, the bms member function returns a
     * negative number, and we exit by returning false.
     */
    if (whichplan == INVALID_SUBPLAN_INDEX)
    {
        if (node->as_valid_subplans == NULL)
            node->as_valid_subplans =
                ExecFindMatchingSubPlans(node->as_prune_state);

        whichplan = -1;
    }

    /* Ensure whichplan is within the expected range */
    Assert(whichplan >= -1 && whichplan <= node->as_nplans);

    if (ScanDirectionIsForward(node->ps.state->es_direction))
        nextplan = bms_next_member(node->as_valid_subplans, whichplan);
    else
        nextplan = bms_prev_member(node->as_valid_subplans, whichplan);

    if (nextplan < 0)
        return false;

    node->as_whichplan = nextplan;

    return true;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_for_leader
 *
 *      Try to pick a plan which doesn't commit us to doing much
 *      work locally, so that as much work as possible is done in
 *      the workers.  Cheapest subplans are at the end.
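 *      To that end, the leader searches backwards from the last subplan,
 *      picking up the cheapest work still available.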
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    /* We should never be called when there are no subplans */
    Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
    {
        /* Mark just-completed subplan as finished. */
        node->as_pstate->pa_finished[node->as_whichplan] = true;
    }
    else
    {
        /* Start with last subplan. */
        node->as_whichplan = node->as_nplans - 1;

        /*
         * If we've yet to determine the valid subplans for these parameters
         * then do so now.  If run-time pruning is disabled then the valid
         * subplans will always be set to all subplans.
         */
        if (node->as_valid_subplans == NULL)
        {
            node->as_valid_subplans =
                ExecFindMatchingSubPlans(node->as_prune_state);

            /*
             * Mark each invalid plan as finished to allow the loop below to
             * select the first valid subplan.
             */
            mark_invalid_subplans_as_finished(node);
        }
    }

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[node->as_whichplan])
    {
        if (node->as_whichplan == 0)
        {
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            node->as_whichplan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
        node->as_whichplan--;
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}

/* ----------------------------------------------------------------
 *      choose_next_subplan_for_worker
 *
 *      Choose next subplan for a parallel-aware Append, returning
 *      false if there are no more.
 *
 *      We start from the first plan and advance through the list;
 *      when we get back to the end, we loop back to the first
 *      partial plan.  This assigns the non-partial plans first in
 *      order of descending cost and then spreads out the workers
 *      as evenly as possible across the remaining partial plans.
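 *
 *      For example (an illustrative layout, not taken from any particular
 *      plan): with as_nplans == 4 and first_partial_plan == 2, plans 0 and
 *      1 are non-partial and are each taken by exactly one worker (being
 *      marked finished as soon as they are chosen), while plans 2 and 3
 *      are partial and are shared by all remaining workers until each has
 *      been executed to completion.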
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
    ParallelAppendState *pstate = node->as_pstate;
    Append     *append = (Append *) node->ps.plan;

    /* Backward scan is not supported by parallel-aware plans */
    Assert(ScanDirectionIsForward(node->ps.state->es_direction));

    /* We should never be called when there are no subplans */
    Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

    LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

    /* Mark just-completed subplan as finished. */
    if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    /*
     * If we've yet to determine the valid subplans for these parameters then
     * do so now.  If run-time pruning is disabled then the valid subplans
     * will always be set to all subplans.
     */
    else if (node->as_valid_subplans == NULL)
    {
        node->as_valid_subplans =
            ExecFindMatchingSubPlans(node->as_prune_state);
        mark_invalid_subplans_as_finished(node);
    }

    /* If all the plans are already done, we have nothing to do */
    if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
    {
        LWLockRelease(&pstate->pa_lock);
        return false;
    }

    /* Save the plan from which we are starting the search. */
    node->as_whichplan = pstate->pa_next_plan;

    /* Loop until we find a subplan to execute. */
    while (pstate->pa_finished[pstate->pa_next_plan])
    {
        if (pstate->pa_next_plan < node->as_nplans - 1)
        {
            /* Advance to next plan. */
            pstate->pa_next_plan++;
        }
        else if (node->as_whichplan > append->first_partial_plan)
        {
            /* Loop back to first partial plan. */
            pstate->pa_next_plan = append->first_partial_plan;
        }
        else
        {
            /*
             * At last plan, and either there are no partial plans or we've
             * tried them all.  Arrange to bail out.
             */
            pstate->pa_next_plan = node->as_whichplan;
        }

        if (pstate->pa_next_plan == node->as_whichplan)
        {
            /* We've tried everything! */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
            LWLockRelease(&pstate->pa_lock);
            return false;
        }
    }

    /* Pick the plan we found, and advance pa_next_plan one more time. */
    node->as_whichplan = pstate->pa_next_plan++;
    if (pstate->pa_next_plan >= node->as_nplans)
    {
        if (append->first_partial_plan < node->as_nplans)
            pstate->pa_next_plan = append->first_partial_plan;
        else
        {
            /*
             * We have only non-partial plans, and we already chose the last
             * one; so arrange for the other workers to immediately bail out.
             */
            pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
        }
    }

    /* If non-partial, immediately mark as finished. */
    if (node->as_whichplan < append->first_partial_plan)
        node->as_pstate->pa_finished[node->as_whichplan] = true;

    LWLockRelease(&pstate->pa_lock);

    return true;
}

/*
 * mark_invalid_subplans_as_finished
 *      Marks the ParallelAppendState's pa_finished as true for each invalid
 *      subplan.
 *
 * This function should only be called for parallel Append with run-time
 * pruning enabled.
 */
static void
mark_invalid_subplans_as_finished(AppendState *node)
{
    int         i;

    /* Only valid to call this while in parallel Append mode */
    Assert(node->as_pstate);

    /* Shouldn't have been called when run-time pruning is not enabled */
    Assert(node->as_prune_state);

    /* Nothing to do if all plans are valid */
    if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
        return;

    /* Mark all non-valid plans as finished */
    for (i = 0; i < node->as_nplans; i++)
    {
        if (!bms_is_member(i, node->as_valid_subplans))
            node->as_pstate->pa_finished[i] = true;
    }
}