granicus.if.org Git - postgresql/blobdiff - src/backend/executor/nodeAppend.c
Make some small planner API cleanups.
[postgresql] / src / backend / executor / nodeAppend.c
index 745269e07171418d132de5a72e7339a5e239e30a..f3be2429dbeb122d186e941d26b2094982441bdc 100644 (file)
@@ -3,17 +3,18 @@
  * nodeAppend.c
  *       routines to handle append nodes.
  *
- * Copyright (c) 1994, Regents of the University of California
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAppend.c,v 1.20 1999/07/15 15:19:01 momjian Exp $
+ *       src/backend/executor/nodeAppend.c
  *
  *-------------------------------------------------------------------------
  */
 /* INTERFACE ROUTINES
  *             ExecInitAppend  - initialize the append node
- *             ExecProcAppend  - retrieve the next tuple from the node
+ *             ExecAppend              - retrieve the next tuple from the node
  *             ExecEndAppend   - shut down the append node
  *             ExecReScanAppend - rescan the append node
  *
  *                                /
  *                             Append -------+------+------+--- nil
  *                             /       \                 |              |              |
- *                       nil   nil              ...    ...    ...
+ *                       nil   nil              ...    ...    ...
  *                                                              subplans
  *
- *             Append nodes are currently used for unions, and to support inheritance
- *             queries, where several relations need to be scanned.
+ *             Append nodes are currently used for unions, and to support
+ *             inheritance queries, where several relations need to be scanned.
  *             For example, in our standard person/student/employee/student-emp
  *             example, where student and employee inherit from person
  *             and student-emp inherits from student and employee, the
  *             query:
  *
- *                             retrieve (e.name) from e in person*
+ *                             select name from person
  *
  *             generates the plan:
  *
  *                                                       |               |                |            |
  *                                                     person employee student student-emp
  */
-#include "postgres.h"
 
+#include "postgres.h"
 
-#include "access/heapam.h"
-#include "executor/executor.h"
 #include "executor/execdebug.h"
+#include "executor/execPartition.h"
 #include "executor/nodeAppend.h"
-#include "executor/nodeIndexscan.h"
-#include "parser/parsetree.h"  /* for rt_store() macro */
+#include "miscadmin.h"
+
+/*
+ * Shared state for parallel-aware Append.
+ *
+ * ExecAppendInitializeDSM allocates and zeroes one instance of this struct
+ * and publishes it in the TOC; ExecAppendInitializeWorker looks it up from
+ * the TOC again.  The trailing pa_finished array is sized in
+ * ExecAppendEstimate with one entry per initialized subplan (as_nplans).
+ */
+struct ParallelAppendState
+{
+       LWLock          pa_lock;                /* mutual exclusion to choose next subplan */
+       int                     pa_next_plan;   /* next plan to choose by any worker */
+
+       /*
+        * pa_finished[i] should be true if no more workers should select subplan
+        * i.  For a non-partial plan, this should be set to true as soon as a
+        * worker selects the plan; for a partial plan, it remains false until
+        * some worker executes the plan to completion.
+        *
+        * ExecAppendReInitializeDSM resets every entry to false before a fresh
+        * scan.
+        */
+       bool            pa_finished[FLEXIBLE_ARRAY_MEMBER];
+};
+
+#define INVALID_SUBPLAN_INDEX          -1
+#define NO_MATCHING_SUBPLANS           -2
 
-static bool exec_append_initialize_next(Append *node);
+static TupleTableSlot *ExecAppend(PlanState *pstate);
+static bool choose_next_subplan_locally(AppendState *node);
+static bool choose_next_subplan_for_leader(AppendState *node);
+static bool choose_next_subplan_for_worker(AppendState *node);
+static void mark_invalid_subplans_as_finished(AppendState *node);
 
 /* ----------------------------------------------------------------
- *             exec_append_initialize_next
+ *             ExecInitAppend
  *
- *             Sets up the append node state (i.e. the append state node)
- *             for the "next" scan.
+ *             Begin all of the subscans of the append node.
  *
- *             Returns t iff there is a "next" scan to process.
+ *        (This is potentially wasteful, since the entire result of the
+ *             append node may not be scanned, but this way all of the
+ *             structures get allocated in the executor's top level memory
+ *             block instead of that of the call to ExecAppend.)
  * ----------------------------------------------------------------
  */
-static bool
-exec_append_initialize_next(Append *node)
+AppendState *
+ExecInitAppend(Append *node, EState *estate, int eflags)
 {
-       EState     *estate;
-       AppendState *appendstate;
-       TupleTableSlot *result_slot;
-       List       *rangeTable;
-
-       int                     whichplan;
+       AppendState *appendstate = makeNode(AppendState);
+       PlanState **appendplanstates;
+       Bitmapset  *validsubplans;
        int                     nplans;
-       List       *rtables;
-       List       *rtable;
-       ResTarget  *rtentry;
+       int                     firstvalid;
+       int                     i,
+                               j;
+       ListCell   *lc;
+
+       /* check for unsupported flags */
+       Assert(!(eflags & EXEC_FLAG_MARK));
 
-       /* ----------------
-        *      get information from the append node
-        * ----------------
+       /*
+        * create new AppendState for our append node
         */
-       estate = node->plan.state;
-       appendstate = node->appendstate;
-       result_slot = appendstate->cstate.cs_ResultTupleSlot;
-       rangeTable = estate->es_range_table;
+       appendstate->ps.plan = (Plan *) node;
+       appendstate->ps.state = estate;
+       appendstate->ps.ExecProcNode = ExecAppend;
 
-       whichplan = appendstate->as_whichplan;
-       nplans = appendstate->as_nplans;
-       rtables = node->unionrtables;
-       rtable = node->inheritrtable;
+       /* Let choose_next_subplan_* function handle setting the first subplan */
+       appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
 
-       if (whichplan < 0)
+       /* If run-time partition pruning is enabled, then set that up now */
+       if (node->part_prune_info != NULL)
        {
-               /* ----------------
-                *              if scanning in reverse, we start at
-                *              the last scan in the list and then
-                *              proceed back to the first.. in any case
-                *              we inform ExecProcAppend that we are
-                *              at the end of the line by returning FALSE
-                * ----------------
+               PartitionPruneState *prunestate;
+
+               /* We may need an expression context to evaluate partition exprs */
+               ExecAssignExprContext(estate, &appendstate->ps);
+
+               /* Create the working data structure for pruning. */
+               prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
+                                                                                                  node->part_prune_info);
+               appendstate->as_prune_state = prunestate;
+
+               /* Perform an initial partition prune, if required. */
+               if (prunestate->do_initial_prune)
+               {
+                       /* Determine which subplans survive initial pruning */
+                       validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
+                                                                                                                       list_length(node->appendplans));
+
+                       /*
+                        * The case where no subplans survive pruning must be handled
+                        * specially.  The problem here is that code in explain.c requires
+                        * an Append to have at least one subplan in order for it to
+                        * properly determine the Vars in that subplan's targetlist.  We
+                        * sidestep this issue by just initializing the first subplan and
+                        * setting as_whichplan to NO_MATCHING_SUBPLANS to indicate that
+                        * we don't really need to scan any subnodes.
+                        */
+                       if (bms_is_empty(validsubplans))
+                       {
+                               appendstate->as_whichplan = NO_MATCHING_SUBPLANS;
+
+                               /* Mark the first as valid so that it's initialized below */
+                               validsubplans = bms_make_singleton(0);
+                       }
+
+                       nplans = bms_num_members(validsubplans);
+               }
+               else
+               {
+                       /* We'll need to initialize all subplans */
+                       nplans = list_length(node->appendplans);
+                       Assert(nplans > 0);
+                       validsubplans = bms_add_range(NULL, 0, nplans - 1);
+               }
+
+               /*
+                * If no runtime pruning is required, we can fill as_valid_subplans
+                * immediately, preventing later calls to ExecFindMatchingSubPlans.
                 */
-               appendstate->as_whichplan = 0;
-               return FALSE;
+               if (!prunestate->do_exec_prune)
+               {
+                       Assert(nplans > 0);
+                       appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
+               }
+       }
+       else
+       {
+               nplans = list_length(node->appendplans);
 
+               /*
+                * When run-time partition pruning is not enabled we can just mark all
+                * subplans as valid; they must also all be initialized.
+                */
+               Assert(nplans > 0);
+               appendstate->as_valid_subplans = validsubplans =
+                       bms_add_range(NULL, 0, nplans - 1);
+               appendstate->as_prune_state = NULL;
        }
-       else if (whichplan >= nplans)
+
+       /*
+        * Initialize result tuple type and slot.
+        */
+       ExecInitResultTupleSlotTL(&appendstate->ps, &TTSOpsVirtual);
+
+       /* node returns slots from each of its subnodes, therefore not fixed */
+       appendstate->ps.resultopsset = true;
+       appendstate->ps.resultopsfixed = false;
+
+       appendplanstates = (PlanState **) palloc(nplans *
+                                                                                        sizeof(PlanState *));
+
+       /*
+        * call ExecInitNode on each of the valid plans to be executed and save
+        * the results into the appendplanstates array.
+        *
+        * While at it, find out the first valid partial plan.
+        */
+       j = i = 0;
+       firstvalid = nplans;
+       foreach(lc, node->appendplans)
+       {
+               if (bms_is_member(i, validsubplans))
+               {
+                       Plan       *initNode = (Plan *) lfirst(lc);
+
+                       /*
+                        * Record the lowest appendplans index which is a valid partial
+                        * plan.
+                        */
+                       if (i >= node->first_partial_plan && j < firstvalid)
+                               firstvalid = j;
+
+                       appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
+               }
+               i++;
+       }
+
+       appendstate->as_first_partial_plan = firstvalid;
+       appendstate->appendplans = appendplanstates;
+       appendstate->as_nplans = nplans;
+
+       /*
+        * Miscellaneous initialization
+        */
+
+       appendstate->ps.ps_ProjInfo = NULL;
+
+       /* For parallel query, this will be overridden later. */
+       appendstate->choose_next_subplan = choose_next_subplan_locally;
+
+       return appendstate;
+}
+
+/* ----------------------------------------------------------------
+ *        ExecAppend
+ *
+ *             Handles iteration over multiple subplans.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecAppend(PlanState *pstate)
+{
+       AppendState *node = castNode(AppendState, pstate);
+
+       if (node->as_whichplan < 0)
        {
-               /* ----------------
-                *              as above, end the scan if we go beyond
-                *              the last scan in our list..
-                * ----------------
+               /*
+                * If no subplan has been chosen, we must choose one before
+                * proceeding.
                 */
-               appendstate->as_whichplan = nplans - 1;
-               return FALSE;
+               if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
+                       !node->choose_next_subplan(node))
+                       return ExecClearTuple(node->ps.ps_ResultTupleSlot);
 
+               /* Nothing to do if there are no matching subplans */
+               else if (node->as_whichplan == NO_MATCHING_SUBPLANS)
+                       return ExecClearTuple(node->ps.ps_ResultTupleSlot);
        }
-       else
+
+       for (;;)
        {
-               /* ----------------
-                *              initialize the scan
-                *              (and update the range table appropriately)
-                *                (doesn't this leave the range table hosed for anybody upstream
-                *                 of the Append node??? - jolly )
-                * ----------------
+               PlanState  *subnode;
+               TupleTableSlot *result;
+
+               CHECK_FOR_INTERRUPTS();
+
+               /*
+                * figure out which subplan we are currently processing
                 */
-               if (node->inheritrelid > 0)
-               {
-                       rtentry = nth(whichplan, rtable);
-                       Assert(rtentry != NULL);
+               Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
+               subnode = node->appendplans[node->as_whichplan];
 
-                       rt_store(node->inheritrelid, rangeTable, rtentry);
-               }
-               else
-                       estate->es_range_table = nth(whichplan, rtables);
+               /*
+                * get a tuple from the subplan
+                */
+               result = ExecProcNode(subnode);
 
-               if (appendstate->as_junkFilter_list)
+               if (!TupIsNull(result))
                {
-                       estate->es_junkFilter = (JunkFilter *) nth(whichplan,
-                                                                               appendstate->as_junkFilter_list);
+                       /*
+                        * If the subplan gave us something then return it as-is. We do
+                        * NOT make use of the result slot that was set up in
+                        * ExecInitAppend; there's no need for it.
+                        */
+                       return result;
                }
-               if (appendstate->as_result_relation_info_list)
-               {
-                       estate->es_result_relation_info = (RelationInfo *) nth(whichplan,
-                                                         appendstate->as_result_relation_info_list);
-               }
-               result_slot->ttc_whichplan = whichplan;
 
-               return TRUE;
+               /* choose new subplan; if none, we're done */
+               if (!node->choose_next_subplan(node))
+                       return ExecClearTuple(node->ps.ps_ResultTupleSlot);
        }
 }
 
 /* ----------------------------------------------------------------
- *             ExecInitAppend
- *
- *             Begins all of the subscans of the append node, storing the
- *             scan structures in the 'initialized' vector of the append-state
- *             structure.
+ *             ExecEndAppend
  *
- *        (This is potentially wasteful, since the entire result of the
- *             append node may not be scanned, but this way all of the
- *             structures get allocated in the executor's top level memory
- *             block instead of that of the call to ExecProcAppend.)
+ *             Shuts down the subscans of the append node.
  *
- *             Returns the scan result of the first scan.
+ *             Returns nothing of interest.
  * ----------------------------------------------------------------
  */
-bool
-ExecInitAppend(Append *node, EState *estate, Plan *parent)
+void
+ExecEndAppend(AppendState *node)
 {
-       AppendState *appendstate;
+       PlanState **appendplans;
        int                     nplans;
-       List       *resultList = NULL;
-       List       *rtable;
-       List       *appendplans;
-       bool       *initialized;
        int                     i;
-       Plan       *initNode;
-       List       *junkList;
-       RelationInfo *es_rri = estate->es_result_relation_info;
-
-       /* ----------------
-        *      assign execution state to node and get information
-        *      for append state
-        * ----------------
-        */
-       node->plan.state = estate;
 
-       appendplans = node->appendplans;
-       nplans = length(appendplans);
-       rtable = node->inheritrtable;
-
-       CXT1_printf("ExecInitAppend: context is %d\n", CurrentMemoryContext);
-       initialized = (bool *) palloc(nplans * sizeof(bool));
-
-       /* ----------------
-        *      create new AppendState for our append node
-        * ----------------
+       /*
+        * get information from the node
         */
-       appendstate = makeNode(AppendState);
-       appendstate->as_whichplan = 0;
-       appendstate->as_nplans = nplans;
-       appendstate->as_initialized = initialized;
-       appendstate->as_rtentries = rtable;
-
-       node->appendstate = appendstate;
+       appendplans = node->appendplans;
+       nplans = node->as_nplans;
 
-       /* ----------------
-        *      Miscellanious initialization
-        *
-        *               +      assign node's base_id
-        *               +      assign debugging hooks
-        *
-        *      Append plans don't have expression contexts because they
-        *      never call ExecQual or ExecTargetList.
-        * ----------------
+       /*
+        * shut down each of the subscans
         */
-       ExecAssignNodeBaseInfo(estate, &appendstate->cstate, parent);
+       for (i = 0; i < nplans; i++)
+               ExecEndNode(appendplans[i]);
+}
 
-#define APPEND_NSLOTS 1
-       /* ----------------
-        *      append nodes still have Result slots, which hold pointers
-        *      to tuples, so we have to initialize them..
-        * ----------------
-        */
-       ExecInitResultTupleSlot(estate, &appendstate->cstate);
+void
+ExecReScanAppend(AppendState *node)
+{
+       int                     i;
 
        /*
-        * If the inherits rtentry is the result relation, we have to make a
-        * result relation info list for all inheritors so we can update their
-        * indices and put the result tuples in the right place etc.
-        *
-        * e.g. replace p (age = p.age + 1) from p in person*
+        * If any PARAM_EXEC Params used in pruning expressions have changed, then
+        * we'd better unset the valid subplans so that they are reselected for
+        * the new parameter values.
         */
-       if ((es_rri != (RelationInfo *) NULL) &&
-               (node->inheritrelid == es_rri->ri_RangeTableIndex))
+       if (node->as_prune_state &&
+               bms_overlap(node->ps.chgParam,
+                                       node->as_prune_state->execparamids))
        {
-               RelationInfo *rri;
-               List       *rtentryP;
-
-               foreach(rtentryP, rtable)
-               {
-                       Oid                     reloid;
-                       RangeTblEntry *rtentry = lfirst(rtentryP);
-
-                       reloid = rtentry->relid;
-                       rri = makeNode(RelationInfo);
-                       rri->ri_RangeTableIndex = es_rri->ri_RangeTableIndex;
-                       rri->ri_RelationDesc = heap_open(reloid);
-                       rri->ri_NumIndices = 0;
-                       rri->ri_IndexRelationDescs = NULL;      /* index descs */
-                       rri->ri_IndexRelationInfo = NULL;       /* index key info */
-
-                       resultList = lcons(rri, resultList);
-                       ExecOpenIndices(reloid, rri);
-               }
-               appendstate->as_result_relation_info_list = resultList;
+               bms_free(node->as_valid_subplans);
+               node->as_valid_subplans = NULL;
        }
-       /* ----------------
-        *      call ExecInitNode on each of the plans in our list
-        *      and save the results into the array "initialized"
-        * ----------------
-        */
-       junkList = NIL;
 
-       for (i = 0; i < nplans; i++)
+       for (i = 0; i < node->as_nplans; i++)
        {
-               JunkFilter *j;
-               List       *targetList;
-
-               /* ----------------
-                *      NOTE: we first modify range table in
-                *                exec_append_initialize_next() and
-                *                then initialize the subnode,
-                *                since it may use the range table.
-                * ----------------
+               PlanState  *subnode = node->appendplans[i];
+
+               /*
+                * ExecReScan doesn't know about my subplans, so I have to do
+                * changed-parameter signaling myself.
                 */
-               appendstate->as_whichplan = i;
-               exec_append_initialize_next(node);
-
-               initNode = (Plan *) nth(i, appendplans);
-               initialized[i] = ExecInitNode(initNode, estate, (Plan *) node);
-
-               /* ---------------
-                *      Each targetlist in the subplan may need its own junk filter
-                *
-                *      This is true only when the reln being replaced/deleted is
-                *      the one that we're looking at the subclasses of
-                * ---------------
+               if (node->ps.chgParam != NULL)
+                       UpdateChangedParamSet(subnode, node->ps.chgParam);
+
+               /*
+                * If chgParam of subnode is not null then plan will be re-scanned by
+                * first ExecProcNode.
                 */
-               if ((es_rri != (RelationInfo *) NULL) &&
-                       (node->inheritrelid == es_rri->ri_RangeTableIndex))
-               {
+               if (subnode->chgParam == NULL)
+                       ExecReScan(subnode);
+       }
 
-                       targetList = initNode->targetlist;
-                       j = (JunkFilter *) ExecInitJunkFilter(targetList);
-                       junkList = lappend(junkList, j);
-               }
+       /* Let choose_next_subplan_* function handle setting the first subplan */
+       node->as_whichplan = INVALID_SUBPLAN_INDEX;
+}
 
-       }
-       appendstate->as_junkFilter_list = junkList;
-       if (junkList != NIL)
-               estate->es_junkFilter = (JunkFilter *) lfirst(junkList);
+/* ----------------------------------------------------------------
+ *                                             Parallel Append Support
+ * ----------------------------------------------------------------
+ */
 
-       /* ----------------
-        *      initialize the return type from the appropriate subplan.
-        * ----------------
-        */
-       initNode = (Plan *) nth(0, appendplans);
-       ExecAssignResultType(&appendstate->cstate,
-/*                                              ExecGetExecTupDesc(initNode), */
-                                                ExecGetTupType(initNode));
-       appendstate->cstate.cs_ProjInfo = NULL;
-
-       /* ----------------
-        *      return the result from the first subplan's initialization
-        * ----------------
-        */
-       appendstate->as_whichplan = 0;
-       exec_append_initialize_next(node);
-#ifdef NOT_USED
-       result = (List *) initialized[0];
-#endif
-       return TRUE;
+/* ----------------------------------------------------------------
+ *             ExecAppendEstimate
+ *
+ *             Compute the amount of space we'll need in the parallel
+ *             query DSM, and inform pcxt->estimator about our needs.
+ *
+ *             The size is the fixed part of ParallelAppendState (everything
+ *             before pa_finished) plus one bool per initialized subplan
+ *             (as_nplans).  It is saved in node->pstate_len, which
+ *             ExecAppendInitializeDSM later uses to allocate and zero the
+ *             shared state.  One chunk of that size and one TOC key are
+ *             reserved in the estimator.
+ * ----------------------------------------------------------------
+ */
+void
+ExecAppendEstimate(AppendState *node,
+                                  ParallelContext *pcxt)
+{
+       node->pstate_len =
+               add_size(offsetof(ParallelAppendState, pa_finished),
+                                sizeof(bool) * node->as_nplans);
+
+       shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
+       shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
 
-int
-ExecCountSlotsAppend(Append *node)
+
+/* ----------------------------------------------------------------
+ *             ExecAppendInitializeDSM
+ *
+ *             Set up shared state for Parallel Append.
+ * ----------------------------------------------------------------
+ */
+void
+ExecAppendInitializeDSM(AppendState *node,
+                                               ParallelContext *pcxt)
 {
-       List       *plan;
-       List       *appendplans = node->appendplans;
-       int                     nSlots = 0;
+       ParallelAppendState *pstate;
+
+       pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
+       memset(pstate, 0, node->pstate_len);
+       LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
+       shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
 
-       foreach(plan, appendplans)
-               nSlots += ExecCountSlotsNode((Plan *) lfirst(plan));
-       return nSlots + APPEND_NSLOTS;
+       node->as_pstate = pstate;
+       node->choose_next_subplan = choose_next_subplan_for_leader;
 }
 
 /* ----------------------------------------------------------------
- *        ExecProcAppend
+ *             ExecAppendReInitializeDSM
  *
- *             Handles the iteration over the multiple scans.
+ *             Reset shared state before beginning a fresh scan.
+ * ----------------------------------------------------------------
+ */
+void
+ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
+{
+       ParallelAppendState *pstate = node->as_pstate;  /* shared state stored in as_pstate; pcxt is not referenced here */
+
+       pstate->pa_next_plan = 0;       /* reset the shared "next plan to choose" index */
+       memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans); /* mark all as_nplans subplans as not finished */
+}
+
+/* ----------------------------------------------------------------
+ *             ExecAppendInitializeWorker
  *
- *        NOTE: Can't call this ExecAppend, that name is used in execMain.l
+ *             Copy relevant information from TOC into planstate, and initialize
+ *             whatever is required to choose and execute the optimal subplan.
  * ----------------------------------------------------------------
  */
-TupleTableSlot *
-ExecProcAppend(Append *node)
+void
+ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
 {
-       EState     *estate;
-       AppendState *appendstate;
-
-       int                     whichplan;
-       List       *appendplans;
-       Plan       *subnode;
-       TupleTableSlot *result;
-       TupleTableSlot *result_slot;
-       ScanDirection direction;
-
-       /* ----------------
-        *      get information from the node
-        * ----------------
-        */
-       appendstate = node->appendstate;
-       estate = node->plan.state;
-       direction = estate->es_direction;
+       node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
+       node->choose_next_subplan = choose_next_subplan_for_worker;
+}
 
-       appendplans = node->appendplans;
-       whichplan = appendstate->as_whichplan;
-       result_slot = appendstate->cstate.cs_ResultTupleSlot;
+/* ----------------------------------------------------------------
+ *             choose_next_subplan_locally
+ *
+ *             Choose next subplan for a non-parallel-aware Append,
+ *             returning false if there are no more.
+ * ----------------------------------------------------------------
+ */
+static bool
+choose_next_subplan_locally(AppendState *node)
+{
+       int                     whichplan = node->as_whichplan;
+       int                     nextplan;
 
-       /* ----------------
-        *      figure out which subplan we are currently processing
-        * ----------------
+       /* We should never be called when there are no subplans */
+       Assert(whichplan != NO_MATCHING_SUBPLANS);
+
+       /*
+        * If first call then have the bms member function choose the first valid
+        * subplan by initializing whichplan to -1.  If there happen to be no
+        * valid subplans then the bms member function will handle that by
+        * returning a negative number which will allow us to exit returning a
+        * false value.
         */
-       subnode = (Plan *) nth(whichplan, appendplans);
+       if (whichplan == INVALID_SUBPLAN_INDEX)
+       {
+               if (node->as_valid_subplans == NULL)
+                       node->as_valid_subplans =
+                               ExecFindMatchingSubPlans(node->as_prune_state);
 
-       if (subnode == NULL)
-               elog(DEBUG, "ExecProcAppend: subnode is NULL");
+               whichplan = -1;
+       }
 
-       /* ----------------
-        *      get a tuple from the subplan
-        * ----------------
-        */
-       result = ExecProcNode(subnode, (Plan *) node);
+       /* Ensure whichplan is within the expected range */
+       Assert(whichplan >= -1 && whichplan <= node->as_nplans);
 
-       if (!TupIsNull(result))
-       {
-               /* ----------------
-                *      if the subplan gave us something then place a copy of
-                *      whatever we get into our result slot and return it, else..
-                * ----------------
-                */
-               return ExecStoreTuple(result->val,
-                                                         result_slot, result->ttc_buffer, false);
+       if (ScanDirectionIsForward(node->ps.state->es_direction))
+               nextplan = bms_next_member(node->as_valid_subplans, whichplan);
+       else
+               nextplan = bms_prev_member(node->as_valid_subplans, whichplan);
 
+       if (nextplan < 0)
+               return false;
+
+       node->as_whichplan = nextplan;
+
+       return true;
+}
+
+/* ----------------------------------------------------------------
+ *             choose_next_subplan_for_leader
+ *
+ *      Try to pick a plan which doesn't commit us to doing much
+ *      work locally, so that as much work as possible is done in
+ *      the workers.  Cheapest subplans are at the end.
+ * ----------------------------------------------------------------
+ */
+static bool
+choose_next_subplan_for_leader(AppendState *node)
+{
+       ParallelAppendState *pstate = node->as_pstate;
+
+       /* Backward scan is not supported by parallel-aware plans */
+       Assert(ScanDirectionIsForward(node->ps.state->es_direction));
+
+       /* We should never be called when there are no subplans */
+       Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);
+
+       LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
+
+       if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
+       {
+               /* Mark just-completed subplan as finished. */
+               node->as_pstate->pa_finished[node->as_whichplan] = true;
        }
        else
        {
-               /* ----------------
-                *      .. go on to the "next" subplan in the appropriate
-                *      direction and try processing again (recursively)
-                * ----------------
-                */
-               whichplan = appendstate->as_whichplan;
-
-               if (ScanDirectionIsForward(direction))
-                       appendstate->as_whichplan = whichplan + 1;
-               else
-                       appendstate->as_whichplan = whichplan - 1;
+               /* Start with last subplan. */
+               node->as_whichplan = node->as_nplans - 1;
 
-               /* ----------------
-                *      return something from next node or an empty slot
-                *      all of our subplans have been exhausted.
-                * ----------------
+               /*
+                * If we've yet to determine the valid subplans then do so now.  If
+                * run-time pruning is disabled then the valid subplans will always be
+                * set to all subplans.
                 */
-               if (exec_append_initialize_next(node))
+               if (node->as_valid_subplans == NULL)
                {
-                       ExecSetSlotDescriptorIsNew(result_slot, true);
-                       return ExecProcAppend(node);
+                       node->as_valid_subplans =
+                               ExecFindMatchingSubPlans(node->as_prune_state);
+
+                       /*
+                        * Mark each invalid plan as finished to allow the loop below to
+                        * select the first valid subplan.
+                        */
+                       mark_invalid_subplans_as_finished(node);
                }
-               else
-                       return ExecClearTuple(result_slot);
        }
+
+       /* Loop until we find a subplan to execute. */
+       while (pstate->pa_finished[node->as_whichplan])
+       {
+               if (node->as_whichplan == 0)
+               {
+                       pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
+                       node->as_whichplan = INVALID_SUBPLAN_INDEX;
+                       LWLockRelease(&pstate->pa_lock);
+                       return false;
+               }
+
+               /*
+                * We needn't pay attention to as_valid_subplans here as all invalid
+                * plans have been marked as finished.
+                */
+               node->as_whichplan--;
+       }
+
+       /* If non-partial, immediately mark as finished. */
+       if (node->as_whichplan < node->as_first_partial_plan)
+               node->as_pstate->pa_finished[node->as_whichplan] = true;
+
+       LWLockRelease(&pstate->pa_lock);
+
+       return true;
 }
 
 /* ----------------------------------------------------------------
- *             ExecEndAppend
+ *             choose_next_subplan_for_worker
  *
- *             Shuts down the subscans of the append node.
+ *             Choose next subplan for a parallel-aware Append, returning
+ *             false if there are no more.
  *
- *             Returns nothing of interest.
+ *             We start from the first plan and advance through the list;
+ *             when we reach the end of the list, we loop back to the first
+ *             partial plan.  This assigns the non-partial plans first in
+ *             order of descending cost and then spreads out the workers
+ *             as evenly as possible across the remaining partial plans.
  * ----------------------------------------------------------------
  */
-void
-ExecEndAppend(Append *node)
+static bool
+choose_next_subplan_for_worker(AppendState *node)
 {
-       AppendState *appendstate;
-       int                     nplans;
-       List       *appendplans;
-       bool       *initialized;
-       int                     i;
-       List       *resultRelationInfoList;
-       RelationInfo *resultRelationInfo;
+       ParallelAppendState *pstate = node->as_pstate;
 
-       /* ----------------
-        *      get information from the node
-        * ----------------
-        */
-       appendstate = node->appendstate;
-       appendplans = node->appendplans;
-       nplans = appendstate->as_nplans;
-       initialized = appendstate->as_initialized;
+       /* Backward scan is not supported by parallel-aware plans */
+       Assert(ScanDirectionIsForward(node->ps.state->es_direction));
+
+       /* We should never be called when there are no subplans */
+       Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);
+
+       LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
 
-       /* ----------------
-        *      shut down each of the subscans
-        * ----------------
+       /* Mark just-completed subplan as finished. */
+       if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
+               node->as_pstate->pa_finished[node->as_whichplan] = true;
+
+       /*
+        * If we've yet to determine the valid subplans then do so now.  If
+        * run-time pruning is disabled then the valid subplans will always be set
+        * to all subplans.
         */
-       for (i = 0; i < nplans; i++)
+       else if (node->as_valid_subplans == NULL)
        {
-               if (initialized[i] == TRUE)
-                       ExecEndNode((Plan *) nth(i, appendplans), (Plan *) node);
+               node->as_valid_subplans =
+                       ExecFindMatchingSubPlans(node->as_prune_state);
+               mark_invalid_subplans_as_finished(node);
        }
 
-       /* ----------------
-        *      close out the different result relations
-        * ----------------
-        */
-       resultRelationInfoList = appendstate->as_result_relation_info_list;
-       while (resultRelationInfoList != NIL)
+       /* If all the plans are already done, we have nothing to do */
+       if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
        {
-               Relation        resultRelationDesc;
+               LWLockRelease(&pstate->pa_lock);
+               return false;
+       }
+
+       /* Save the plan from which we are starting the search. */
+       node->as_whichplan = pstate->pa_next_plan;
+
+       /* Loop until we find a valid subplan to execute. */
+       while (pstate->pa_finished[pstate->pa_next_plan])
+       {
+               int                     nextplan;
+
+               nextplan = bms_next_member(node->as_valid_subplans,
+                                                                  pstate->pa_next_plan);
+               if (nextplan >= 0)
+               {
+                       /* Advance to the next valid plan. */
+                       pstate->pa_next_plan = nextplan;
+               }
+               else if (node->as_whichplan > node->as_first_partial_plan)
+               {
+                       /*
+                        * Try looping back to the first valid partial plan, if there is
+                        * one.  If there isn't, arrange to bail out below.
+                        */
+                       nextplan = bms_next_member(node->as_valid_subplans,
+                                                                          node->as_first_partial_plan - 1);
+                       pstate->pa_next_plan =
+                               nextplan < 0 ? node->as_whichplan : nextplan;
+               }
+               else
+               {
+                       /*
+                        * At last plan, and either there are no partial plans or we've
+                        * tried them all.  Arrange to bail out.
+                        */
+                       pstate->pa_next_plan = node->as_whichplan;
+               }
 
-               resultRelationInfo = (RelationInfo *) lfirst(resultRelationInfoList);
-               resultRelationDesc = resultRelationInfo->ri_RelationDesc;
-               heap_close(resultRelationDesc);
-               pfree(resultRelationInfo);
-               resultRelationInfoList = lnext(resultRelationInfoList);
+               if (pstate->pa_next_plan == node->as_whichplan)
+               {
+                       /* We've tried everything! */
+                       pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
+                       LWLockRelease(&pstate->pa_lock);
+                       return false;
+               }
        }
-       if (appendstate->as_result_relation_info_list)
-               pfree(appendstate->as_result_relation_info_list);
+
+       /* Pick the plan we found, and advance pa_next_plan one more time. */
+       node->as_whichplan = pstate->pa_next_plan;
+       pstate->pa_next_plan = bms_next_member(node->as_valid_subplans,
+                                                                                  pstate->pa_next_plan);
 
        /*
-        * XXX should free appendstate->as_rtentries  and
-        * appendstate->as_junkfilter_list here
+        * If there are no more valid plans then try setting the next plan to the
+        * first valid partial plan.
         */
+       if (pstate->pa_next_plan < 0)
+       {
+               int                     nextplan = bms_next_member(node->as_valid_subplans,
+                                                                                          node->as_first_partial_plan - 1);
+
+               if (nextplan >= 0)
+                       pstate->pa_next_plan = nextplan;
+               else
+               {
+                       /*
+                        * There are no valid partial plans, and we already chose the last
+                        * non-partial plan; so flag that there's nothing more for our
+                        * fellow workers to do.
+                        */
+                       pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
+               }
+       }
+
+       /* If non-partial, immediately mark as finished. */
+       if (node->as_whichplan < node->as_first_partial_plan)
+               node->as_pstate->pa_finished[node->as_whichplan] = true;
+
+       LWLockRelease(&pstate->pa_lock);
+
+       return true;
 }
-void
-ExecReScanAppend(Append *node, ExprContext *exprCtxt, Plan *parent)
+
+/*
+ * mark_invalid_subplans_as_finished
+ *             Marks the ParallelAppendState's pa_finished as true for each invalid
+ *             subplan.
+ *
+ * This function should only be called for parallel Append with run-time
+ * pruning enabled.
+ */
+static void
+mark_invalid_subplans_as_finished(AppendState *node)
 {
-       AppendState *appendstate = node->appendstate;
-       int                     nplans = length(node->appendplans);
        int                     i;
 
-       for (i = 0; i < nplans; i++)
-       {
-               Plan       *rescanNode;
+       /* Only valid to call this while in parallel Append mode */
+       Assert(node->as_pstate);
 
-               appendstate->as_whichplan = i;
-               rescanNode = (Plan *) nth(i, node->appendplans);
-               if (rescanNode->chgParam == NULL)
-               {
-                       exec_append_initialize_next(node);
-                       ExecReScan((Plan *) rescanNode, exprCtxt, (Plan *) node);
-               }
+       /* Shouldn't have been called when run-time pruning is not enabled */
+       Assert(node->as_prune_state);
+
+       /* Nothing to do if all plans are valid */
+       if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
+               return;
+
+       /* Mark all non-valid plans as finished */
+       for (i = 0; i < node->as_nplans; i++)
+       {
+               if (!bms_is_member(i, node->as_valid_subplans))
+                       node->as_pstate->pa_finished[i] = true;
        }
-       appendstate->as_whichplan = 0;
-       exec_append_initialize_next(node);
 }