Faster expression evaluation and targetlist projection.

[postgresql] / src / include / nodes / execnodes.h
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index fec07b8e426e479e10511404bbd198bae7e26282..ff428951186007083f9bb35d4be6a57bdfb8e760 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -4,7 +4,7 @@
   *       definitions for executor state nodes
   *
   *
- * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * src/include/nodes/execnodes.h
@@ -16,12 +16,84 @@
  
  #include "access/genam.h"
  #include "access/heapam.h"
+#include "access/tupconvert.h"
  #include "executor/instrument.h"
+#include "lib/pairingheap.h"
  #include "nodes/params.h"
  #include "nodes/plannodes.h"
+#include "utils/hsearch.h"
  #include "utils/reltrigger.h"
  #include "utils/sortsupport.h"
  #include "utils/tuplestore.h"
+#include "utils/tuplesort.h"
+#include "nodes/tidbitmap.h"
+#include "storage/condition_variable.h"
+
+
+/* ----------------
+ *             ExprState node
+ *
+ * ExprState is the top-level node for expression evaluation.
+ * It contains instructions (in ->steps) to evaluate the expression.
+ * ----------------
+ */
+struct ExprState;                              /* forward references in this file */
+struct ExprContext;
+struct ExprEvalStep;                   /* avoid including execExpr.h everywhere */
+
+typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression,
+                                                                                               struct ExprContext *econtext,
+                                                                                               bool *isNull);
+
+/* Bits in ExprState->flags (see also execExpr.h for private flag bits): */
+/* expression is for use with ExecQual() */
+#define EEO_FLAG_IS_QUAL                                       (1 << 0)
+
+typedef struct ExprState
+{
+       Node            tag;
+
+       uint8           flags;                  /* bitmask of EEO_FLAG_* bits, see above */
+
+       /*
+        * Storage for result value of a scalar expression, or for individual
+        * column results within expressions built by ExecBuildProjectionInfo().
+        */
+       bool            resnull;
+       Datum           resvalue;
+
+       /*
+        * If projecting a tuple result, this slot holds the result; else NULL.
+        */
+       TupleTableSlot *resultslot;
+
+       /*
+        * Instructions to compute expression's return value.
+        */
+       struct ExprEvalStep *steps;
+
+       /*
+        * Function that actually evaluates the expression.  This can be set to
+        * different values depending on the complexity of the expression.
+        */
+       ExprStateEvalFunc evalfunc;
+
+       /* original expression tree, for debugging only */
+       Expr       *expr;
+
+       /*
+        * XXX: following only needed during "compilation", could be thrown away.
+        */
+
+       int                     steps_len;              /* number of steps currently */
+       int                     steps_alloc;    /* allocated length of steps array */
+
+       Datum      *innermost_caseval;
+       bool       *innermost_casenull;
+
+       Datum      *innermost_domainval;
+       bool       *innermost_domainnull;
+} ExprState;
  
  
  /* ----------------
@@ -41,10 +113,15 @@
   *             ExclusionOps            Per-column exclusion operators, or NULL if none
   *             ExclusionProcs          Underlying function OIDs for ExclusionOps
   *             ExclusionStrats         Opclass strategy numbers for ExclusionOps
+ *             UniqueOps                       These are like Exclusion*, but for unique indexes
+ *             UniqueProcs
+ *             UniqueStrats
   *             Unique                          is it a unique index?
   *             ReadyForInserts         is it valid for inserts?
   *             Concurrent                      are we doing a concurrent index build?
   *             BrokenHotChain          did we detect any broken HOT chains?
+ *             AmCache                         private cache area for index AM
+ *             Context                         memory context holding this IndexInfo
   *
   * ii_Concurrent and ii_BrokenHotChain are used only during index build;
   * they're conventionally set to false otherwise.
@@ -58,14 +135,19 @@ typedef struct IndexInfo
         List       *ii_Expressions; /* list of Expr */
         List       *ii_ExpressionsState;        /* list of ExprState */
         List       *ii_Predicate;       /* list of Expr */
-       List       *ii_PredicateState;          /* list of ExprState */
+       ExprState  *ii_PredicateState;
         Oid                *ii_ExclusionOps;    /* array with one entry per column */
         Oid                *ii_ExclusionProcs;          /* array with one entry per column */
         uint16     *ii_ExclusionStrats;         /* array with one entry per column */
+       Oid                *ii_UniqueOps;       /* array with one entry per column */
+       Oid                *ii_UniqueProcs; /* array with one entry per column */
+       uint16     *ii_UniqueStrats;    /* array with one entry per column */
         bool            ii_Unique;
         bool            ii_ReadyForInserts;
         bool            ii_Concurrent;
         bool            ii_BrokenHotChain;
+       void       *ii_AmCache;
+       MemoryContext ii_Context;
  } IndexInfo;
  
  /* ----------------
@@ -88,14 +170,14 @@ typedef struct ExprContext_CB
   *
   *             This class holds the "current context" information
   *             needed to evaluate expressions for doing tuple qualifications
- *             and tuple projections.  For example, if an expression refers
+ *             and tuple projections.  For example, if an expression refers
   *             to an attribute in the current inner tuple then we need to know
   *             what the current inner tuple is and so we look at the expression
   *             context.
   *
   *     There are two memory contexts associated with an ExprContext:
   *     * ecxt_per_query_memory is a query-lifespan context, typically the same
- *       context the ExprContext node itself is allocated in.  This context
+ *       context the ExprContext node itself is allocated in.  This context
   *       can be used for purposes such as storing function call cache info.
   *     * ecxt_per_tuple_memory is a short-term context for expression results.
   *       As the name suggests, it will typically be reset once per tuple,
@@ -141,12 +223,13 @@ typedef struct ExprContext
         /* Link to containing EState (NULL if a standalone ExprContext) */
         struct EState *ecxt_estate;
  
-       /* Functions to call back when ExprContext is shut down */
+       /* Functions to call back when ExprContext is shut down or rescanned */
         ExprContext_CB *ecxt_callbacks;
  } ExprContext;
  
  /*
- * Set-result status returned by ExecEvalExpr()
+ * Set-result status used when evaluating functions potentially returning a
+ * set.
   */
  typedef enum
  {
@@ -197,53 +280,21 @@ typedef struct ReturnSetInfo
   *             that is, form new tuples by evaluation of targetlist expressions.
   *             Nodes which need to do projections create one of these.
   *
+ *             The target tuple slot is kept in ProjectionInfo->pi_state.resultslot.
   *             ExecProject() evaluates the tlist, forms a tuple, and stores it
- *             in the given slot.      Note that the result will be a "virtual" tuple
+ *             in the given slot.  Note that the result will be a "virtual" tuple
   *             unless ExecMaterializeSlot() is then called to force it to be
- *             converted to a physical tuple.  The slot must have a tupledesc
+ *             converted to a physical tuple.  The slot must have a tupledesc
   *             that matches the output of the tlist!
- *
- *             The planner very often produces tlists that consist entirely of
- *             simple Var references (lower levels of a plan tree almost always
- *             look like that).  And top-level tlists are often mostly Vars too.
- *             We therefore optimize execution of simple-Var tlist entries.
- *             The pi_targetlist list actually contains only the tlist entries that
- *             aren't simple Vars, while those that are Vars are processed using the
- *             varSlotOffsets/varNumbers/varOutputCols arrays.
- *
- *             The lastXXXVar fields are used to optimize fetching of fields from
- *             input tuples: they let us do a slot_getsomeattrs() call to ensure
- *             that all needed attributes are extracted in one pass.
- *
- *             targetlist              target list for projection (non-Var expressions only)
- *             exprContext             expression context in which to evaluate targetlist
- *             slot                    slot to place projection result in
- *             itemIsDone              workspace array for ExecProject
- *             directMap               true if varOutputCols[] is an identity map
- *             numSimpleVars   number of simple Vars found in original tlist
- *             varSlotOffsets  array indicating which slot each simple Var is from
- *             varNumbers              array containing input attr numbers of simple Vars
- *             varOutputCols   array containing output attr numbers of simple Vars
- *             lastInnerVar    highest attnum from inner tuple slot (0 if none)
- *             lastOuterVar    highest attnum from outer tuple slot (0 if none)
- *             lastScanVar             highest attnum from scan tuple slot (0 if none)
   * ----------------
   */
  typedef struct ProjectionInfo
  {
         NodeTag         type;
-       List       *pi_targetlist;
+       /* instructions to evaluate projection */
+       ExprState       pi_state;
+       /* expression context in which to evaluate expression */
         ExprContext *pi_exprContext;
-       TupleTableSlot *pi_slot;
-       ExprDoneCond *pi_itemIsDone;
-       bool            pi_directMap;
-       int                     pi_numSimpleVars;
-       int                *pi_varSlotOffsets;
-       int                *pi_varNumbers;
-       int                *pi_varOutputCols;
-       int                     pi_lastInnerVar;
-       int                     pi_lastOuterVar;
-       int                     pi_lastScanVar;
  } ProjectionInfo;
  
  /* ----------------
@@ -255,7 +306,7 @@ typedef struct ProjectionInfo
   *       in emitted tuples.  For example, when we do an UPDATE query,
   *       the planner adds a "junk" entry to the targetlist so that the tuples
   *       returned to ExecutePlan() contain an extra attribute: the ctid of
- *       the tuple to be updated.      This is needed to do the update, but we
+ *       the tuple to be updated.  This is needed to do the update, but we
   *       don't want the ctid to be part of the stored new tuple!  So, we
   *       apply a "junk filter" to remove the junk attributes and form the
   *       real output tuple.  The junkfilter code also provides routines to
@@ -270,7 +321,8 @@ typedef struct ProjectionInfo
   *       resultSlot:           tuple slot used to hold cleaned tuple.
   *       junkAttNo:            not used by junkfilter code.  Can be used by caller
   *                                             to remember the attno of a specific junk attribute
- *                                             (execMain.c stores the "ctid" attno here).
+ *                                             (nodeModifyTable.c keeps the "ctid" or "wholerow"
+ *                                             attno here).
   * ----------------
   */
  typedef struct JunkFilter
@@ -300,9 +352,18 @@ typedef struct JunkFilter
   *             TrigFunctions                   cached lookup info for trigger functions
   *             TrigWhenExprs                   array of trigger WHEN expr states
   *             TrigInstrument                  optional runtime measurements for triggers
+ *             FdwRoutine                              FDW callback functions, if foreign table
+ *             FdwState                                available to save private state of FDW
+ *             usesFdwDirectModify             true when modifying foreign table directly
+ *             WithCheckOptions                list of WithCheckOption's to be checked
+ *             WithCheckOptionExprs    list of WithCheckOption expr states
   *             ConstraintExprs                 array of constraint-checking expr states
   *             junkFilter                              for removing junk attributes from tuples
   *             projectReturning                for computing a RETURNING list
+ *             onConflictSetProj               for computing ON CONFLICT DO UPDATE SET
+ *             onConflictSetWhere              expr state for the ON CONFLICT DO UPDATE qual
+ *             PartitionCheck                  partition check expression
+ *             PartitionCheckExpr              partition check expression state
   * ----------------
   */
  typedef struct ResultRelInfo
@@ -315,11 +376,21 @@ typedef struct ResultRelInfo
         IndexInfo **ri_IndexRelationInfo;
         TriggerDesc *ri_TrigDesc;
         FmgrInfo   *ri_TrigFunctions;
-       List      **ri_TrigWhenExprs;
+       ExprState **ri_TrigWhenExprs;
         Instrumentation *ri_TrigInstrument;
-       List      **ri_ConstraintExprs;
+       struct FdwRoutine *ri_FdwRoutine;
+       void       *ri_FdwState;
+       bool            ri_usesFdwDirectModify;
+       List       *ri_WithCheckOptions;
+       List       *ri_WithCheckOptionExprs;
+       ExprState **ri_ConstraintExprs;
         JunkFilter *ri_junkFilter;
         ProjectionInfo *ri_projectReturning;
+       ProjectionInfo *ri_onConflictSetProj;
+       ExprState  *ri_onConflictSetWhere;
+       List       *ri_PartitionCheck;
+       ExprState  *ri_PartitionCheckExpr;
+       Relation        ri_PartitionRoot;
  } ResultRelInfo;
  
  /* ----------------
@@ -338,6 +409,7 @@ typedef struct EState
         Snapshot        es_crosscheck_snapshot; /* crosscheck time qual for RI */
         List       *es_range_table; /* List of RangeTblEntry */
         PlannedStmt *es_plannedstmt;    /* link to top of plan tree */
+       const char *es_sourceText;      /* Source text from QueryDesc */
  
         JunkFilter *es_junkFilter;      /* top-level junk filter, if any */
  
@@ -366,7 +438,7 @@ typedef struct EState
  
         List       *es_rowMarks;        /* List of ExecRowMarks */
  
-       uint32          es_processed;   /* # of tuples processed */
+       uint64          es_processed;   /* # of tuples processed */
         Oid                     es_lastoid;             /* last oid processed (by INSERT) */
  
         int                     es_top_eflags;  /* eflags passed to ExecutorStart */
@@ -388,7 +460,7 @@ typedef struct EState
  
         /*
          * These fields are for re-evaluating plan quals when an updated tuple is
-        * substituted in READ COMMITTED mode.  es_epqTuple[] contains tuples that
+        * substituted in READ COMMITTED mode.  es_epqTuple[] contains tuples that
          * scan plan nodes should return instead of whatever they'd normally
          * return, or NULL if nothing to return; es_epqTupleSet[] is true if a
          * particular array entry is valid; and es_epqScanDone[] is state to
@@ -398,35 +470,47 @@ typedef struct EState
         HeapTuple  *es_epqTuple;        /* array of EPQ substitute tuples */
         bool       *es_epqTupleSet; /* true if EPQ tuple is provided */
         bool       *es_epqScanDone; /* true if EPQ tuple has been fetched */
+
+       /* The per-query shared memory area to use for parallel execution. */
+       struct dsa_area   *es_query_dsa;
  } EState;
  
  
  /*
   * ExecRowMark -
- *        runtime representation of FOR UPDATE/SHARE clauses
+ *        runtime representation of FOR [KEY] UPDATE/SHARE clauses
   *
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an
   * ExecRowMark for each non-target relation in the query (except inheritance
- * parent RTEs, which can be ignored at runtime).  See PlanRowMark for details
- * about most of the fields.  In addition to fields directly derived from
- * PlanRowMark, we store curCtid, which is used by the WHERE CURRENT OF code.
+ * parent RTEs, which can be ignored at runtime).  Virtual relations such as
+ * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
+ * PlanRowMark for details about most of the fields.  In addition to fields
+ * directly derived from PlanRowMark, we store an activity flag (to denote
+ * inactive children of inheritance trees), curCtid, which is used by the
+ * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan
+ * node that sources the relation (e.g., for a foreign table the FDW can use
+ * ermExtra to hold information).
   *
   * EState->es_rowMarks is a list of these structs.
   */
  typedef struct ExecRowMark
  {
         Relation        relation;               /* opened and suitably locked relation */
+       Oid                     relid;                  /* its OID (or InvalidOid, if subquery) */
         Index           rti;                    /* its range table index */
         Index           prti;                   /* parent range table index, if child */
         Index           rowmarkId;              /* unique identifier for resjunk columns */
         RowMarkType markType;           /* see enum in nodes/plannodes.h */
-       bool            noWait;                 /* NOWAIT option */
+       LockClauseStrength strength;    /* LockingClause's strength, or LCS_NONE */
+       LockWaitPolicy waitPolicy;      /* NOWAIT and SKIP LOCKED */
+       bool            ermActive;              /* is this mark relevant for current tuple? */
         ItemPointerData curCtid;        /* ctid of currently locked tuple, if any */
+       void       *ermExtra;           /* available for use by relation source node */
  } ExecRowMark;
  
  /*
   * ExecAuxRowMark -
- *        additional runtime representation of FOR UPDATE/SHARE clauses
+ *        additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
   *
   * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
   * deal with.  In addition to a pointer to the related entry in es_rowMarks,
@@ -466,14 +550,23 @@ typedef struct TupleHashTableData *TupleHashTable;
  
  typedef struct TupleHashEntryData
  {
-       /* firstTuple must be the first field in this struct! */
         MinimalTuple firstTuple;        /* copy of first tuple in this group */
-       /* there may be additional data beyond the end of this struct */
-} TupleHashEntryData;                  /* VARIABLE LENGTH STRUCT */
+       void       *additional;         /* user data */
+       uint32          status;                 /* hash status */
+       uint32          hash;                   /* hash value (cached) */
+} TupleHashEntryData;
+
+/* define parameters necessary to generate the tuple hash table interface */
+#define SH_PREFIX tuplehash
+#define SH_ELEMENT_TYPE TupleHashEntryData
+#define SH_KEY_TYPE MinimalTuple
+#define SH_SCOPE extern
+#define SH_DECLARE
+#include "lib/simplehash.h"
  
  typedef struct TupleHashTableData
  {
-       HTAB       *hashtab;            /* underlying dynahash table */
+       tuplehash_hash *hashtab;        /* underlying hash table */
         int                     numCols;                /* number of columns in lookup key */
         AttrNumber *keyColIdx;          /* attr numbers of key columns */
         FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
@@ -486,9 +579,10 @@ typedef struct TupleHashTableData
         TupleTableSlot *inputslot;      /* current input tuple's slot */
         FmgrInfo   *in_hash_funcs;      /* hash functions for input datatype(s) */
         FmgrInfo   *cur_eq_funcs;       /* equality functions for input vs. table */
+       uint32          hash_iv;                /* hash-function IV */
  }      TupleHashTableData;
  
-typedef HASH_SEQ_STATUS TupleHashIterator;
+typedef tuplehash_iterator TupleHashIterator;
  
  /*
   * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
@@ -496,89 +590,35 @@ typedef HASH_SEQ_STATUS TupleHashIterator;
   * explicit scan termination is needed).
   */
  #define InitTupleHashIterator(htable, iter) \
-       hash_seq_init(iter, (htable)->hashtab)
+       tuplehash_start_iterate(htable->hashtab, iter)
  #define TermTupleHashIterator(iter) \
-       hash_seq_term(iter)
+       ((void) 0)
  #define ResetTupleHashIterator(htable, iter) \
-       do { \
-               hash_freeze((htable)->hashtab); \
-               hash_seq_init(iter, (htable)->hashtab); \
-       } while (0)
-#define ScanTupleHashTable(iter) \
-       ((TupleHashEntry) hash_seq_search(iter))
+       InitTupleHashIterator(htable, iter)
+#define ScanTupleHashTable(htable, iter) \
+       tuplehash_iterate(htable->hashtab, iter)
  
  
  /* ----------------------------------------------------------------
- *                              Expression State Trees
- *
- * Each executable expression tree has a parallel ExprState tree.
- *
- * Unlike PlanState, there is not an exact one-for-one correspondence between
- * ExprState node types and Expr node types.  Many Expr node types have no
- * need for node-type-specific run-time state, and so they can use plain
- * ExprState or GenericExprState as their associated ExprState node type.
+ *                              Expression State Nodes
+ *
+ * Formerly, there was a separate executor expression state node corresponding
+ * to each node in a planned expression tree.  That's no longer the case; for
+ * common expression node types, all the execution info is embedded into
+ * step(s) in a single ExprState node.  But we still have a few executor state
+ * node types for selected expression node types, mostly those in which info
+ * has to be shared with other parts of the execution state tree.
   * ----------------------------------------------------------------
   */
  
-/* ----------------
- *             ExprState node
- *
- * ExprState is the common superclass for all ExprState-type nodes.
- *
- * It can also be instantiated directly for leaf Expr nodes that need no
- * local run-time state (such as Var, Const, or Param).
- *
- * To save on dispatch overhead, each ExprState node contains a function
- * pointer to the routine to execute to evaluate the node.
- * ----------------
- */
-
-typedef struct ExprState ExprState;
-
-typedef Datum (*ExprStateEvalFunc) (ExprState *expression,
-                                                                                               ExprContext *econtext,
-                                                                                               bool *isNull,
-                                                                                               ExprDoneCond *isDone);
-
-struct ExprState
-{
-       NodeTag         type;
-       Expr       *expr;                       /* associated Expr node */
-       ExprStateEvalFunc evalfunc; /* routine to run to execute node */
-};
-
-/* ----------------
- *             GenericExprState node
- *
- * This is used for Expr node types that need no local run-time state,
- * but have one child Expr node.
- * ----------------
- */
-typedef struct GenericExprState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* state of my child node */
-} GenericExprState;
-
-/* ----------------
- *             WholeRowVarExprState node
- * ----------------
- */
-typedef struct WholeRowVarExprState
-{
-       ExprState       xprstate;
-       struct PlanState *parent;       /* parent PlanState, or NULL if none */
-       JunkFilter *wrv_junkFilter; /* JunkFilter to remove resjunk cols */
-} WholeRowVarExprState;
-
  /* ----------------
   *             AggrefExprState node
   * ----------------
   */
  typedef struct AggrefExprState
  {
-       ExprState       xprstate;
-       List       *args;                       /* states of argument expressions */
+       NodeTag         type;
+       Aggref     *aggref;                     /* expression plan node */
         int                     aggno;                  /* ID number for agg within its plan node */
  } AggrefExprState;
  
@@ -588,49 +628,40 @@ typedef struct AggrefExprState
   */
  typedef struct WindowFuncExprState
  {
-       ExprState       xprstate;
-       List       *args;                       /* states of argument expressions */
+       NodeTag         type;
+       WindowFunc *wfunc;                      /* expression plan node */
+       List       *args;                       /* ExprStates for argument expressions */
+       ExprState  *aggfilter;          /* FILTER expression */
         int                     wfuncno;                /* ID number for wfunc within its plan node */
  } WindowFuncExprState;
  
-/* ----------------
- *             ArrayRefExprState node
- *
- * Note: array types can be fixed-length (typlen > 0), but only when the
- * element type is itself fixed-length.  Otherwise they are varlena structures
- * and have typlen = -1.  In any case, an array type is never pass-by-value.
- * ----------------
- */
-typedef struct ArrayRefExprState
-{
-       ExprState       xprstate;
-       List       *refupperindexpr;    /* states for child nodes */
-       List       *reflowerindexpr;
-       ExprState  *refexpr;
-       ExprState  *refassgnexpr;
-       int16           refattrlength;  /* typlen of array type */
-       int16           refelemlength;  /* typlen of the array element type */
-       bool            refelembyval;   /* is the element type pass-by-value? */
-       char            refelemalign;   /* typalign of the element type */
-} ArrayRefExprState;
  
  /* ----------------
- *             FuncExprState node
+ *             SetExprState node
   *
- * Although named for FuncExpr, this is also used for OpExpr, DistinctExpr,
- * and NullIf nodes; be careful to check what xprstate.expr is actually
- * pointing at!
+ * State for evaluating a potentially set-returning expression (like FuncExpr
+ * or OpExpr).  In some cases, like some of the expressions in ROWS FROM(...)
+ * the expression might not be a SRF, but nonetheless it uses the same
+ * machinery as SRFs; it will be treated as a SRF returning a single row.
   * ----------------
   */
-typedef struct FuncExprState
+typedef struct SetExprState
  {
-       ExprState       xprstate;
-       List       *args;                       /* states of argument expressions */
+       NodeTag         type;
+       Expr       *expr;                       /* expression plan node */
+       List       *args;                       /* ExprStates for argument expressions */
+
+       /*
+        * In ROWS FROM, functions can be inlined, removing the FuncExpr normally
+        * inside.  In such a case this is the compiled expression (which cannot
+        * return a set), which'll be evaluated using regular ExecEvalExpr().
+        */
+       ExprState  *elidedFuncState;
  
         /*
          * Function manager's lookup info for the target function.  If func.fn_oid
          * is InvalidOid, we haven't initialized it yet (nor any of the following
-        * fields).
+        * fields, except funcReturnsSet).
          */
         FmgrInfo        func;
  
@@ -644,12 +675,18 @@ typedef struct FuncExprState
  
         /*
          * In some cases we need to compute a tuple descriptor for the function's
-        * output.      If so, it's stored here.
+        * output.  If so, it's stored here.
          */
         TupleDesc       funcResultDesc;
         bool            funcReturnsTuple;               /* valid when funcResultDesc isn't
                                                                                  * NULL */
  
+       /*
+        * Remember whether the function is declared to return a set.  This is set
+        * by ExecInitExpr, and is valid even before the FmgrInfo is set up.
+        */
+       bool            funcReturnsSet;
+
         /*
          * setArgsValid is true when we are evaluating a set-returning function
          * that uses value-per-call mode and we are in the middle of a call
@@ -659,16 +696,9 @@ typedef struct FuncExprState
          */
         bool            setArgsValid;
  
-       /*
-        * Flag to remember whether we found a set-valued argument to the
-        * function. This causes the function result to be a set as well. Valid
-        * only when setArgsValid is true or funcResultStore isn't NULL.
-        */
-       bool            setHasSetArg;   /* some argument returns a set */
-
         /*
          * Flag to remember whether we have registered a shutdown callback for
-        * this FuncExprState.  We do so only if funcResultStore or setArgsValid
+        * this SetExprState.  We do so only if funcResultStore or setArgsValid
          * has been set at least once (since all the callback is for is to release
          * the tuplestore or clear setArgsValid).
          */
@@ -680,33 +710,7 @@ typedef struct FuncExprState
          * argument values between calls, when setArgsValid is true.
          */
         FunctionCallInfoData fcinfo_data;
-} FuncExprState;
-
-/* ----------------
- *             ScalarArrayOpExprState node
- *
- * This is a FuncExprState plus some additional data.
- * ----------------
- */
-typedef struct ScalarArrayOpExprState
-{
-       FuncExprState fxprstate;
-       /* Cached info about array element type */
-       Oid                     element_type;
-       int16           typlen;
-       bool            typbyval;
-       char            typalign;
-} ScalarArrayOpExprState;
-
-/* ----------------
- *             BoolExprState node
- * ----------------
- */
-typedef struct BoolExprState
-{
-       ExprState       xprstate;
-       List       *args;                       /* states of argument expression(s) */
-} BoolExprState;
+} SetExprState;
  
  /* ----------------
   *             SubPlanState node
@@ -714,8 +718,10 @@ typedef struct BoolExprState
   */
  typedef struct SubPlanState
  {
-       ExprState       xprstate;
+       NodeTag         type;
+       SubPlan    *subplan;            /* expression plan node */
         struct PlanState *planstate;    /* subselect plan's state tree */
+       struct PlanState *parent;       /* parent plan node's state tree */
         ExprState  *testexpr;           /* state of combining expression */
         List       *args;                       /* states of argument expression(s) */
         HeapTuple       curTuple;               /* copy of most recent tuple from subplan */
@@ -743,201 +749,18 @@ typedef struct SubPlanState
   */
  typedef struct AlternativeSubPlanState
  {
-       ExprState       xprstate;
-       List       *subplans;           /* states of alternative subplans */
+       NodeTag         type;
+       AlternativeSubPlan *subplan;    /* expression plan node */
+       List       *subplans;           /* SubPlanStates of alternative subplans */
         int                     active;                 /* list index of the one we're using */
  } AlternativeSubPlanState;
  
-/* ----------------
- *             FieldSelectState node
- * ----------------
- */
-typedef struct FieldSelectState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input expression */
-       TupleDesc       argdesc;                /* tupdesc for most recent input */
-} FieldSelectState;
-
-/* ----------------
- *             FieldStoreState node
- * ----------------
- */
-typedef struct FieldStoreState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input tuple value */
-       List       *newvals;            /* new value(s) for field(s) */
-       TupleDesc       argdesc;                /* tupdesc for most recent input */
-} FieldStoreState;
-
-/* ----------------
- *             CoerceViaIOState node
- * ----------------
- */
-typedef struct CoerceViaIOState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input expression */
-       FmgrInfo        outfunc;                /* lookup info for source output function */
-       FmgrInfo        infunc;                 /* lookup info for result input function */
-       Oid                     intypioparam;   /* argument needed for input function */
-} CoerceViaIOState;
-
-/* ----------------
- *             ArrayCoerceExprState node
- * ----------------
- */
-typedef struct ArrayCoerceExprState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input array value */
-       Oid                     resultelemtype; /* element type of result array */
-       FmgrInfo        elemfunc;               /* lookup info for element coercion function */
-       /* use struct pointer to avoid including array.h here */
-       struct ArrayMapState *amstate;          /* workspace for array_map */
-} ArrayCoerceExprState;
-
-/* ----------------
- *             ConvertRowtypeExprState node
- * ----------------
- */
-typedef struct ConvertRowtypeExprState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input tuple value */
-       TupleDesc       indesc;                 /* tupdesc for source rowtype */
-       TupleDesc       outdesc;                /* tupdesc for result rowtype */
-       /* use "struct" so we needn't include tupconvert.h here */
-       struct TupleConversionMap *map;
-       bool            initialized;
-} ConvertRowtypeExprState;
-
-/* ----------------
- *             CaseExprState node
- * ----------------
- */
-typedef struct CaseExprState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* implicit equality comparison argument */
-       List       *args;                       /* the arguments (list of WHEN clauses) */
-       ExprState  *defresult;          /* the default result (ELSE clause) */
-} CaseExprState;
-
-/* ----------------
- *             CaseWhenState node
- * ----------------
- */
-typedef struct CaseWhenState
-{
-       ExprState       xprstate;
-       ExprState  *expr;                       /* condition expression */
-       ExprState  *result;                     /* substitution result */
-} CaseWhenState;
-
-/* ----------------
- *             ArrayExprState node
- *
- * Note: ARRAY[] expressions always produce varlena arrays, never fixed-length
- * arrays.
- * ----------------
- */
-typedef struct ArrayExprState
-{
-       ExprState       xprstate;
-       List       *elements;           /* states for child nodes */
-       int16           elemlength;             /* typlen of the array element type */
-       bool            elembyval;              /* is the element type pass-by-value? */
-       char            elemalign;              /* typalign of the element type */
-} ArrayExprState;
-
-/* ----------------
- *             RowExprState node
- * ----------------
- */
-typedef struct RowExprState
-{
-       ExprState       xprstate;
-       List       *args;                       /* the arguments */
-       TupleDesc       tupdesc;                /* descriptor for result tuples */
-} RowExprState;
-
-/* ----------------
- *             RowCompareExprState node
- * ----------------
- */
-typedef struct RowCompareExprState
-{
-       ExprState       xprstate;
-       List       *largs;                      /* the left-hand input arguments */
-       List       *rargs;                      /* the right-hand input arguments */
-       FmgrInfo   *funcs;                      /* array of comparison function info */
-       Oid                *collations;         /* array of collations to use */
-} RowCompareExprState;
-
-/* ----------------
- *             CoalesceExprState node
- * ----------------
- */
-typedef struct CoalesceExprState
-{
-       ExprState       xprstate;
-       List       *args;                       /* the arguments */
-} CoalesceExprState;
-
-/* ----------------
- *             MinMaxExprState node
- * ----------------
- */
-typedef struct MinMaxExprState
-{
-       ExprState       xprstate;
-       List       *args;                       /* the arguments */
-       FmgrInfo        cfunc;                  /* lookup info for comparison func */
-} MinMaxExprState;
-
-/* ----------------
- *             XmlExprState node
- * ----------------
- */
-typedef struct XmlExprState
-{
-       ExprState       xprstate;
-       List       *named_args;         /* ExprStates for named arguments */
-       List       *args;                       /* ExprStates for other arguments */
-} XmlExprState;
-
-/* ----------------
- *             NullTestState node
- * ----------------
- */
-typedef struct NullTestState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input expression */
-       /* used only if input is of composite type: */
-       TupleDesc       argdesc;                /* tupdesc for most recent input */
-} NullTestState;
-
-/* ----------------
- *             CoerceToDomainState node
- * ----------------
- */
-typedef struct CoerceToDomainState
-{
-       ExprState       xprstate;
-       ExprState  *arg;                        /* input expression */
-       /* Cached list of constraints that need to be checked */
-       List       *constraints;        /* list of DomainConstraintState nodes */
-} CoerceToDomainState;
-
  /*
   * DomainConstraintState - one item to check during CoerceToDomain
   *
- * Note: this is just a Node, and not an ExprState, because it has no
- * corresponding Expr to link to.  Nonetheless it is part of an ExprState
- * tree, so we give it a name following the xxxState convention.
+ * Note: we consider this to be part of an ExprState tree, so we give it
+ * a name following the xxxState convention.  But there's no directly
+ * associated plan-tree node.
   */
  typedef enum DomainConstraintType
  {
@@ -950,7 +773,8 @@ typedef struct DomainConstraintState
         NodeTag         type;
         DomainConstraintType constrainttype;            /* constraint type */
         char       *name;                       /* name of constraint (for error msgs) */
-       ExprState  *check_expr;         /* for CHECK, a boolean expression */
+       Expr       *check_expr;         /* for CHECK, a boolean expression */
+       ExprState  *check_exprstate;    /* check_expr's eval state, or NULL */
  } DomainConstraintState;
  
  
@@ -980,14 +804,14 @@ typedef struct PlanState
                                                                  * top-level plan */
  
         Instrumentation *instrument;    /* Optional runtime stats for this node */
+       WorkerInstrumentation *worker_instrument;       /* per-worker instrumentation */
  
         /*
          * Common structural data for all Plan types.  These links to subsidiary
          * state trees parallel links in the associated plan tree (except for the
          * subPlan list, which does not exist in the plan tree).
          */
-       List       *targetlist;         /* target list to be computed at this node */
-       List       *qual;                       /* implicitly-ANDed qual conditions */
+       ExprState  *qual;                       /* boolean qual condition */
         struct PlanState *lefttree; /* input plan tree(s) */
         struct PlanState *righttree;
         List       *initPlan;           /* Init SubPlanState nodes (un-correlated expr
@@ -1005,8 +829,6 @@ typedef struct PlanState
         TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
         ExprContext *ps_ExprContext;    /* node's expression-evaluation context */
         ProjectionInfo *ps_ProjInfo;    /* info for doing tuple projection */
-       bool            ps_TupFromTlist;/* state flag for processing set-valued
-                                                                * functions in targetlist */
  } PlanState;
  
  /* ----------------
@@ -1059,6 +881,22 @@ typedef struct ResultState
         bool            rs_checkqual;   /* do we need to check the qual? */
  } ResultState;
  
+/* ----------------
+ *      ProjectSetState information
+ *
+ * Note: at least one of the "elems" will be a SetExprState; the rest are
+ * regular ExprStates.
+ * ----------------
+ */
+typedef struct ProjectSetState
+{
+       PlanState       ps;                             /* its first field is NodeTag */
+       Node      **elems;                      /* array of expression states */
+       ExprDoneCond *elemdone;         /* array of per-SRF is-done states */
+       int                     nelems;                 /* length of elemdone[] array */
+       bool            pending_srf_tuples;             /* still evaluating srfs in tlist? */
+} ProjectSetState;
+
  /* ----------------
   *      ModifyTableState information
   * ----------------
@@ -1076,6 +914,24 @@ typedef struct ModifyTableState
         List      **mt_arowmarks;       /* per-subplan ExecAuxRowMark lists */
         EPQState        mt_epqstate;    /* for evaluating EvalPlanQual rechecks */
         bool            fireBSTriggers; /* do we need to fire stmt triggers? */
+       OnConflictAction mt_onconflict;         /* ON CONFLICT type */
+       List       *mt_arbiterindexes;          /* unique index OIDs to arbitrate
+                                                                                * taking alt path */
+       TupleTableSlot *mt_existing;    /* slot to store existing target tuple in */
+       List       *mt_excludedtlist;           /* the excluded pseudo relation's
+                                                                                * tlist  */
+       TupleTableSlot *mt_conflproj;           /* CONFLICT ... SET ... projection
+                                                                                * target */
+       struct PartitionDispatchData **mt_partition_dispatch_info;
+                                                                               /* Tuple-routing support info */
+       int                             mt_num_dispatch;        /* Number of entries in the above
+                                                                                * array */
+       int                             mt_num_partitions;      /* Number of members in the
+                                                                                * following arrays */
+       ResultRelInfo  *mt_partitions;  /* Per partition result relation */
+       TupleConversionMap **mt_partition_tupconv_maps;
+                                                                       /* Per partition tuple conversion map */
+       TupleTableSlot *mt_partition_tuple_slot;
  } ModifyTableState;
  
  /* ----------------
@@ -1100,10 +956,8 @@ typedef struct AppendState
   *             nkeys                   number of sort key columns
   *             sortkeys                sort keys in SortSupport representation
   *             slots                   current output tuple of each subplan
- *             heap                    heap of active tuples (represented as array indexes)
- *             heap_size               number of active heap entries
+ *             heap                    heap of active tuples
   *             initialized             true if we have fetched first tuple from each subplan
- *             last_slot               last subplan fetched from (which must be re-called)
   * ----------------
   */
  typedef struct MergeAppendState
@@ -1114,10 +968,8 @@ typedef struct MergeAppendState
         int                     ms_nkeys;
         SortSupport ms_sortkeys;        /* array of length ms_nkeys */
         TupleTableSlot **ms_slots;      /* array of length ms_nplans */
-       int                *ms_heap;            /* array of length ms_nplans */
-       int                     ms_heap_size;   /* current active length of ms_heap[] */
+       struct binaryheap *ms_heap; /* binary heap of slot indices */
         bool            ms_initialized; /* are subplans started? */
-       int                     ms_last_slot;   /* last subplan slot we returned from */
  } MergeAppendState;
  
  /* ----------------
@@ -1195,11 +1047,33 @@ typedef struct ScanState
         TupleTableSlot *ss_ScanTupleSlot;
  } ScanState;
  
-/*
- * SeqScan uses a bare ScanState as its state node, since it needs
- * no additional fields.
+/* ----------------
+ *      SeqScanState information
+ * ----------------
   */
-typedef ScanState SeqScanState;
+typedef struct SeqScanState
+{
+       ScanState       ss;                             /* its first field is NodeTag */
+       Size            pscan_len;              /* size of parallel heap scan descriptor */
+} SeqScanState;
+
+/* ----------------
+ *      SampleScanState information
+ * ----------------
+ */
+typedef struct SampleScanState
+{
+       ScanState       ss;
+       List       *args;                       /* expr states for TABLESAMPLE params */
+       ExprState  *repeatable;         /* expr state for REPEATABLE expr */
+       /* use struct pointer to avoid including tsmapi.h here */
+       struct TsmRoutine *tsmroutine;          /* descriptor for tablesample method */
+       void       *tsm_state;          /* tablesample method can keep state here */
+       bool            use_bulkread;   /* use bulkread buffer access strategy? */
+       bool            use_pagemode;   /* use page-at-a-time visibility checking? */
+       bool            begun;                  /* false means need to call BeginSampleScan */
+       uint32          seed;                   /* random seed */
+} SampleScanState;
  
  /*
   * These structs store information about index quals that don't have simple
@@ -1227,6 +1101,7 @@ typedef struct
   *      IndexScanState information
   *
   *             indexqualorig      execution state for indexqualorig expressions
+ *             indexorderbyorig   execution state for indexorderbyorig expressions
   *             ScanKeys                   Skey structures for index quals
   *             NumScanKeys                number of ScanKeys
   *             OrderByKeys                Skey structures for index ordering operators
@@ -1237,12 +1112,22 @@ typedef struct
   *             RuntimeContext     expr context for evaling runtime Skeys
   *             RelationDesc       index relation descriptor
   *             ScanDesc                   index scan descriptor
+ *
+ *             ReorderQueue       tuples that need reordering due to re-check
+ *             ReachedEnd                 have we fetched all tuples from index already?
+ *             OrderByValues      values of ORDER BY exprs of last fetched tuple
+ *             OrderByNulls       null flags for OrderByValues
+ *             SortSupport                for reordering ORDER BY exprs
+ *             OrderByTypByVals   is the datatype of order by expression pass-by-value?
+ *             OrderByTypLens     typlens of the datatypes of order by expressions
+ *             PscanLen           size of parallel index scan descriptor
   * ----------------
   */
  typedef struct IndexScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
-       List       *indexqualorig;
+       ExprState  *indexqualorig;
+       List       *indexorderbyorig;
         ScanKey         iss_ScanKeys;
         int                     iss_NumScanKeys;
         ScanKey         iss_OrderByKeys;
@@ -1253,6 +1138,16 @@ typedef struct IndexScanState
         ExprContext *iss_RuntimeContext;
         Relation        iss_RelationDesc;
         IndexScanDesc iss_ScanDesc;
+
+       /* These are needed for re-checking ORDER BY expr ordering */
+       pairingheap *iss_ReorderQueue;
+       bool            iss_ReachedEnd;
+       Datum      *iss_OrderByValues;
+       bool       *iss_OrderByNulls;
+       SortSupport iss_SortSupport;
+       bool       *iss_OrderByTypByVals;
+       int16      *iss_OrderByTypLens;
+       Size            iss_PscanLen;
  } IndexScanState;
  
  /* ----------------
@@ -1271,12 +1166,13 @@ typedef struct IndexScanState
   *             ScanDesc                   index scan descriptor
   *             VMBuffer                   buffer in use for visibility map testing, if any
   *             HeapFetches                number of tuples we were forced to fetch from heap
+ *             PscanLen                   size of parallel index-only scan descriptor
   * ----------------
   */
  typedef struct IndexOnlyScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
-       List       *indexqual;
+       ExprState  *indexqual;
         ScanKey         ioss_ScanKeys;
         int                     ioss_NumScanKeys;
         ScanKey         ioss_OrderByKeys;
@@ -1289,6 +1185,7 @@ typedef struct IndexOnlyScanState
         IndexScanDesc ioss_ScanDesc;
         Buffer          ioss_VMBuffer;
         long            ioss_HeapFetches;
+       Size            ioss_PscanLen;
  } IndexOnlyScanState;
  
  /* ----------------
@@ -1323,6 +1220,51 @@ typedef struct BitmapIndexScanState
         IndexScanDesc biss_ScanDesc;
  } BitmapIndexScanState;
  
+/* ----------------
+ *      SharedBitmapState information
+ *
+ *             BM_INITIAL              TIDBitmap creation is not yet started, so first worker
+ *                                             to see this state will set the state to BM_INPROGRESS
+ *                                             and that process will be responsible for creating
+ *                                             TIDBitmap.
+ *             BM_INPROGRESS   TIDBitmap creation is in progress; workers need to
+ *                                             sleep until it's finished.
+ *             BM_FINISHED             TIDBitmap creation is done, so now all workers can
+ *                                             proceed to iterate over TIDBitmap.
+ * ----------------
+ */
+typedef enum
+{
+       BM_INITIAL,
+       BM_INPROGRESS,
+       BM_FINISHED
+} SharedBitmapState;
+
+/* ----------------
+ *      ParallelBitmapHeapState information
+ *             tbmiterator                             iterator for scanning current pages
+ *             prefetch_iterator               iterator for prefetching ahead of current page
+ *             mutex                                   mutual exclusion for the prefetching variable
+ *                                                             and state
+ *             prefetch_pages                  # pages prefetch iterator is ahead of current
+ *             prefetch_target                 current target prefetch distance
+ *             state                                   current state of the TIDBitmap
+ *             cv                                              condition variable to wait on
+ *             phs_snapshot_data               snapshot data shared to workers
+ * ----------------
+ */
+typedef struct ParallelBitmapHeapState
+{
+       dsa_pointer tbmiterator;
+       dsa_pointer prefetch_iterator;
+       slock_t         mutex;
+       int                     prefetch_pages;
+       int                     prefetch_target;
+       SharedBitmapState state;
+       ConditionVariable cv;
+       char            phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
+} ParallelBitmapHeapState;
+
  /* ----------------
   *      BitmapHeapScanState information
   *
@@ -1330,40 +1272,57 @@ typedef struct BitmapIndexScanState
   *             tbm                                bitmap obtained from child index scan(s)
   *             tbmiterator                iterator for scanning current pages
   *             tbmres                     current-page data
+ *             exact_pages                total number of exact pages retrieved
+ *             lossy_pages                total number of lossy pages retrieved
   *             prefetch_iterator  iterator for prefetching ahead of current page
   *             prefetch_pages     # pages prefetch iterator is ahead of current
- *             prefetch_target    target prefetch distance
+ *             prefetch_target    current target prefetch distance
+ *             prefetch_maximum   maximum value for prefetch_target
+ *             pscan_len                  size of the shared memory for parallel bitmap scan
+ *             initialized                is node ready to iterate?
+ *             shared_tbmiterator         shared iterator
+ *             shared_prefetch_iterator shared iterator for prefetching
+ *             pstate                     shared state for parallel bitmap scan
   * ----------------
   */
  typedef struct BitmapHeapScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
-       List       *bitmapqualorig;
+       ExprState  *bitmapqualorig;
         TIDBitmap  *tbm;
         TBMIterator *tbmiterator;
         TBMIterateResult *tbmres;
+       long            exact_pages;
+       long            lossy_pages;
         TBMIterator *prefetch_iterator;
         int                     prefetch_pages;
         int                     prefetch_target;
+       int                     prefetch_maximum;
+       Size            pscan_len;
+       bool            initialized;
+       TBMSharedIterator *shared_tbmiterator;
+       TBMSharedIterator *shared_prefetch_iterator;
+       ParallelBitmapHeapState *pstate;
  } BitmapHeapScanState;
  
  /* ----------------
   *      TidScanState information
   *
+ *             tidexprs           list of TidExpr structs (see nodeTidscan.c)
   *             isCurrentOf    scan has a CurrentOfExpr qual
   *             NumTids            number of tids in this scan
   *             TidPtr             index of currently fetched tid
   *             TidList            evaluated item pointers (array of size NumTids)
+ *             htup               currently-fetched tuple, if any
   * ----------------
   */
  typedef struct TidScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
-       List       *tss_tidquals;       /* list of ExprState nodes */
+       List       *tss_tidexprs;
         bool            tss_isCurrentOf;
         int                     tss_NumTids;
         int                     tss_TidPtr;
-       int                     tss_MarkTidPtr;
         ItemPointerData *tss_TidList;
         HeapTupleData tss_htup;
  } TidScanState;
@@ -1388,18 +1347,28 @@ typedef struct SubqueryScanState
   *             function appearing in FROM (typically a function returning set).
   *
   *             eflags                          node's capability flags
- *             tupdesc                         expected return tuple description
- *             tuplestorestate         private state of tuplestore.c
- *             funcexpr                        state for function expression being evaluated
+ *             ordinality                      is this scan WITH ORDINALITY?
+ *             simple                          true if we have 1 function and no ordinality
+ *             ordinal                         current ordinal column value
+ *             nfuncs                          number of functions being executed
+ *             funcstates                      per-function execution states (private in
+ *                                                     nodeFunctionscan.c)
+ *             argcontext                      memory context to evaluate function arguments in
   * ----------------
   */
+struct FunctionScanPerFuncState;
+
  typedef struct FunctionScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
         int                     eflags;
-       TupleDesc       tupdesc;
-       Tuplestorestate *tuplestorestate;
-       ExprState  *funcexpr;
+       bool            ordinality;
+       bool            simple;
+       int64           ordinal;
+       int                     nfuncs;
+       struct FunctionScanPerFuncState *funcstates;            /* array of length
+                                                                                                                * nfuncs */
+       MemoryContext argcontext;
  } FunctionScanState;
  
  /* ----------------
@@ -1411,7 +1380,6 @@ typedef struct FunctionScanState
   *             exprlists                       array of expression lists being evaluated
   *             array_len                       size of array
   *             curr_idx                        current array index (0-based)
- *             marked_idx                      marked position (for mark/restore)
   *
   *     Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection
   *     expressions attached to the node.  We create a second ExprContext,
@@ -1427,9 +1395,33 @@ typedef struct ValuesScanState
         List      **exprlists;
         int                     array_len;
         int                     curr_idx;
-       int                     marked_idx;
  } ValuesScanState;
  
+/* ----------------
+ *             TableFuncScanState node
+ *
+ * Used in table-expression functions like XMLTABLE.
+ * ----------------
+ */
+typedef struct TableFuncScanState
+{
+       ScanState       ss;                             /* its first field is NodeTag */
+       ExprState  *docexpr;            /* state for document expression */
+       ExprState  *rowexpr;            /* state for row-generating expression */
+       List       *colexprs;           /* states for column-generating expressions */
+       List       *coldefexprs;        /* state for column default expressions */
+       List       *ns_names;           /* list of str nodes with namespace names */
+       List       *ns_uris;            /* list of states of namespace uri exprs */
+       Bitmapset  *notnulls;           /* nullability flag for each output column */
+       void       *opaque;                     /* table builder private space */
+       const struct TableFuncRoutine *routine;         /* table builder methods */
+       FmgrInfo   *in_functions;       /* input function for each column */
+       Oid                *typioparams;        /* typioparam for each column */
+       int64           ordinal;                /* row number to be output next */
+       MemoryContext perValueCxt;      /* short life context for value evaluation */
+       Tuplestorestate *tupstore;      /* output tuple store */
+} TableFuncScanState;
+
  /* ----------------
   *      CteScanState information
   *
@@ -1457,7 +1449,7 @@ typedef struct CteScanState
   *      WorkTableScanState information
   *
   *             WorkTableScan nodes are used to scan the work table created by
- *             a RecursiveUnion node.  We locate the RecursiveUnion node
+ *             a RecursiveUnion node.  We locate the RecursiveUnion node
   *             during executor startup.
   * ----------------
   */
@@ -1476,11 +1468,38 @@ typedef struct WorkTableScanState
  typedef struct ForeignScanState
  {
         ScanState       ss;                             /* its first field is NodeTag */
+       ExprState  *fdw_recheck_quals;          /* original quals not in ss.ps.qual */
+       Size            pscan_len;              /* size of parallel coordination information */
         /* use struct pointer to avoid including fdwapi.h here */
         struct FdwRoutine *fdwroutine;
         void       *fdw_state;          /* foreign-data wrapper can keep state here */
  } ForeignScanState;
  
+/* ----------------
+ *      CustomScanState information
+ *
+ *             CustomScan nodes are used to execute custom code within executor.
+ *
+ * Core code must avoid assuming that the CustomScanState is only as large as
+ * the structure declared here; providers are allowed to make it the first
+ * element in a larger structure, and typically would need to do so.  The
+ * struct is actually allocated by the CreateCustomScanState method associated
+ * with the plan node.  Any additional fields can be initialized there, or in
+ * the BeginCustomScan method.
+ * ----------------
+ */
+struct CustomExecMethods;
+
+typedef struct CustomScanState
+{
+       ScanState       ss;
+       uint32          flags;                  /* mask of CUSTOMPATH_* flags, see
+                                                                * nodes/extensible.h */
+       List       *custom_ps;          /* list of child PlanState nodes, if any */
+       Size            pscan_len;              /* size of parallel coordination information */
+       const struct CustomExecMethods *methods;
+} CustomScanState;
+
  /* ----------------------------------------------------------------
   *                              Join State Information
   * ----------------------------------------------------------------
@@ -1496,7 +1515,7 @@ typedef struct JoinState
  {
         PlanState       ps;
         JoinType        jointype;
-       List       *joinqual;           /* JOIN quals (in addition to ps.qual) */
+       ExprState  *joinqual;           /* JOIN quals (in addition to ps.qual) */
  } JoinState;
  
  /* ----------------
@@ -1594,7 +1613,7 @@ typedef struct HashJoinTableData *HashJoinTable;
  typedef struct HashJoinState
  {
         JoinState       js;                             /* its first field is NodeTag */
-       List       *hashclauses;        /* list of ExprState nodes */
+       ExprState  *hashclauses;
         List       *hj_OuterHashKeys;           /* list of ExprState nodes */
         List       *hj_InnerHashKeys;           /* list of ExprState nodes */
         List       *hj_HashOperators;           /* list of operator OIDs */
@@ -1654,7 +1673,7 @@ typedef struct SortState
  
  /* ---------------------
   *     GroupState information
- * -------------------------
+ * ---------------------
   */
  typedef struct GroupState
  {
@@ -1673,32 +1692,59 @@ typedef struct GroupState
   *     input group during evaluation of an Agg node's output tuple(s).  We
   *     create a second ExprContext, tmpcontext, in which to evaluate input
   *     expressions and run the aggregate transition functions.
- * -------------------------
+ * ---------------------
   */
  /* these structs are private in nodeAgg.c: */
  typedef struct AggStatePerAggData *AggStatePerAgg;
+typedef struct AggStatePerTransData *AggStatePerTrans;
  typedef struct AggStatePerGroupData *AggStatePerGroup;
+typedef struct AggStatePerPhaseData *AggStatePerPhase;
  
  typedef struct AggState
  {
         ScanState       ss;                             /* its first field is NodeTag */
         List       *aggs;                       /* all Aggref nodes in targetlist & quals */
         int                     numaggs;                /* length of list (could be zero!) */
-       FmgrInfo   *eqfunctions;        /* per-grouping-field equality fns */
+       int                     numtrans;               /* number of pertrans items */
+       AggSplit        aggsplit;               /* agg-splitting mode, see nodes.h */
+       AggStatePerPhase phase;         /* pointer to current phase data */
+       int                     numphases;              /* number of phases */
+       int                     current_phase;  /* current phase number */
         FmgrInfo   *hashfunctions;      /* per-grouping-field hash fns */
         AggStatePerAgg peragg;          /* per-Aggref information */
-       MemoryContext aggcontext;       /* memory context for long-lived data */
+       AggStatePerTrans pertrans;      /* per-Trans state information */
+       ExprContext **aggcontexts;      /* econtexts for long-lived data (per grouping set) */
         ExprContext *tmpcontext;        /* econtext for input expressions */
+       AggStatePerTrans curpertrans;           /* currently active trans state */
+       bool            input_done;             /* indicates end of input */
         bool            agg_done;               /* indicates completion of Agg scan */
+       int                     projected_set;  /* The last projected grouping set */
+       int                     current_set;    /* The current grouping set being evaluated */
+       Bitmapset  *grouped_cols;       /* grouped cols in current projection */
+       List       *all_grouped_cols;           /* list of all grouped cols in DESC
+                                                                                * order */
+       /* These fields are for grouping set phase data */
+       int                     maxsets;                /* The max number of sets in any phase */
+       AggStatePerPhase phases;        /* array of all phases */
+       Tuplesortstate *sort_in;        /* sorted input to phases > 0 */
+       Tuplesortstate *sort_out;       /* input is copied here for next phase */
+       TupleTableSlot *sort_slot;      /* slot for sort results */
         /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
         AggStatePerGroup pergroup;      /* per-Aggref-per-group working state */
         HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
         /* these fields are used in AGG_HASHED mode: */
         TupleHashTable hashtable;       /* hash table with one entry per group */
         TupleTableSlot *hashslot;       /* slot for loading hash table */
-       List       *hash_needed;        /* list of columns needed in hash table */
+       int                     numhashGrpCols; /* number of columns in hash table */
+       int                     largestGrpColIdx; /* largest column required for hashing */
+       AttrNumber *hashGrpColIdxInput; /* indices of the hash table columns in input slot */
+       AttrNumber *hashGrpColIdxHash;  /* indices for execGrouping in hash table */
         bool            table_filled;   /* hash table filled yet? */
         TupleHashIterator hashiter; /* for iterating through hash table */
+       /* support for evaluation of agg inputs */
+       TupleTableSlot *evalslot;       /* slot for agg inputs */
+       ProjectionInfo *evalproj;       /* projection machinery */
+       TupleDesc       evaldesc;               /* descriptor of input tuples */
  } AggState;
  
  /* ----------------
@@ -1741,7 +1787,8 @@ typedef struct WindowAggState
         Datum           endOffsetValue; /* result of endOffset evaluation */
  
         MemoryContext partcontext;      /* context for partition-lifespan data */
-       MemoryContext aggcontext;       /* context for each aggregate data */
+       MemoryContext aggcontext;       /* shared context for aggregate working data */
+       MemoryContext curaggcontext;    /* current aggregate's working data */
         ExprContext *tmpcontext;        /* short-term evaluation context */
  
         bool            all_first;              /* true if the scan is starting */
@@ -1769,7 +1816,7 @@ typedef struct WindowAggState
   *      UniqueState information
   *
   *             Unique nodes are used "on top of" sort nodes to discard
- *             duplicate tuples returned from the sort phase.  Basically
+ *             duplicate tuples returned from the sort phase.  Basically
   *             all it does is compare the current tuple from the subplan
   *             with the previously fetched tuple (stored in its result slot).
   *             If the two are identical in all interesting fields, then
@@ -1783,6 +1830,55 @@ typedef struct UniqueState
         MemoryContext tempContext;      /* short-term context for comparisons */
  } UniqueState;
  
+/* ----------------
+ * GatherState information
+ *
+ *             Gather nodes launch 1 or more parallel workers, run a subplan
+ *             in those workers, and collect the results.
+ * ----------------
+ */
+typedef struct GatherState
+{
+       PlanState       ps;                             /* its first field is NodeTag */
+       bool            initialized;
+       struct ParallelExecutorInfo *pei;
+       int                     nreaders;               /* presumably # of entries in reader[] -- confirm */
+       int                     nextreader;             /* presumably index into reader[] -- confirm */
+       int                     nworkers_launched;      /* presumably # of workers started -- confirm */
+       struct TupleQueueReader **reader;
+       TupleTableSlot *funnel_slot;
+       bool            need_to_scan_locally;
+} GatherState;
+
+/* ----------------
+ * GatherMergeState information
+ *
+ *             Gather merge nodes launch 1 or more parallel workers, run a
+ *             subplan which produces sorted output in each worker, and then
+ *             merge the results into a single sorted stream.
+ * ----------------
+ */
+struct GMReaderTuple;  /* NOTE(review): gm_tuple_buffers uses struct GMReaderTupleBuffer -- confirm tag */
+
+typedef struct GatherMergeState
+{
+       PlanState       ps;                             /* its first field is NodeTag */
+       bool            initialized;
+       struct ParallelExecutorInfo *pei;
+       int                     nreaders;
+       int                     nworkers_launched;
+       struct TupleQueueReader **reader;
+       TupleDesc       tupDesc;
+       TupleTableSlot **gm_slots;
+       struct binaryheap *gm_heap; /* binary heap of slot indices */
+       bool            gm_initialized; /* gather merge initialized? */
+       bool            need_to_scan_locally;
+       int                     gm_nkeys;
+       SortSupport gm_sortkeys;        /* array of length gm_nkeys */
+       struct GMReaderTupleBuffer *gm_tuple_buffers;           /* tuple buffer per
+                                                                                                                * reader */
+} GatherMergeState;
+
  /* ----------------
   *      HashState information
   * ----------------
@@ -1828,7 +1924,7 @@ typedef struct SetOpState
  /* ----------------
   *      LockRowsState information
   *
- *             LockRows nodes are used to enforce FOR UPDATE/FOR SHARE locking.
+ *             LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
   * ----------------
   */
  typedef struct LockRowsState
@@ -1836,6 +1932,8 @@ typedef struct LockRowsState
         PlanState       ps;                             /* its first field is NodeTag */
         List       *lr_arowMarks;       /* List of ExecAuxRowMarks */
         EPQState        lr_epqstate;    /* for evaluating EvalPlanQual rechecks */
+       HeapTuple  *lr_curtuples;       /* locked tuples (one entry per RT entry) */
+       int                     lr_ntables;             /* length of lr_curtuples[] array */
  } LockRowsState;
  
  /* ----------------