X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Finclude%2Fnodes%2Fexecnodes.h;h=ff428951186007083f9bb35d4be6a57bdfb8e760;hb=b8d7f053c5c2bf2a7e8734fe3327f6a8bc711755;hp=0a6b829de4ff250c1eb64a8a476fba8e1ee9b449;hpb=bb742407947ad1cbf19355d24282380d576e7654;p=postgresql diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 0a6b829de4..ff42895118 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -4,7 +4,7 @@ * definitions for executor state nodes * * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/include/nodes/execnodes.h @@ -16,14 +16,84 @@ #include "access/genam.h" #include "access/heapam.h" -#include "access/skey.h" +#include "access/tupconvert.h" +#include "executor/instrument.h" +#include "lib/pairingheap.h" #include "nodes/params.h" #include "nodes/plannodes.h" -#include "nodes/tidbitmap.h" #include "utils/hsearch.h" -#include "utils/rel.h" -#include "utils/snapshot.h" +#include "utils/reltrigger.h" +#include "utils/sortsupport.h" #include "utils/tuplestore.h" +#include "utils/tuplesort.h" +#include "nodes/tidbitmap.h" +#include "storage/condition_variable.h" + + +/* ---------------- + * ExprState node + * + * ExprState is the top-level node for expression evaluation. + * It contains instructions (in ->steps) to evaluate the expression. 
+ * ---------------- + */ +struct ExprState; /* forward references in this file */ +struct ExprContext; +struct ExprEvalStep; /* avoid including execExpr.h everywhere */ + +typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression, + struct ExprContext *econtext, + bool *isNull); + +/* Bits in ExprState->flags (see also execExpr.h for private flag bits): */ +/* expression is for use with ExecQual() */ +#define EEO_FLAG_IS_QUAL (1 << 0) + +typedef struct ExprState +{ + Node tag; + + uint8 flags; /* bitmask of EEO_FLAG_* bits, see above */ + + /* + * Storage for result value of a scalar expression, or for individual + * column results within expressions built by ExecBuildProjectionInfo(). + */ + bool resnull; + Datum resvalue; + + /* + * If projecting a tuple result, this slot holds the result; else NULL. + */ + TupleTableSlot *resultslot; + + /* + * Instructions to compute expression's return value. + */ + struct ExprEvalStep *steps; + + /* + * Function that actually evaluates the expression. This can be set to + * different values depending on the complexity of the expression. + */ + ExprStateEvalFunc evalfunc; + + /* original expression tree, for debugging only */ + Expr *expr; + + /* + * XXX: following only needed during "compilation", could be thrown away. + */ + + int steps_len; /* number of steps currently */ + int steps_alloc; /* allocated length of steps array */ + + Datum *innermost_caseval; + bool *innermost_casenull; + + Datum *innermost_domainval; + bool *innermost_domainnull; +} ExprState; /* ---------------- @@ -43,10 +113,15 @@ * ExclusionOps Per-column exclusion operators, or NULL if none * ExclusionProcs Underlying function OIDs for ExclusionOps * ExclusionStrats Opclass strategy numbers for ExclusionOps + * UniqueOps These are like Exclusion*, but for unique indexes + * UniqueProcs + * UniqueStrats * Unique is it a unique index? * ReadyForInserts is it valid for inserts? * Concurrent are we doing a concurrent index build? 
* BrokenHotChain did we detect any broken HOT chains? + * AmCache private cache area for index AM + * Context memory context holding this IndexInfo * * ii_Concurrent and ii_BrokenHotChain are used only during index build; * they're conventionally set to false otherwise. @@ -60,14 +135,19 @@ typedef struct IndexInfo List *ii_Expressions; /* list of Expr */ List *ii_ExpressionsState; /* list of ExprState */ List *ii_Predicate; /* list of Expr */ - List *ii_PredicateState; /* list of ExprState */ + ExprState *ii_PredicateState; Oid *ii_ExclusionOps; /* array with one entry per column */ Oid *ii_ExclusionProcs; /* array with one entry per column */ uint16 *ii_ExclusionStrats; /* array with one entry per column */ + Oid *ii_UniqueOps; /* array with one entry per column */ + Oid *ii_UniqueProcs; /* array with one entry per column */ + uint16 *ii_UniqueStrats; /* array with one entry per column */ bool ii_Unique; bool ii_ReadyForInserts; bool ii_Concurrent; bool ii_BrokenHotChain; + void *ii_AmCache; + MemoryContext ii_Context; } IndexInfo; /* ---------------- @@ -90,14 +170,14 @@ typedef struct ExprContext_CB * * This class holds the "current context" information * needed to evaluate expressions for doing tuple qualifications - * and tuple projections. For example, if an expression refers + * and tuple projections. For example, if an expression refers * to an attribute in the current inner tuple then we need to know * what the current inner tuple is and so we look at the expression * context. * * There are two memory contexts associated with an ExprContext: * * ecxt_per_query_memory is a query-lifespan context, typically the same - * context the ExprContext node itself is allocated in. This context + * context the ExprContext node itself is allocated in. This context * can be used for purposes such as storing function call cache info. * * ecxt_per_tuple_memory is a short-term context for expression results. 
* As the name suggests, it will typically be reset once per tuple, @@ -143,12 +223,13 @@ typedef struct ExprContext /* Link to containing EState (NULL if a standalone ExprContext) */ struct EState *ecxt_estate; - /* Functions to call back when ExprContext is shut down */ + /* Functions to call back when ExprContext is shut down or rescanned */ ExprContext_CB *ecxt_callbacks; } ExprContext; /* - * Set-result status returned by ExecEvalExpr() + * Set-result status used when evaluating functions potentially returning a + * set. */ typedef enum { @@ -199,53 +280,21 @@ typedef struct ReturnSetInfo * that is, form new tuples by evaluation of targetlist expressions. * Nodes which need to do projections create one of these. * + * The target tuple slot is kept in ProjectionInfo->pi_state.resultslot. * ExecProject() evaluates the tlist, forms a tuple, and stores it - * in the given slot. Note that the result will be a "virtual" tuple + * in the given slot. Note that the result will be a "virtual" tuple * unless ExecMaterializeSlot() is then called to force it to be - * converted to a physical tuple. The slot must have a tupledesc + * converted to a physical tuple. The slot must have a tupledesc * that matches the output of the tlist! - * - * The planner very often produces tlists that consist entirely of - * simple Var references (lower levels of a plan tree almost always - * look like that). And top-level tlists are often mostly Vars too. - * We therefore optimize execution of simple-Var tlist entries. - * The pi_targetlist list actually contains only the tlist entries that - * aren't simple Vars, while those that are Vars are processed using the - * varSlotOffsets/varNumbers/varOutputCols arrays. - * - * The lastXXXVar fields are used to optimize fetching of fields from - * input tuples: they let us do a slot_getsomeattrs() call to ensure - * that all needed attributes are extracted in one pass. 
- * - * targetlist target list for projection (non-Var expressions only) - * exprContext expression context in which to evaluate targetlist - * slot slot to place projection result in - * itemIsDone workspace array for ExecProject - * directMap true if varOutputCols[] is an identity map - * numSimpleVars number of simple Vars found in original tlist - * varSlotOffsets array indicating which slot each simple Var is from - * varNumbers array containing input attr numbers of simple Vars - * varOutputCols array containing output attr numbers of simple Vars - * lastInnerVar highest attnum from inner tuple slot (0 if none) - * lastOuterVar highest attnum from outer tuple slot (0 if none) - * lastScanVar highest attnum from scan tuple slot (0 if none) * ---------------- */ typedef struct ProjectionInfo { NodeTag type; - List *pi_targetlist; + /* instructions to evaluate projection */ + ExprState pi_state; + /* expression context in which to evaluate expression */ ExprContext *pi_exprContext; - TupleTableSlot *pi_slot; - ExprDoneCond *pi_itemIsDone; - bool pi_directMap; - int pi_numSimpleVars; - int *pi_varSlotOffsets; - int *pi_varNumbers; - int *pi_varOutputCols; - int pi_lastInnerVar; - int pi_lastOuterVar; - int pi_lastScanVar; } ProjectionInfo; /* ---------------- @@ -257,7 +306,7 @@ typedef struct ProjectionInfo * in emitted tuples. For example, when we do an UPDATE query, * the planner adds a "junk" entry to the targetlist so that the tuples * returned to ExecutePlan() contain an extra attribute: the ctid of - * the tuple to be updated. This is needed to do the update, but we + * the tuple to be updated. This is needed to do the update, but we * don't want the ctid to be part of the stored new tuple! So, we * apply a "junk filter" to remove the junk attributes and form the * real output tuple. The junkfilter code also provides routines to @@ -272,7 +321,8 @@ typedef struct ProjectionInfo * resultSlot: tuple slot used to hold cleaned tuple. 
* junkAttNo: not used by junkfilter code. Can be used by caller * to remember the attno of a specific junk attribute - * (execMain.c stores the "ctid" attno here). + * (nodeModifyTable.c keeps the "ctid" or "wholerow" + * attno here). * ---------------- */ typedef struct JunkFilter @@ -302,9 +352,18 @@ typedef struct JunkFilter * TrigFunctions cached lookup info for trigger functions * TrigWhenExprs array of trigger WHEN expr states * TrigInstrument optional runtime measurements for triggers + * FdwRoutine FDW callback functions, if foreign table + * FdwState available to save private state of FDW + * usesFdwDirectModify true when modifying foreign table directly + * WithCheckOptions list of WithCheckOption's to be checked + * WithCheckOptionExprs list of WithCheckOption expr states * ConstraintExprs array of constraint-checking expr states * junkFilter for removing junk attributes from tuples * projectReturning for computing a RETURNING list + * onConflictSetProj for computing ON CONFLICT DO UPDATE SET + * onConflictSetWhere list of ON CONFLICT DO UPDATE exprs (qual) + * PartitionCheck partition check expression + * PartitionCheckExpr partition check expression state * ---------------- */ typedef struct ResultRelInfo @@ -317,11 +376,21 @@ typedef struct ResultRelInfo IndexInfo **ri_IndexRelationInfo; TriggerDesc *ri_TrigDesc; FmgrInfo *ri_TrigFunctions; - List **ri_TrigWhenExprs; - struct Instrumentation *ri_TrigInstrument; - List **ri_ConstraintExprs; + ExprState **ri_TrigWhenExprs; + Instrumentation *ri_TrigInstrument; + struct FdwRoutine *ri_FdwRoutine; + void *ri_FdwState; + bool ri_usesFdwDirectModify; + List *ri_WithCheckOptions; + List *ri_WithCheckOptionExprs; + ExprState **ri_ConstraintExprs; JunkFilter *ri_junkFilter; ProjectionInfo *ri_projectReturning; + ProjectionInfo *ri_onConflictSetProj; + ExprState *ri_onConflictSetWhere; + List *ri_PartitionCheck; + ExprState *ri_PartitionCheckExpr; + Relation ri_PartitionRoot; } ResultRelInfo; /* 
---------------- @@ -340,13 +409,14 @@ typedef struct EState Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */ List *es_range_table; /* List of RangeTblEntry */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ /* If query can insert/delete tuples, the command ID to mark them with */ CommandId es_output_cid; - /* Info about target table for insert/update/delete queries: */ + /* Info about target table(s) for insert/update/delete queries: */ ResultRelInfo *es_result_relations; /* array of ResultRelInfos */ int es_num_result_relations; /* length of array */ ResultRelInfo *es_result_relation_info; /* currently active array elt */ @@ -354,7 +424,8 @@ typedef struct EState /* Stuff used for firing triggers: */ List *es_trig_target_relations; /* trigger-only ResultRelInfos */ TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */ - TupleTableSlot *es_trig_oldtup_slot; /* for trigger old tuples */ + TupleTableSlot *es_trig_oldtup_slot; /* for TriggerEnabled */ + TupleTableSlot *es_trig_newtup_slot; /* for TriggerEnabled */ /* Parameter info: */ ParamListInfo es_param_list_info; /* values of external params */ @@ -367,17 +438,19 @@ typedef struct EState List *es_rowMarks; /* List of ExecRowMarks */ - uint32 es_processed; /* # of tuples processed */ + uint64 es_processed; /* # of tuples processed */ Oid es_lastoid; /* last oid processed (by INSERT) */ + int es_top_eflags; /* eflags passed to ExecutorStart */ int es_instrument; /* OR of InstrumentOption flags */ - bool es_select_into; /* true if doing SELECT INTO */ - bool es_into_oids; /* true to generate OIDs in SELECT INTO */ + bool es_finished; /* true when ExecutorFinish is done */ List *es_exprcontexts; /* List of ExprContexts within EState */ List *es_subplanstates; /* List of PlanState for SubPlans */ + List *es_auxmodifytables; /* List of secondary 
ModifyTableStates */ + /* * this ExprContext is for per-output-tuple operations, such as constraint * checks and index-value computations. It will be reset for each output @@ -387,7 +460,7 @@ typedef struct EState /* * These fields are for re-evaluating plan quals when an updated tuple is - * substituted in READ COMMITTED mode. es_epqTuple[] contains tuples that + * substituted in READ COMMITTED mode. es_epqTuple[] contains tuples that * scan plan nodes should return instead of whatever they'd normally * return, or NULL if nothing to return; es_epqTupleSet[] is true if a * particular array entry is valid; and es_epqScanDone[] is state to @@ -397,35 +470,47 @@ typedef struct EState HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */ bool *es_epqTupleSet; /* true if EPQ tuple is provided */ bool *es_epqScanDone; /* true if EPQ tuple has been fetched */ + + /* The per-query shared memory area to use for parallel execution. */ + struct dsa_area *es_query_dsa; } EState; /* * ExecRowMark - - * runtime representation of FOR UPDATE/SHARE clauses + * runtime representation of FOR [KEY] UPDATE/SHARE clauses * - * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an + * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an * ExecRowMark for each non-target relation in the query (except inheritance - * parent RTEs, which can be ignored at runtime). See PlanRowMark for details - * about most of the fields. In addition to fields directly derived from - * PlanRowMark, we store curCtid, which is used by the WHERE CURRENT OF code. + * parent RTEs, which can be ignored at runtime). Virtual relations such as + * subqueries-in-FROM will have an ExecRowMark with relation == NULL. See + * PlanRowMark for details about most of the fields. 
In addition to fields + * directly derived from PlanRowMark, we store an activity flag (to denote + * inactive children of inheritance trees), curCtid, which is used by the + * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan + * node that sources the relation (e.g., for a foreign table the FDW can use + * ermExtra to hold information). * * EState->es_rowMarks is a list of these structs. */ typedef struct ExecRowMark { Relation relation; /* opened and suitably locked relation */ + Oid relid; /* its OID (or InvalidOid, if subquery) */ Index rti; /* its range table index */ Index prti; /* parent range table index, if child */ Index rowmarkId; /* unique identifier for resjunk columns */ RowMarkType markType; /* see enum in nodes/plannodes.h */ - bool noWait; /* NOWAIT option */ + LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */ + LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */ + bool ermActive; /* is this mark relevant for current tuple? */ ItemPointerData curCtid; /* ctid of currently locked tuple, if any */ + void *ermExtra; /* available for use by relation source node */ } ExecRowMark; /* * ExecAuxRowMark - - * additional runtime representation of FOR UPDATE/SHARE clauses + * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses * * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to * deal with. In addition to a pointer to the related entry in es_rowMarks, @@ -465,14 +550,23 @@ typedef struct TupleHashTableData *TupleHashTable; typedef struct TupleHashEntryData { - /* firstTuple must be the first field in this struct! 
*/ MinimalTuple firstTuple; /* copy of first tuple in this group */ - /* there may be additional data beyond the end of this struct */ -} TupleHashEntryData; /* VARIABLE LENGTH STRUCT */ + void *additional; /* user data */ + uint32 status; /* hash status */ + uint32 hash; /* hash value (cached) */ +} TupleHashEntryData; + +/* define parameters necessary to generate the tuple hash table interface */ +#define SH_PREFIX tuplehash +#define SH_ELEMENT_TYPE TupleHashEntryData +#define SH_KEY_TYPE MinimalTuple +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" typedef struct TupleHashTableData { - HTAB *hashtab; /* underlying dynahash table */ + tuplehash_hash *hashtab; /* underlying hash table */ int numCols; /* number of columns in lookup key */ AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ @@ -485,9 +579,10 @@ typedef struct TupleHashTableData TupleTableSlot *inputslot; /* current input tuple's slot */ FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ FmgrInfo *cur_eq_funcs; /* equality functions for input vs. table */ -} TupleHashTableData; + uint32 hash_iv; /* hash-function IV */ +} TupleHashTableData; -typedef HASH_SEQ_STATUS TupleHashIterator; +typedef tuplehash_iterator TupleHashIterator; /* * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan. @@ -495,78 +590,35 @@ typedef HASH_SEQ_STATUS TupleHashIterator; * explicit scan termination is needed). 
*/ #define InitTupleHashIterator(htable, iter) \ - hash_seq_init(iter, (htable)->hashtab) + tuplehash_start_iterate(htable->hashtab, iter) #define TermTupleHashIterator(iter) \ - hash_seq_term(iter) + ((void) 0) #define ResetTupleHashIterator(htable, iter) \ - do { \ - hash_freeze((htable)->hashtab); \ - hash_seq_init(iter, (htable)->hashtab); \ - } while (0) -#define ScanTupleHashTable(iter) \ - ((TupleHashEntry) hash_seq_search(iter)) + InitTupleHashIterator(htable, iter) +#define ScanTupleHashTable(htable, iter) \ + tuplehash_iterate(htable->hashtab, iter) /* ---------------------------------------------------------------- - * Expression State Trees - * - * Each executable expression tree has a parallel ExprState tree. - * - * Unlike PlanState, there is not an exact one-for-one correspondence between - * ExprState node types and Expr node types. Many Expr node types have no - * need for node-type-specific run-time state, and so they can use plain - * ExprState or GenericExprState as their associated ExprState node type. + * Expression State Nodes + * + * Formerly, there was a separate executor expression state node corresponding + * to each node in a planned expression tree. That's no longer the case; for + * common expression node types, all the execution info is embedded into + * step(s) in a single ExprState node. But we still have a few executor state + * node types for selected expression node types, mostly those in which info + * has to be shared with other parts of the execution state tree. * ---------------------------------------------------------------- */ -/* ---------------- - * ExprState node - * - * ExprState is the common superclass for all ExprState-type nodes. - * - * It can also be instantiated directly for leaf Expr nodes that need no - * local run-time state (such as Var, Const, or Param). - * - * To save on dispatch overhead, each ExprState node contains a function - * pointer to the routine to execute to evaluate the node. 
- * ---------------- - */ - -typedef struct ExprState ExprState; - -typedef Datum (*ExprStateEvalFunc) (ExprState *expression, - ExprContext *econtext, - bool *isNull, - ExprDoneCond *isDone); - -struct ExprState -{ - NodeTag type; - Expr *expr; /* associated Expr node */ - ExprStateEvalFunc evalfunc; /* routine to run to execute node */ -}; - -/* ---------------- - * GenericExprState node - * - * This is used for Expr node types that need no local run-time state, - * but have one child Expr node. - * ---------------- - */ -typedef struct GenericExprState -{ - ExprState xprstate; - ExprState *arg; /* state of my child node */ -} GenericExprState; - /* ---------------- * AggrefExprState node * ---------------- */ typedef struct AggrefExprState { - ExprState xprstate; - List *args; /* states of argument expressions */ + NodeTag type; + Aggref *aggref; /* expression plan node */ int aggno; /* ID number for agg within its plan node */ } AggrefExprState; @@ -576,49 +628,40 @@ typedef struct AggrefExprState */ typedef struct WindowFuncExprState { - ExprState xprstate; - List *args; /* states of argument expressions */ + NodeTag type; + WindowFunc *wfunc; /* expression plan node */ + List *args; /* ExprStates for argument expressions */ + ExprState *aggfilter; /* FILTER expression */ int wfuncno; /* ID number for wfunc within its plan node */ } WindowFuncExprState; -/* ---------------- - * ArrayRefExprState node - * - * Note: array types can be fixed-length (typlen > 0), but only when the - * element type is itself fixed-length. Otherwise they are varlena structures - * and have typlen = -1. In any case, an array type is never pass-by-value. 
- * ---------------- - */ -typedef struct ArrayRefExprState -{ - ExprState xprstate; - List *refupperindexpr; /* states for child nodes */ - List *reflowerindexpr; - ExprState *refexpr; - ExprState *refassgnexpr; - int16 refattrlength; /* typlen of array type */ - int16 refelemlength; /* typlen of the array element type */ - bool refelembyval; /* is the element type pass-by-value? */ - char refelemalign; /* typalign of the element type */ -} ArrayRefExprState; /* ---------------- - * FuncExprState node + * SetExprState node * - * Although named for FuncExpr, this is also used for OpExpr, DistinctExpr, - * and NullIf nodes; be careful to check what xprstate.expr is actually - * pointing at! + * State for evaluating a potentially set-returning expression (like FuncExpr + * or OpExpr). In some cases, like some of the expressions in ROWS FROM(...) + * the expression might not be a SRF, but nonetheless it uses the same + * machinery as SRFs; it will be treated as a SRF returning a single row. * ---------------- */ -typedef struct FuncExprState +typedef struct SetExprState { - ExprState xprstate; - List *args; /* states of argument expressions */ + NodeTag type; + Expr *expr; /* expression plan node */ + List *args; /* ExprStates for argument expressions */ + + /* + * In ROWS FROM, functions can be inlined, removing the FuncExpr normally + * inside. In such a case this is the compiled expression (which cannot + * return a set), which'll be evaluated using regular ExecEvalExpr(). + */ + ExprState *elidedFuncState; /* * Function manager's lookup info for the target function. If func.fn_oid * is InvalidOid, we haven't initialized it yet (nor any of the following - * fields). + * fields, except funcReturnsSet). */ FmgrInfo func; @@ -632,12 +675,18 @@ typedef struct FuncExprState /* * In some cases we need to compute a tuple descriptor for the function's - * output. If so, it's stored here. + * output. If so, it's stored here. 
*/ TupleDesc funcResultDesc; bool funcReturnsTuple; /* valid when funcResultDesc isn't * NULL */ + /* + * Remember whether the function is declared to return a set. This is set + * by ExecInitExpr, and is valid even before the FmgrInfo is set up. + */ + bool funcReturnsSet; + /* * setArgsValid is true when we are evaluating a set-returning function * that uses value-per-call mode and we are in the middle of a call @@ -647,16 +696,9 @@ typedef struct FuncExprState */ bool setArgsValid; - /* - * Flag to remember whether we found a set-valued argument to the - * function. This causes the function result to be a set as well. Valid - * only when setArgsValid is true or funcResultStore isn't NULL. - */ - bool setHasSetArg; /* some argument returns a set */ - /* * Flag to remember whether we have registered a shutdown callback for - * this FuncExprState. We do so only if funcResultStore or setArgsValid + * this SetExprState. We do so only if funcResultStore or setArgsValid * has been set at least once (since all the callback is for is to release * the tuplestore or clear setArgsValid). */ @@ -668,33 +710,7 @@ typedef struct FuncExprState * argument values between calls, when setArgsValid is true. */ FunctionCallInfoData fcinfo_data; -} FuncExprState; - -/* ---------------- - * ScalarArrayOpExprState node - * - * This is a FuncExprState plus some additional data. 
- * ---------------- - */ -typedef struct ScalarArrayOpExprState -{ - FuncExprState fxprstate; - /* Cached info about array element type */ - Oid element_type; - int16 typlen; - bool typbyval; - char typalign; -} ScalarArrayOpExprState; - -/* ---------------- - * BoolExprState node - * ---------------- - */ -typedef struct BoolExprState -{ - ExprState xprstate; - List *args; /* states of argument expression(s) */ -} BoolExprState; +} SetExprState; /* ---------------- * SubPlanState node @@ -702,11 +718,14 @@ typedef struct BoolExprState */ typedef struct SubPlanState { - ExprState xprstate; + NodeTag type; + SubPlan *subplan; /* expression plan node */ struct PlanState *planstate; /* subselect plan's state tree */ + struct PlanState *parent; /* parent plan node's state tree */ ExprState *testexpr; /* state of combining expression */ List *args; /* states of argument expression(s) */ HeapTuple curTuple; /* copy of most recent tuple from subplan */ + Datum curArray; /* most recent array from ARRAY() subplan */ /* these are used when hashing the subselect's output: */ ProjectionInfo *projLeft; /* for projecting lefthand exprs */ ProjectionInfo *projRight; /* for projecting subselect output */ @@ -714,7 +733,7 @@ typedef struct SubPlanState TupleHashTable hashnulls; /* hash table for rows with null(s) */ bool havehashrows; /* TRUE if hashtable is not empty */ bool havenullrows; /* TRUE if hashnulls is not empty */ - MemoryContext hashtablecxt; /* memory context containing hash tables */ + MemoryContext hashtablecxt; /* memory context containing hash tables */ MemoryContext hashtempcxt; /* temp memory context for hash tables */ ExprContext *innerecontext; /* econtext for computing inner tuples */ AttrNumber *keyColIdx; /* control data for hash tables */ @@ -730,200 +749,18 @@ typedef struct SubPlanState */ typedef struct AlternativeSubPlanState { - ExprState xprstate; - List *subplans; /* states of alternative subplans */ + NodeTag type; + AlternativeSubPlan *subplan; 
/* expression plan node */ + List *subplans; /* SubPlanStates of alternative subplans */ int active; /* list index of the one we're using */ } AlternativeSubPlanState; -/* ---------------- - * FieldSelectState node - * ---------------- - */ -typedef struct FieldSelectState -{ - ExprState xprstate; - ExprState *arg; /* input expression */ - TupleDesc argdesc; /* tupdesc for most recent input */ -} FieldSelectState; - -/* ---------------- - * FieldStoreState node - * ---------------- - */ -typedef struct FieldStoreState -{ - ExprState xprstate; - ExprState *arg; /* input tuple value */ - List *newvals; /* new value(s) for field(s) */ - TupleDesc argdesc; /* tupdesc for most recent input */ -} FieldStoreState; - -/* ---------------- - * CoerceViaIOState node - * ---------------- - */ -typedef struct CoerceViaIOState -{ - ExprState xprstate; - ExprState *arg; /* input expression */ - FmgrInfo outfunc; /* lookup info for source output function */ - FmgrInfo infunc; /* lookup info for result input function */ - Oid intypioparam; /* argument needed for input function */ -} CoerceViaIOState; - -/* ---------------- - * ArrayCoerceExprState node - * ---------------- - */ -typedef struct ArrayCoerceExprState -{ - ExprState xprstate; - ExprState *arg; /* input array value */ - Oid resultelemtype; /* element type of result array */ - FmgrInfo elemfunc; /* lookup info for element coercion function */ - /* use struct pointer to avoid including array.h here */ - struct ArrayMapState *amstate; /* workspace for array_map */ -} ArrayCoerceExprState; - -/* ---------------- - * ConvertRowtypeExprState node - * ---------------- - */ -typedef struct ConvertRowtypeExprState -{ - ExprState xprstate; - ExprState *arg; /* input tuple value */ - TupleDesc indesc; /* tupdesc for source rowtype */ - TupleDesc outdesc; /* tupdesc for result rowtype */ - /* use "struct" so we needn't include tupconvert.h here */ - struct TupleConversionMap *map; - bool initialized; -} ConvertRowtypeExprState; - 
-/* ---------------- - * CaseExprState node - * ---------------- - */ -typedef struct CaseExprState -{ - ExprState xprstate; - ExprState *arg; /* implicit equality comparison argument */ - List *args; /* the arguments (list of WHEN clauses) */ - ExprState *defresult; /* the default result (ELSE clause) */ -} CaseExprState; - -/* ---------------- - * CaseWhenState node - * ---------------- - */ -typedef struct CaseWhenState -{ - ExprState xprstate; - ExprState *expr; /* condition expression */ - ExprState *result; /* substitution result */ -} CaseWhenState; - -/* ---------------- - * ArrayExprState node - * - * Note: ARRAY[] expressions always produce varlena arrays, never fixed-length - * arrays. - * ---------------- - */ -typedef struct ArrayExprState -{ - ExprState xprstate; - List *elements; /* states for child nodes */ - int16 elemlength; /* typlen of the array element type */ - bool elembyval; /* is the element type pass-by-value? */ - char elemalign; /* typalign of the element type */ -} ArrayExprState; - -/* ---------------- - * RowExprState node - * ---------------- - */ -typedef struct RowExprState -{ - ExprState xprstate; - List *args; /* the arguments */ - TupleDesc tupdesc; /* descriptor for result tuples */ -} RowExprState; - -/* ---------------- - * RowCompareExprState node - * ---------------- - */ -typedef struct RowCompareExprState -{ - ExprState xprstate; - List *largs; /* the left-hand input arguments */ - List *rargs; /* the right-hand input arguments */ - FmgrInfo *funcs; /* array of comparison function info */ -} RowCompareExprState; - -/* ---------------- - * CoalesceExprState node - * ---------------- - */ -typedef struct CoalesceExprState -{ - ExprState xprstate; - List *args; /* the arguments */ -} CoalesceExprState; - -/* ---------------- - * MinMaxExprState node - * ---------------- - */ -typedef struct MinMaxExprState -{ - ExprState xprstate; - List *args; /* the arguments */ - FmgrInfo cfunc; /* lookup info for comparison func */ -} 
MinMaxExprState; - -/* ---------------- - * XmlExprState node - * ---------------- - */ -typedef struct XmlExprState -{ - ExprState xprstate; - List *named_args; /* ExprStates for named arguments */ - List *args; /* ExprStates for other arguments */ -} XmlExprState; - -/* ---------------- - * NullTestState node - * ---------------- - */ -typedef struct NullTestState -{ - ExprState xprstate; - ExprState *arg; /* input expression */ - /* used only if input is of composite type: */ - TupleDesc argdesc; /* tupdesc for most recent input */ -} NullTestState; - -/* ---------------- - * CoerceToDomainState node - * ---------------- - */ -typedef struct CoerceToDomainState -{ - ExprState xprstate; - ExprState *arg; /* input expression */ - /* Cached list of constraints that need to be checked */ - List *constraints; /* list of DomainConstraintState nodes */ -} CoerceToDomainState; - /* * DomainConstraintState - one item to check during CoerceToDomain * - * Note: this is just a Node, and not an ExprState, because it has no - * corresponding Expr to link to. Nonetheless it is part of an ExprState - * tree, so we give it a name following the xxxState convention. + * Note: we consider this to be part of an ExprState tree, so we give it + * a name following the xxxState convention. But there's no directly + * associated plan-tree node. 
*/ typedef enum DomainConstraintType { @@ -936,7 +773,8 @@ typedef struct DomainConstraintState NodeTag type; DomainConstraintType constrainttype; /* constraint type */ char *name; /* name of constraint (for error msgs) */ - ExprState *check_expr; /* for CHECK, a boolean expression */ + Expr *check_expr; /* for CHECK, a boolean expression */ + ExprState *check_exprstate; /* check_expr's eval state, or NULL */ } DomainConstraintState; @@ -965,16 +803,15 @@ typedef struct PlanState * nodes point to one EState for the whole * top-level plan */ - struct Instrumentation *instrument; /* Optional runtime stats for this - * plan node */ + Instrumentation *instrument; /* Optional runtime stats for this node */ + WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ /* * Common structural data for all Plan types. These links to subsidiary * state trees parallel links in the associated plan tree (except for the * subPlan list, which does not exist in the plan tree). */ - List *targetlist; /* target list to be computed at this node */ - List *qual; /* implicitly-ANDed qual conditions */ + ExprState *qual; /* boolean qual condition */ struct PlanState *lefttree; /* input plan tree(s) */ struct PlanState *righttree; List *initPlan; /* Init SubPlanState nodes (un-correlated expr @@ -992,8 +829,6 @@ typedef struct PlanState TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */ ExprContext *ps_ExprContext; /* node's expression-evaluation context */ ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */ - bool ps_TupFromTlist;/* state flag for processing set-valued - * functions in targetlist */ } PlanState; /* ---------------- @@ -1006,6 +841,18 @@ typedef struct PlanState #define innerPlanState(node) (((PlanState *)(node))->righttree) #define outerPlanState(node) (((PlanState *)(node))->lefttree) +/* Macros for inline access to certain instrumentation counters */ +#define InstrCountFiltered1(node, delta) \ + do { \ + if (((PlanState 
*)(node))->instrument) \ + ((PlanState *)(node))->instrument->nfiltered1 += (delta); \ + } while(0) +#define InstrCountFiltered2(node, delta) \ + do { \ + if (((PlanState *)(node))->instrument) \ + ((PlanState *)(node))->instrument->nfiltered2 += (delta); \ + } while(0) + /* * EPQState is state for executing an EvalPlanQual recheck on a candidate * tuple in ModifyTable or LockRows. The estate and planstate fields are @@ -1034,6 +881,22 @@ typedef struct ResultState bool rs_checkqual; /* do we need to check the qual? */ } ResultState; +/* ---------------- + * ProjectSetState information + * + * Note: at least one of the "elems" will be a SetExprState; the rest are + * regular ExprStates. + * ---------------- + */ +typedef struct ProjectSetState +{ + PlanState ps; /* its first field is NodeTag */ + Node **elems; /* array of expression states */ + ExprDoneCond *elemdone; /* array of per-SRF is-done states */ + int nelems; /* length of elemdone[] array */ + bool pending_srf_tuples; /* still evaluating srfs in tlist? */ +} ProjectSetState; + /* ---------------- * ModifyTableState information * ---------------- @@ -1041,13 +904,34 @@ typedef struct ResultState typedef struct ModifyTableState { PlanState ps; /* its first field is NodeTag */ - CmdType operation; + CmdType operation; /* INSERT, UPDATE, or DELETE */ + bool canSetTag; /* do we set the command tag/es_processed? */ + bool mt_done; /* are we done? */ PlanState **mt_plans; /* subplans (one per target rel) */ int mt_nplans; /* number of plans in the array */ int mt_whichplan; /* which one is being executed (0..n-1) */ + ResultRelInfo *resultRelInfo; /* per-subplan target relations */ List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */ EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */ bool fireBSTriggers; /* do we need to fire stmt triggers? 
*/ + OnConflictAction mt_onconflict; /* ON CONFLICT type */ + List *mt_arbiterindexes; /* unique index OIDs to arbitrate + * taking alt path */ + TupleTableSlot *mt_existing; /* slot to store existing target tuple in */ + List *mt_excludedtlist; /* the excluded pseudo relation's + * tlist */ + TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection + * target */ + struct PartitionDispatchData **mt_partition_dispatch_info; + /* Tuple-routing support info */ + int mt_num_dispatch; /* Number of entries in the above + * array */ + int mt_num_partitions; /* Number of members in the + * following arrays */ + ResultRelInfo *mt_partitions; /* Per partition result relation */ + TupleConversionMap **mt_partition_tupconv_maps; + /* Per partition tuple conversion map */ + TupleTableSlot *mt_partition_tuple_slot; } ModifyTableState; /* ---------------- @@ -1070,12 +954,10 @@ typedef struct AppendState * * nplans how many plans are in the array * nkeys number of sort key columns - * scankeys sort keys in ScanKey representation + * sortkeys sort keys in SortSupport representation * slots current output tuple of each subplan - * heap heap of active tuples (represented as array indexes) - * heap_size number of active heap entries + * heap heap of active tuples * initialized true if we have fetched first tuple from each subplan - * last_slot last subplan fetched from (which must be re-called) * ---------------- */ typedef struct MergeAppendState @@ -1084,12 +966,10 @@ typedef struct MergeAppendState PlanState **mergeplans; /* array of PlanStates for my inputs */ int ms_nplans; int ms_nkeys; - ScanKey ms_scankeys; /* array of length ms_nkeys */ + SortSupport ms_sortkeys; /* array of length ms_nkeys */ TupleTableSlot **ms_slots; /* array of length ms_nplans */ - int *ms_heap; /* array of length ms_nplans */ - int ms_heap_size; /* current active length of ms_heap[] */ - bool ms_initialized; /* are subplans started? 
*/ - int ms_last_slot; /* last subplan slot we returned from */ + struct binaryheap *ms_heap; /* binary heap of slot indices */ + bool ms_initialized; /* are subplans started? */ } MergeAppendState; /* ---------------- @@ -1167,11 +1047,33 @@ typedef struct ScanState TupleTableSlot *ss_ScanTupleSlot; } ScanState; -/* - * SeqScan uses a bare ScanState as its state node, since it needs - * no additional fields. +/* ---------------- + * SeqScanState information + * ---------------- + */ +typedef struct SeqScanState +{ + ScanState ss; /* its first field is NodeTag */ + Size pscan_len; /* size of parallel heap scan descriptor */ +} SeqScanState; + +/* ---------------- + * SampleScanState information + * ---------------- */ -typedef ScanState SeqScanState; +typedef struct SampleScanState +{ + ScanState ss; + List *args; /* expr states for TABLESAMPLE params */ + ExprState *repeatable; /* expr state for REPEATABLE expr */ + /* use struct pointer to avoid including tsmapi.h here */ + struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */ + void *tsm_state; /* tablesample method can keep state here */ + bool use_bulkread; /* use bulkread buffer access strategy? */ + bool use_pagemode; /* use page-at-a-time visibility checking? 
*/ + bool begun; /* false means need to call BeginSampleScan */ + uint32 seed; /* random seed */ +} SampleScanState; /* * These structs store information about index quals that don't have simple @@ -1199,6 +1101,7 @@ typedef struct * IndexScanState information * * indexqualorig execution state for indexqualorig expressions + * indexorderbyorig execution state for indexorderbyorig expressions * ScanKeys Skey structures for index quals * NumScanKeys number of ScanKeys * OrderByKeys Skey structures for index ordering operators @@ -1209,12 +1112,22 @@ typedef struct * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor + * + * ReorderQueue tuples that need reordering due to re-check + * ReachedEnd have we fetched all tuples from index already? + * OrderByValues values of ORDER BY exprs of last fetched tuple + * OrderByNulls null flags for OrderByValues + * SortSupport for reordering ORDER BY exprs + * OrderByTypByVals is the datatype of order by expression pass-by-value? 
+ * OrderByTypLens typlens of the datatypes of order by expressions + * pscan_len size of parallel index scan descriptor * ---------------- */ typedef struct IndexScanState { ScanState ss; /* its first field is NodeTag */ - List *indexqualorig; + ExprState *indexqualorig; + List *indexorderbyorig; ScanKey iss_ScanKeys; int iss_NumScanKeys; ScanKey iss_OrderByKeys; @@ -1225,8 +1138,56 @@ typedef struct IndexScanState ExprContext *iss_RuntimeContext; Relation iss_RelationDesc; IndexScanDesc iss_ScanDesc; + + /* These are needed for re-checking ORDER BY expr ordering */ + pairingheap *iss_ReorderQueue; + bool iss_ReachedEnd; + Datum *iss_OrderByValues; + bool *iss_OrderByNulls; + SortSupport iss_SortSupport; + bool *iss_OrderByTypByVals; + int16 *iss_OrderByTypLens; + Size iss_PscanLen; } IndexScanState; +/* ---------------- + * IndexOnlyScanState information + * + * indexqual execution state for indexqual expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * VMBuffer buffer in use for visibility map testing, if any + * HeapFetches number of tuples we were forced to fetch from heap + * ioss_PscanLen Size of parallel index-only scan descriptor + * ---------------- + */ +typedef struct IndexOnlyScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *indexqual; + ScanKey ioss_ScanKeys; + int ioss_NumScanKeys; + ScanKey ioss_OrderByKeys; + int ioss_NumOrderByKeys; + IndexRuntimeKeyInfo *ioss_RuntimeKeys; + int ioss_NumRuntimeKeys; + bool ioss_RuntimeKeysReady; + ExprContext *ioss_RuntimeContext; + Relation 
ioss_RelationDesc; + IndexScanDesc ioss_ScanDesc; + Buffer ioss_VMBuffer; + long ioss_HeapFetches; + Size ioss_PscanLen; +} IndexOnlyScanState; + /* ---------------- * BitmapIndexScanState information * @@ -1259,6 +1220,51 @@ typedef struct BitmapIndexScanState IndexScanDesc biss_ScanDesc; } BitmapIndexScanState; +/* ---------------- + * SharedBitmapState information + * + * BM_INITIAL TIDBitmap creation is not yet started, so first worker + * to see this state will set the state to BM_INPROGRESS + * and that process will be responsible for creating + * TIDBitmap. + * BM_INPROGRESS TIDBitmap creation is in progress; workers need to + * sleep until it's finished. + * BM_FINISHED TIDBitmap creation is done, so now all workers can + * proceed to iterate over TIDBitmap. + * ---------------- + */ +typedef enum +{ + BM_INITIAL, + BM_INPROGRESS, + BM_FINISHED +} SharedBitmapState; + +/* ---------------- + * ParallelBitmapHeapState information + * tbmiterator iterator for scanning current pages + * prefetch_iterator iterator for prefetching ahead of current page + * mutex mutual exclusion for the prefetching variable + * and state + * prefetch_pages # pages prefetch iterator is ahead of current + * prefetch_target current target prefetch distance + * state current state of the TIDBitmap + * cv conditional wait variable + * phs_snapshot_data snapshot data shared to workers + * ---------------- + */ +typedef struct ParallelBitmapHeapState +{ + dsa_pointer tbmiterator; + dsa_pointer prefetch_iterator; + slock_t mutex; + int prefetch_pages; + int prefetch_target; + SharedBitmapState state; + ConditionVariable cv; + char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; +} ParallelBitmapHeapState; + /* ---------------- * BitmapHeapScanState information * @@ -1266,40 +1272,57 @@ typedef struct BitmapIndexScanState * tbm bitmap obtained from child index scan(s) * tbmiterator iterator for scanning current pages * tbmres current-page data + * exact_pages total number of exact pages 
retrieved + * lossy_pages total number of lossy pages retrieved * prefetch_iterator iterator for prefetching ahead of current page * prefetch_pages # pages prefetch iterator is ahead of current - * prefetch_target target prefetch distance + * prefetch_target current target prefetch distance + * prefetch_maximum maximum value for prefetch_target + * pscan_len size of the shared memory for parallel bitmap + * initialized is node ready to iterate + * shared_tbmiterator shared iterator + * shared_prefetch_iterator shared iterator for prefetching + * pstate shared state for parallel bitmap scan * ---------------- */ typedef struct BitmapHeapScanState { ScanState ss; /* its first field is NodeTag */ - List *bitmapqualorig; + ExprState *bitmapqualorig; TIDBitmap *tbm; TBMIterator *tbmiterator; TBMIterateResult *tbmres; + long exact_pages; + long lossy_pages; TBMIterator *prefetch_iterator; int prefetch_pages; int prefetch_target; + int prefetch_maximum; + Size pscan_len; + bool initialized; + TBMSharedIterator *shared_tbmiterator; + TBMSharedIterator *shared_prefetch_iterator; + ParallelBitmapHeapState *pstate; } BitmapHeapScanState; /* ---------------- * TidScanState information * + * tidexprs list of TidExpr structs (see nodeTidscan.c) * isCurrentOf scan has a CurrentOfExpr qual * NumTids number of tids in this scan * TidPtr index of currently fetched tid * TidList evaluated item pointers (array of size NumTids) + * htup currently-fetched tuple, if any * ---------------- */ typedef struct TidScanState { ScanState ss; /* its first field is NodeTag */ - List *tss_tidquals; /* list of ExprState nodes */ + List *tss_tidexprs; bool tss_isCurrentOf; int tss_NumTids; int tss_TidPtr; - int tss_MarkTidPtr; ItemPointerData *tss_TidList; HeapTupleData tss_htup; } TidScanState; @@ -1324,18 +1347,28 @@ typedef struct SubqueryScanState * function appearing in FROM (typically a function returning set). 
* * eflags node's capability flags - * tupdesc expected return tuple description - * tuplestorestate private state of tuplestore.c - * funcexpr state for function expression being evaluated + * ordinality is this scan WITH ORDINALITY? + * simple true if we have 1 function and no ordinality + * ordinal current ordinal column value + * nfuncs number of functions being executed + * funcstates per-function execution states (private in + * nodeFunctionscan.c) + * argcontext memory context to evaluate function arguments in * ---------------- */ +struct FunctionScanPerFuncState; + typedef struct FunctionScanState { ScanState ss; /* its first field is NodeTag */ int eflags; - TupleDesc tupdesc; - Tuplestorestate *tuplestorestate; - ExprState *funcexpr; + bool ordinality; + bool simple; + int64 ordinal; + int nfuncs; + struct FunctionScanPerFuncState *funcstates; /* array of length + * nfuncs */ + MemoryContext argcontext; } FunctionScanState; /* ---------------- @@ -1347,7 +1380,6 @@ typedef struct FunctionScanState * exprlists array of expression lists being evaluated * array_len size of array * curr_idx current array index (0-based) - * marked_idx marked position (for mark/restore) * * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection * expressions attached to the node. We create a second ExprContext, @@ -1363,9 +1395,33 @@ typedef struct ValuesScanState List **exprlists; int array_len; int curr_idx; - int marked_idx; } ValuesScanState; +/* ---------------- + * TableFuncScanState node + * + * Used in table-expression functions like XMLTABLE. 
+ * ---------------- + */ +typedef struct TableFuncScanState +{ + ScanState ss; /* its first field is NodeTag */ + ExprState *docexpr; /* state for document expression */ + ExprState *rowexpr; /* state for row-generating expression */ + List *colexprs; /* state for column-generating expression */ + List *coldefexprs; /* state for column default expressions */ + List *ns_names; /* list of str nodes with namespace names */ + List *ns_uris; /* list of states of namespace uri exprs */ + Bitmapset *notnulls; /* nullability flag for each output column */ + void *opaque; /* table builder private space */ + const struct TableFuncRoutine *routine; /* table builder methods */ + FmgrInfo *in_functions; /* input function for each column */ + Oid *typioparams; /* typioparam for each column */ + int64 ordinal; /* row number to be output next */ + MemoryContext perValueCxt; /* short life context for value evaluation */ + Tuplestorestate *tupstore; /* output tuple store */ +} TableFuncScanState; + /* ---------------- * CteScanState information * @@ -1393,7 +1449,7 @@ typedef struct CteScanState * WorkTableScanState information * * WorkTableScan nodes are used to scan the work table created by - * a RecursiveUnion node. We locate the RecursiveUnion node + * a RecursiveUnion node. We locate the RecursiveUnion node * during executor startup. * ---------------- */ @@ -1412,11 +1468,38 @@ typedef struct WorkTableScanState typedef struct ForeignScanState { ScanState ss; /* its first field is NodeTag */ + ExprState *fdw_recheck_quals; /* original quals not in ss.ps.qual */ + Size pscan_len; /* size of parallel coordination information */ /* use struct pointer to avoid including fdwapi.h here */ struct FdwRoutine *fdwroutine; void *fdw_state; /* foreign-data wrapper can keep state here */ } ForeignScanState; +/* ---------------- + * CustomScanState information + * + * CustomScan nodes are used to execute custom code within executor. 
+ * + * Core code must avoid assuming that the CustomScanState is only as large as + * the structure declared here; providers are allowed to make it the first + * element in a larger structure, and typically would need to do so. The + * struct is actually allocated by the CreateCustomScanState method associated + * with the plan node. Any additional fields can be initialized there, or in + * the BeginCustomScan method. + * ---------------- + */ +struct CustomExecMethods; + +typedef struct CustomScanState +{ + ScanState ss; + uint32 flags; /* mask of CUSTOMPATH_* flags, see + * nodes/extensible.h */ + List *custom_ps; /* list of child PlanState nodes, if any */ + Size pscan_len; /* size of parallel coordination information */ + const struct CustomExecMethods *methods; +} CustomScanState; + /* ---------------------------------------------------------------- * Join State Information * ---------------------------------------------------------------- @@ -1432,7 +1515,7 @@ typedef struct JoinState { PlanState ps; JoinType jointype; - List *joinqual; /* JOIN quals (in addition to ps.qual) */ + ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */ } JoinState; /* ---------------- @@ -1530,7 +1613,7 @@ typedef struct HashJoinTableData *HashJoinTable; typedef struct HashJoinState { JoinState js; /* its first field is NodeTag */ - List *hashclauses; /* list of ExprState nodes */ + ExprState *hashclauses; List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ @@ -1590,7 +1673,7 @@ typedef struct SortState /* --------------------- * GroupState information - * ------------------------- + * --------------------- */ typedef struct GroupState { @@ -1609,32 +1692,59 @@ typedef struct GroupState * input group during evaluation of an Agg node's output tuple(s). 
We * create a second ExprContext, tmpcontext, in which to evaluate input * expressions and run the aggregate transition functions. - * ------------------------- + * --------------------- */ /* these structs are private in nodeAgg.c: */ typedef struct AggStatePerAggData *AggStatePerAgg; +typedef struct AggStatePerTransData *AggStatePerTrans; typedef struct AggStatePerGroupData *AggStatePerGroup; +typedef struct AggStatePerPhaseData *AggStatePerPhase; typedef struct AggState { ScanState ss; /* its first field is NodeTag */ List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) */ - FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ + int numtrans; /* number of pertrans items */ + AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ + AggStatePerPhase phase; /* pointer to current phase data */ + int numphases; /* number of phases */ + int current_phase; /* current phase number */ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ - MemoryContext aggcontext; /* memory context for long-lived data */ + AggStatePerTrans pertrans; /* per-Trans state information */ + ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ + AggStatePerTrans curpertrans; /* currently active trans state */ + bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ + int projected_set; /* The last projected grouping set */ + int current_set; /* The current grouping set being evaluated */ + Bitmapset *grouped_cols; /* grouped cols in current projection */ + List *all_grouped_cols; /* list of all grouped cols in DESC + * order */ + /* These fields are for grouping set phase data */ + int maxsets; /* The max number of sets in any phase */ + AggStatePerPhase phases; /* array of all phases */ + Tuplesortstate *sort_in; /* sorted input to phases > 0 */ + 
Tuplesortstate *sort_out; /* input is copied here for next phase */ + TupleTableSlot *sort_slot; /* slot for sort results */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ /* these fields are used in AGG_HASHED mode: */ TupleHashTable hashtable; /* hash table with one entry per group */ TupleTableSlot *hashslot; /* slot for loading hash table */ - List *hash_needed; /* list of columns needed in hash table */ + int numhashGrpCols; /* number of columns in hash table */ + int largestGrpColIdx; /* largest column required for hashing */ + AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */ + AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */ bool table_filled; /* hash table filled yet? */ TupleHashIterator hashiter; /* for iterating through hash table */ + /* support for evaluation of agg inputs */ + TupleTableSlot *evalslot; /* slot for agg inputs */ + ProjectionInfo *evalproj; /* projection machinery */ + TupleDesc evaldesc; /* descriptor of input tuples */ } AggState; /* ---------------- @@ -1677,7 +1787,8 @@ typedef struct WindowAggState Datum endOffsetValue; /* result of endOffset evaluation */ MemoryContext partcontext; /* context for partition-lifespan data */ - MemoryContext aggcontext; /* context for each aggregate data */ + MemoryContext aggcontext; /* shared context for aggregate working data */ + MemoryContext curaggcontext; /* current aggregate's working data */ ExprContext *tmpcontext; /* short-term evaluation context */ bool all_first; /* true if the scan is starting */ @@ -1705,7 +1816,7 @@ typedef struct WindowAggState * UniqueState information * * Unique nodes are used "on top of" sort nodes to discard - * duplicate tuples returned from the sort phase. Basically + * duplicate tuples returned from the sort phase. 
Basically * all it does is compare the current tuple from the subplan * with the previously fetched tuple (stored in its result slot). * If the two are identical in all interesting fields, then @@ -1719,6 +1830,55 @@ typedef struct UniqueState MemoryContext tempContext; /* short-term context for comparisons */ } UniqueState; +/* ---------------- + * GatherState information + * + * Gather nodes launch 1 or more parallel workers, run a subplan + * in those workers, and collect the results. + * ---------------- + */ +typedef struct GatherState +{ + PlanState ps; /* its first field is NodeTag */ + bool initialized; + struct ParallelExecutorInfo *pei; + int nreaders; + int nextreader; + int nworkers_launched; + struct TupleQueueReader **reader; + TupleTableSlot *funnel_slot; + bool need_to_scan_locally; +} GatherState; + +/* ---------------- + * GatherMergeState information + * + * Gather merge nodes launch 1 or more parallel workers, run a + * subplan which produces sorted output in each worker, and then + * merge the results into a single sorted stream. + * ---------------- + */ +struct GMReaderTuple; /* NOTE(review): gm_tuple_buffers below is declared with struct GMReaderTupleBuffer, not this tag -- confirm which name is intended */ + +typedef struct GatherMergeState +{ + PlanState ps; /* its first field is NodeTag */ + bool initialized; + struct ParallelExecutorInfo *pei; + int nreaders; + int nworkers_launched; + struct TupleQueueReader **reader; + TupleDesc tupDesc; + TupleTableSlot **gm_slots; + struct binaryheap *gm_heap; /* binary heap of slot indices */ + bool gm_initialized; /* gather merge initialized? */ + bool need_to_scan_locally; + int gm_nkeys; + SortSupport gm_sortkeys; /* array of length gm_nkeys */ + struct GMReaderTupleBuffer *gm_tuple_buffers; /* tuple buffer per + * reader */ +} GatherMergeState; + /* ---------------- * HashState information * ---------------- @@ -1764,7 +1924,7 @@ typedef struct SetOpState /* ---------------- * LockRowsState information * - * LockRows nodes are used to enforce FOR UPDATE/FOR SHARE locking. 
+ * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking. * ---------------- */ typedef struct LockRowsState @@ -1772,6 +1932,8 @@ typedef struct LockRowsState PlanState ps; /* its first field is NodeTag */ List *lr_arowMarks; /* List of ExecAuxRowMarks */ EPQState lr_epqstate; /* for evaluating EvalPlanQual rechecks */ + HeapTuple *lr_curtuples; /* locked tuples (one entry per RT entry) */ + int lr_ntables; /* length of lr_curtuples[] array */ } LockRowsState; /* ----------------