From d73f4c74dd34b19c19839f7ae09fb96442728509 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 4 Oct 2018 15:48:17 -0400 Subject: [PATCH] In the executor, use an array of pointers to access the rangetable. Instead of doing a lot of list_nth() accesses to es_range_table, create a flattened pointer array during executor startup and index into that to get at individual RangeTblEntrys. This eliminates one source of O(N^2) behavior with lots of partitions. (I'm not exactly convinced that it's the most important source, but it's an easy one to fix.) Amit Langote and David Rowley Discussion: https://postgr.es/m/468c85d9-540e-66a2-1dde-fec2b741e688@lab.ntt.co.jp --- contrib/postgres_fdw/postgres_fdw.c | 12 +++--- src/backend/commands/copy.c | 5 +-- src/backend/commands/trigger.c | 2 +- src/backend/executor/execExprInterp.c | 6 +-- src/backend/executor/execMain.c | 34 +++++++--------- src/backend/executor/execUtils.c | 49 ++++++++++++++++++++++-- src/backend/executor/nodeLockRows.c | 2 +- src/backend/replication/logical/worker.c | 3 +- src/include/executor/executor.h | 9 +++++ src/include/nodes/execnodes.h | 7 +++- src/include/parser/parsetree.h | 10 ----- 11 files changed, 87 insertions(+), 52 deletions(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index c02287a55c..b91b7d27de 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1345,7 +1345,7 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) rtindex = fsplan->scan.scanrelid; else rtindex = bms_next_member(fsplan->fs_relids, -1); - rte = rt_fetch(rtindex, estate->es_range_table); + rte = exec_rt_fetch(rtindex, estate); userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); /* Get info about foreign table. */ @@ -1731,8 +1731,8 @@ postgresBeginForeignModify(ModifyTableState *mtstate, FdwModifyPrivateRetrievedAttrs); /* Find RTE. */ - rte = rt_fetch(resultRelInfo->ri_RangeTableIndex, - mtstate->ps.state->es_range_table); + rte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex, + mtstate->ps.state); /* Construct an execution state. */ fmstate = create_foreign_modify(mtstate->ps.state, @@ -2036,7 +2036,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, * correspond to this partition if it is one of the UPDATE subplan target * rels; in that case, we can just use the existing RTE as-is. */ - rte = list_nth(estate->es_range_table, resultRelation - 1); + rte = exec_rt_fetch(resultRelation, estate); if (rte->relid != RelationGetRelid(rel)) { rte = copyObject(rte); @@ -2396,7 +2396,7 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags) * ExecCheckRTEPerms() does. */ rtindex = estate->es_result_relation_info->ri_RangeTableIndex; - rte = rt_fetch(rtindex, estate->es_range_table); + rte = exec_rt_fetch(rtindex, estate); userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); /* Get info about foreign table. */ @@ -5752,7 +5752,7 @@ conversion_error_callback(void *arg) RangeTblEntry *rte; Var *var = (Var *) tle->expr; - rte = rt_fetch(var->varno, estate->es_range_table); + rte = exec_rt_fetch(var->varno, estate); if (var->varattno == 0) is_wholerow = true; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index df98e4ac62..86b0fb300f 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2484,9 +2484,8 @@ CopyFrom(CopyState cstate) estate->es_result_relations = resultRelInfo; estate->es_num_result_relations = 1; estate->es_result_relation_info = resultRelInfo; - estate->es_range_table = cstate->range_table; - estate->es_relations = (Relation *) palloc0(list_length(cstate->range_table) * - sizeof(Relation)); + + ExecInitRangeTable(estate, cstate->range_table); /* Set up a tuple slot too */ myslot = ExecInitExtraTupleSlot(estate, tupDesc); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 136f9f0627..240e85e391 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -75,7 +75,7 @@ static int MyTriggerDepth = 0; * to be changed, however. */ #define GetUpdatedColumns(relinfo, estate) \ - (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols) + (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols) /* Local function prototypes */ static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid); diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index c549e3db5d..1b0946b02f 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3934,10 +3934,10 @@ ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext) * perhaps other places.) */ if (econtext->ecxt_estate && - variable->varno <= list_length(econtext->ecxt_estate->es_range_table)) + variable->varno <= econtext->ecxt_estate->es_range_table_size) { - RangeTblEntry *rte = rt_fetch(variable->varno, - econtext->ecxt_estate->es_range_table); + RangeTblEntry *rte = exec_rt_fetch(variable->varno, + econtext->ecxt_estate); if (rte->eref) ExecTypeSetColNames(output_tupdesc, rte->eref->colnames); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 8cbd75ed7a..71b720eec9 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -109,9 +109,9 @@ static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate, * to be changed, however. */ #define GetInsertedColumns(relinfo, estate) \ - (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols) + (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->insertedCols) #define GetUpdatedColumns(relinfo, estate) \ - (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols) + (exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols) /* end of local decls */ @@ -823,15 +823,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) /* * initialize the node's execution state */ - estate->es_range_table = rangeTable; - - /* - * Allocate an array to store an open Relation corresponding to each - * rangeTable item, and initialize entries to NULL. Relations are opened - * and stored here as needed. - */ - estate->es_relations = (Relation *) palloc0(list_length(rangeTable) * - sizeof(Relation)); + ExecInitRangeTable(estate, rangeTable); estate->es_plannedstmt = plannedstmt; @@ -918,9 +910,9 @@ InitPlan(QueryDesc *queryDesc, int eflags) resultRelIndex)) { Relation resultRelDesc; + Oid reloid = exec_rt_fetch(resultRelIndex, estate)->relid; - resultRelDesc = heap_open(getrelid(resultRelIndex, rangeTable), - NoLock); + resultRelDesc = heap_open(reloid, NoLock); Assert(CheckRelationLockedByMe(resultRelDesc, RowExclusiveLock, true)); heap_close(resultRelDesc, NoLock); } @@ -962,7 +954,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) continue; /* get relation's OID (will produce InvalidOid if subquery) */ - relid = getrelid(rc->rti, rangeTable); + relid = exec_rt_fetch(rc->rti, estate)->relid; switch (rc->markType) { @@ -1619,8 +1611,8 @@ static void ExecEndPlan(PlanState *planstate, EState *estate) { ResultRelInfo *resultRelInfo; - int num_relations; - int i; + Index num_relations; + Index i; ListCell *l; /* @@ -1661,7 +1653,7 @@ ExecEndPlan(PlanState *planstate, EState *estate) * close whatever rangetable Relations have been opened. We did not * acquire locks in ExecGetRangeTableRelation, so don't release 'em here. */ - num_relations = list_length(estate->es_range_table); + num_relations = estate->es_range_table_size; for (i = 0; i < num_relations; i++) { if (estate->es_relations[i]) @@ -3087,7 +3079,7 @@ EvalPlanQualBegin(EPQState *epqstate, EState *parentestate) /* * We already have a suitable child EPQ tree, so just reset it. */ - int rtsize = list_length(parentestate->es_range_table); + Index rtsize = parentestate->es_range_table_size; PlanState *planstate = epqstate->planstate; MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool)); @@ -3136,11 +3128,11 @@ static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) { EState *estate; - int rtsize; + Index rtsize; MemoryContext oldcontext; ListCell *l; - rtsize = list_length(parentestate->es_range_table); + rtsize = parentestate->es_range_table_size; epqstate->estate = estate = CreateExecutorState(); @@ -3164,6 +3156,8 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) estate->es_snapshot = parentestate->es_snapshot; estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot; estate->es_range_table = parentestate->es_range_table; + estate->es_range_table_array = parentestate->es_range_table_array; + estate->es_range_table_size = parentestate->es_range_table_size; estate->es_relations = parentestate->es_relations; estate->es_plannedstmt = parentestate->es_plannedstmt; estate->es_junkFilter = parentestate->es_junkFilter; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index a89ef2a482..b75062f74c 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -26,6 +26,8 @@ * * ExecOpenScanRelation Common code for scan node init routines. * + * ExecInitRangeTable Set up executor's range-table-related data. + * * ExecGetRangeTableRelation Fetch Relation for a rangetable entry. * * executor_errposition Report syntactic position of an error. @@ -108,6 +110,8 @@ CreateExecutorState(void) estate->es_snapshot = InvalidSnapshot; /* caller must initialize this */ estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ estate->es_range_table = NIL; + estate->es_range_table_array = NULL; + estate->es_range_table_size = 0; estate->es_relations = NULL; estate->es_plannedstmt = NULL; @@ -670,6 +674,43 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags) return rel; } +/* + * ExecInitRangeTable + * Set up executor's range-table-related data + * + * We build an array from the range table list to allow faster lookup by RTI. + * (The es_range_table field is now somewhat redundant, but we keep it to + * avoid breaking external code unnecessarily.) + * This is also a convenient place to set up the parallel es_relations array. + */ +void +ExecInitRangeTable(EState *estate, List *rangeTable) +{ + Index rti; + ListCell *lc; + + /* Remember the range table List as-is */ + estate->es_range_table = rangeTable; + + /* Set up the equivalent array representation */ + estate->es_range_table_size = list_length(rangeTable); + estate->es_range_table_array = (RangeTblEntry **) + palloc(estate->es_range_table_size * sizeof(RangeTblEntry *)); + rti = 0; + foreach(lc, rangeTable) + { + estate->es_range_table_array[rti++] = lfirst_node(RangeTblEntry, lc); + } + + /* + * Allocate an array to store an open Relation corresponding to each + * rangetable entry, and initialize entries to NULL. Relations are opened + * and stored here as needed. + */ + estate->es_relations = (Relation *) + palloc0(estate->es_range_table_size * sizeof(Relation)); +} + /* * ExecGetRangeTableRelation * Open the Relation for a range table entry, if not already done @@ -681,13 +722,13 @@ ExecGetRangeTableRelation(EState *estate, Index rti) { Relation rel; - Assert(rti > 0 && rti <= list_length(estate->es_range_table)); + Assert(rti > 0 && rti <= estate->es_range_table_size); rel = estate->es_relations[rti - 1]; if (rel == NULL) { /* First time through, so open the relation */ - RangeTblEntry *rte = rt_fetch(rti, estate->es_range_table); + RangeTblEntry *rte = exec_rt_fetch(rti, estate); Assert(rte->rtekind == RTE_RELATION); @@ -876,7 +917,7 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate) ListCell *l; Index rti = lfirst_int(lc); bool is_result_rel = false; - Oid relid = getrelid(rti, estate->es_range_table); + Oid relid = exec_rt_fetch(rti, estate)->relid; /* If this is a result relation, already locked in InitPlan */ foreach(l, stmt->nonleafResultRelations) @@ -911,7 +952,7 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate) else lockmode = AccessShareLock; - Assert(lockmode == rt_fetch(rti, estate->es_range_table)->rellockmode); + Assert(lockmode == exec_rt_fetch(rti, estate)->rellockmode); LockRelationOid(relid, lockmode); } diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 30de8a95ab..6db345ae7a 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -400,7 +400,7 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags) /* * Create workspace in which we can remember per-RTE locked tuples */ - lrstate->lr_ntables = list_length(estate->es_range_table); + lrstate->lr_ntables = estate->es_range_table_size; lrstate->lr_curtuples = (HeapTuple *) palloc0(lrstate->lr_ntables * sizeof(HeapTuple)); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index c3443c5a2c..277da69fa6 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -200,8 +200,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) rte->relid = RelationGetRelid(rel->localrel); rte->relkind = rel->localrel->rd_rel->relkind; rte->rellockmode = AccessShareLock; - estate->es_range_table = list_make1(rte); - estate->es_relations = (Relation *) palloc0(1 * sizeof(Relation)); + ExecInitRangeTable(estate, list_make1(rte)); resultRelInfo = makeNode(ResultRelInfo); InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 83ce989e3a..830f42dfe3 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -514,6 +514,15 @@ extern bool ExecRelationIsTargetRelation(EState *estate, Index scanrelid); extern Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags); +extern void ExecInitRangeTable(EState *estate, List *rangeTable); + +static inline RangeTblEntry * +exec_rt_fetch(Index rti, EState *estate) +{ + Assert(rti > 0 && rti <= estate->es_range_table_size); + return estate->es_range_table_array[rti - 1]; +} + extern Relation ExecGetRangeTableRelation(EState *estate, Index rti); extern int executor_errposition(EState *estate, int location); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 35646231a4..657b593663 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -36,6 +36,7 @@ struct PlanState; /* forward references in this file */ struct ParallelHashJoinState; struct ExprState; struct ExprContext; +struct RangeTblEntry; /* avoid including parsenodes.h here */ struct ExprEvalStep; /* avoid including execExpr.h everywhere */ @@ -486,7 +487,9 @@ typedef struct EState Snapshot es_snapshot; /* time qual to use */ Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */ List *es_range_table; /* List of RangeTblEntry */ - Relation *es_relations; /* Array of per-es_range_table-entry Relation + struct RangeTblEntry **es_range_table_array; /* equivalent array */ + Index es_range_table_size; /* size of the range table arrays */ + Relation *es_relations; /* Array of per-range-table-entry Relation * pointers, or NULL if not yet opened */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ const char *es_sourceText; /* Source text from QueryDesc */ @@ -563,7 +566,7 @@ typedef struct EState * return, or NULL if nothing to return; es_epqTupleSet[] is true if a * particular array entry is valid; and es_epqScanDone[] is state to * remember if the tuple has been returned already. Arrays are of size - * list_length(es_range_table) and are indexed by scan node scanrelid - 1. + * es_range_table_size and are indexed by scan node scanrelid - 1. */ HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */ bool *es_epqTupleSet; /* true if EPQ tuple is provided */ diff --git a/src/include/parser/parsetree.h b/src/include/parser/parsetree.h index dd9ae658ac..fe16d7d1fa 100644 --- a/src/include/parser/parsetree.h +++ b/src/include/parser/parsetree.h @@ -31,16 +31,6 @@ #define rt_fetch(rangetable_index, rangetable) \ ((RangeTblEntry *) list_nth(rangetable, (rangetable_index)-1)) -/* - * getrelid - * - * Given the range index of a relation, return the corresponding - * relation OID. Note that InvalidOid will be returned if the - * RTE is for a non-relation-type RTE. - */ -#define getrelid(rangeindex,rangetable) \ - (rt_fetch(rangeindex, rangetable)->relid) - /* * Given an RTE and an attribute number, return the appropriate * variable name or alias for that attribute of that RTE. -- 2.40.0