From: Tom Lane
Date: Tue, 11 Oct 2011 18:20:06 +0000 (-0400)
Subject: Rearrange the implementation of index-only scans.
X-Git-Tag: REL9_2_BETA1~998
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a0185461dd94c8d31d8d55a7f2839b0d2f172ab9;p=postgresql

Rearrange the implementation of index-only scans.

This commit changes index-only scans so that data is read directly from the index tuple without first generating a faux heap tuple. The only immediate benefit is that indexes on system columns (such as OID) can be used in index-only scans, but this is necessary infrastructure if we are ever to support index-only scans on expression indexes. The executor is now ready for that, though the planner still needs substantial work to recognize the possibility.

To do this, Vars in index-only plan nodes have to refer to index columns not heap columns. I introduced a new special varno, INDEX_VAR, to mark such Vars to avoid confusion. (In passing, this commit renames the two existing special varnos to OUTER_VAR and INNER_VAR.) This allows ruleutils.c to handle them with logic similar to what we use for subplan reference Vars.

Since index-only scans are now fundamentally different from regular indexscans so far as their expression subtrees are concerned, I also chose to change them to have their own plan node type (and hence, their own executor source file).
--- diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index fbcaf6cbe0..e38de5c153 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -79,6 +79,8 @@ static void show_instrumentation_count(const char *qlabel, int which, PlanState *planstate, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); +static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es); static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); @@ -656,10 +658,10 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = sname = "Seq Scan"; break; case T_IndexScan: - if (((IndexScan *) plan)->indexonly) - pname = sname = "Index Only Scan"; - else - pname = sname = "Index Scan"; + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; break; case T_BitmapIndexScan: pname = sname = "Bitmap Index Scan"; @@ -793,42 +795,6 @@ ExplainNode(PlanState *planstate, List *ancestors, switch (nodeTag(plan)) { - case T_IndexScan: - { - IndexScan *indexscan = (IndexScan *) plan; - const char *indexname = - explain_get_index_name(indexscan->indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (ScanDirectionIsBackward(indexscan->indexorderdir)) - appendStringInfoString(es->str, " Backward"); - appendStringInfo(es->str, " using %s", indexname); - } - else - { - const char *scandir; - - switch (indexscan->indexorderdir) - { - case BackwardScanDirection: - scandir = "Backward"; - break; - case NoMovementScanDirection: - scandir = "NoMovement"; - break; - case ForwardScanDirection: - scandir = "Forward"; - break; - default: - scandir = "???"; - break; - } - ExplainPropertyText("Scan Direction", scandir, es); - 
ExplainPropertyText("Index Name", indexname, es); - } - } - /* FALL THRU */ case T_SeqScan: case T_BitmapHeapScan: case T_TidScan: @@ -840,6 +806,26 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_ForeignScan: ExplainScanTarget((Scan *) plan, es); break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; case T_BitmapIndexScan: { BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; @@ -1014,6 +1000,19 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->indexqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); @@ -1626,6 +1625,45 @@ explain_get_index_name(Oid indexId) return result; } +/* + * Add some additional details about an IndexScan or IndexOnlyScan + */ +static void +ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es) +{ + const char *indexname = explain_get_index_name(indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if 
(ScanDirectionIsBackward(indexorderdir)) + appendStringInfoString(es->str, " Backward"); + appendStringInfo(es->str, " using %s", indexname); + } + else + { + const char *scandir; + + switch (indexorderdir) + { + case BackwardScanDirection: + scandir = "Backward"; + break; + case NoMovementScanDirection: + scandir = "NoMovement"; + break; + case ForwardScanDirection: + scandir = "Forward"; + break; + default: + scandir = "???"; + break; + } + ExplainPropertyText("Scan Direction", scandir, es); + ExplainPropertyText("Index Name", indexname, es); + } +} + /* * Show the target of a Scan node */ @@ -1670,6 +1708,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_ForeignScan: diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 06d368e077..9fb9754848 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2734,9 +2734,9 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, oldContext = MemoryContextSwitchTo(estate->es_query_cxt); tgqual = stringToNode(trigger->tgqual); - /* Change references to OLD and NEW to INNER and OUTER */ - ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER, 0); - ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER, 0); + /* Change references to OLD and NEW to INNER_VAR and OUTER_VAR */ + ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER_VAR, 0); + ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER_VAR, 0); /* ExecQual wants implicit-AND form */ tgqual = (Node *) make_ands_implicit((Expr *) tgqual); *predicate = (List *) ExecPrepareExpr((Expr *) tgqual, estate); @@ -2783,7 +2783,7 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, /* * Finally evaluate the expression, making the old and/or new tuples - * available as INNER/OUTER respectively. + * available as INNER_VAR/OUTER_VAR respectively. 
*/ econtext->ecxt_innertuple = oldslot; econtext->ecxt_outertuple = newslot; diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index a854c9a5dc..6081b56c08 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -17,7 +17,8 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ - nodeHashjoin.o nodeIndexscan.o nodeLimit.o nodeLockRows.o \ + nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 711e8c7786..fa27640fed 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -26,6 +26,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -155,6 +156,10 @@ ExecReScan(PlanState *node) ExecReScanIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecReScanBitmapIndexScan((BitmapIndexScanState *) node); break; @@ -273,6 +278,10 @@ ExecMarkPos(PlanState *node) ExecIndexMarkPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: ExecTidMarkPos((TidScanState *) node); break; @@ -326,6 +335,10 @@ ExecRestrPos(PlanState *node) ExecIndexRestrPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + 
ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: ExecTidRestrPos((TidScanState *) node); break; @@ -371,6 +384,7 @@ ExecSupportsMarkRestore(NodeTag plantype) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_TidScan: case T_ValuesScan: case T_Material: @@ -442,6 +456,10 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); + case T_IndexOnlyScan: + return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) && + TargetListSupportsBackwardScan(node->targetlist); + case T_SubqueryScan: return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan) && TargetListSupportsBackwardScan(node->targetlist); @@ -474,7 +492,8 @@ TargetListSupportsBackwardScan(List *targetlist) } /* - * An IndexScan node supports backward scan only if the index's AM does. + * An IndexScan or IndexOnlyScan node supports backward scan only if the + * index's AM does. 
*/ static bool IndexSupportsBackwardScan(Oid indexid) diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 61a5f47112..5d70ad60de 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -262,6 +262,7 @@ search_plan_tree(PlanState *node, Oid table_oid) */ case T_SeqScanState: case T_IndexScanState: + case T_IndexOnlyScanState: case T_BitmapHeapScanState: case T_TidScanState: { diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 284fc6a63b..8ab9892c85 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -89,6 +89,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -192,6 +193,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_IndexOnlyScan: + result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, + estate, eflags); + break; + case T_BitmapIndexScan: result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node, estate, eflags); @@ -397,6 +403,10 @@ ExecProcNode(PlanState *node) result = ExecIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + result = ExecIndexOnlyScan((IndexOnlyScanState *) node); + break; + /* BitmapIndexScanState does not yield tuples */ case T_BitmapHeapScanState: @@ -627,6 +637,10 @@ ExecEndNode(PlanState *node) ExecEndIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecEndIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecEndBitmapIndexScan((BitmapIndexScanState *) node); break; diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 80f08d8b92..887e5ce82a 100644 --- a/src/backend/executor/execQual.c +++ 
b/src/backend/executor/execQual.c @@ -578,14 +578,16 @@ ExecEvalVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -761,14 +763,16 @@ ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -804,14 +808,16 @@ ExecEvalWholeRowVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -873,14 +879,16 @@ ExecEvalWholeRowSlow(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) 
{ - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index d4ed235856..42acc102c6 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -246,10 +246,17 @@ void ExecAssignScanProjectionInfo(ScanState *node) { Scan *scan = (Scan *) node->ps.plan; + Index varno; + + /* Vars in an index-only scan's tlist should be INDEX_VAR */ + if (IsA(scan, IndexOnlyScan)) + varno = INDEX_VAR; + else + varno = scan->scanrelid; if (tlist_matches_tupdesc(&node->ps, scan->plan.targetlist, - scan->scanrelid, + varno, node->ss_ScanTupleSlot->tts_tupleDescriptor)) node->ps.ps_ProjInfo = NULL; else diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 4dbf10b8da..65591e2445 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -566,20 +566,22 @@ ExecBuildProjectionInfo(List *targetList, switch (variable->varno) { - case INNER: + case INNER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_innertuple); if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_outertuple); if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_scantuple); @@ -628,16 +630,18 @@ get_last_attnums(Node *node, ProjectionInfo *projInfo) switch (variable->varno) { - case INNER: + 
case INNER_VAR: if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: if (projInfo->pi_lastScanVar < attnum) projInfo->pi_lastScanVar = attnum; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index e769d6d012..0701da40b1 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -806,8 +806,8 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) { Var *var = (Var *) node; - /* setrefs.c should have set the varno to OUTER */ - Assert(var->varno == OUTER); + /* setrefs.c should have set the varno to OUTER_VAR */ + Assert(var->varno == OUTER_VAR); Assert(var->varlevelsup == 0); *colnos = bms_add_member(*colnos, var->varattno); return false; diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 8e1df079b3..8cc8315a45 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -266,7 +266,6 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->biss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->biss_ScanKeys, diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index e72a71bf51..091aef90e0 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -755,8 +755,8 @@ ExecHashTableInsert(HashJoinTable hashtable, * Compute the hash value for a tuple * * The tuple to be tested must be in either econtext->ecxt_outertuple or - * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference - * either OUTER or INNER. + * econtext->ecxt_innertuple. Vars in the hashkeys expressions should have + * varno either OUTER_VAR or INNER_VAR. 
* * A TRUE result means the tuple's hash value has been successfully computed * and stored at *hashvalue. A FALSE result means the tuple cannot match diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c new file mode 100644 index 0000000000..487373b497 --- /dev/null +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -0,0 +1,542 @@ +/*------------------------------------------------------------------------- + * + * nodeIndexonlyscan.c + * Routines to support index-only scans + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeIndexonlyscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecIndexOnlyScan scans an index + * IndexOnlyNext retrieve next tuple + * ExecInitIndexOnlyScan creates and initializes state info. + * ExecReScanIndexOnlyScan rescans the indexed relation. + * ExecEndIndexOnlyScan releases all storage. + * ExecIndexOnlyMarkPos marks scan position. + * ExecIndexOnlyRestrPos restores scan position. + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/visibilitymap.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "executor/execdebug.h" +#include "executor/nodeIndexonlyscan.h" +#include "executor/nodeIndexscan.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node); +static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, + Relation indexRel); + + +/* ---------------------------------------------------------------- + * IndexOnlyNext + * + * Retrieve a tuple from the IndexOnlyScan node's index. 
+ * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexOnlyNext(IndexOnlyScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + IndexScanDesc scandesc; + HeapTuple tuple; + TupleTableSlot *slot; + ItemPointer tid; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + scandesc = node->ioss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + /* + * OK, now that we have what we need, fetch the next tuple. + */ + while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + { + /* + * We can skip the heap fetch if the TID references a heap page on + * which all tuples are known visible to everybody. In any case, + * we'll use the index tuple not the heap tuple as the data source. + */ + if (!visibilitymap_test(scandesc->heapRelation, + ItemPointerGetBlockNumber(tid), + &node->ioss_VMBuffer)) + { + /* + * Rats, we have to visit the heap to check visibility. + */ + tuple = index_fetch_heap(scandesc); + if (tuple == NULL) + continue; /* no visible tuple, try next index entry */ + + /* + * Only MVCC snapshots are supported here, so there should be no + * need to keep following the HOT chain once a visible entry has + * been found. If we did want to allow that, we'd need to keep + * more state to remember not to call index_getnext_tid next time. + */ + if (scandesc->xs_continue_hot) + elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); + + /* + * Note: at this point we are holding a pin on the heap page, as + * recorded in scandesc->xs_cbuf. 
We could release that pin now, + * but it's not clear whether it's a win to do so. The next index + * entry might require a visit to the same heap page. + */ + } + + /* + * Fill the scan tuple slot with data from the index. + */ + StoreIndexTuple(slot, scandesc->xs_itup, scandesc->indexRelation); + + /* + * If the index was lossy, we have to recheck the index quals. + * (Currently, this can never happen, but we should support the case + * for possible future use, eg with GiST indexes.) + */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqual, econtext, false)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + continue; + } + } + + return slot; + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ + return ExecClearTuple(slot); +} + +/* + * StoreIndexTuple + * Fill the slot with data from the index tuple. + * + * At some point this might be generally-useful functionality, but + * right now we don't need it elsewhere. + */ +static void +StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, Relation indexRel) +{ + TupleDesc indexDesc = RelationGetDescr(indexRel); + int nindexatts = indexDesc->natts; + Datum *values = slot->tts_values; + bool *isnull = slot->tts_isnull; + int i; + + /* + * Note: we must use the index relation's tupdesc in index_getattr, + * not the slot's tupdesc, because of index_descriptor_hack(). + */ + Assert(slot->tts_tupleDescriptor->natts == nindexatts); + + ExecClearTuple(slot); + for (i = 0; i < nindexatts; i++) + values[i] = index_getattr(itup, i + 1, indexDesc, &isnull[i]); + ExecStoreVirtualTuple(slot); +} + +/* + * index_descriptor_hack -- ugly kluge to make index's tupdesc OK for slot + * + * This is necessary because, alone among btree opclasses, name_ops uses + * a storage type (cstring) different from its input type. 
The index + * tuple descriptor will show "cstring", which is correct, but we have to + * expose "name" as the slot datatype or ExecEvalVar will whine. If we + * ever want to have any other cases with a different storage type, we ought + * to think of a cleaner solution than this. + */ +static TupleDesc +index_descriptor_hack(Relation indexRel) +{ + TupleDesc tupdesc = RelationGetDescr(indexRel); + int i; + + /* copy so we can scribble on it safely */ + tupdesc = CreateTupleDescCopy(tupdesc); + + for (i = 0; i < tupdesc->natts; i++) + { + if (indexRel->rd_opfamily[i] == NAME_BTREE_FAM_OID && + tupdesc->attrs[i]->atttypid == CSTRINGOID) + { + tupdesc->attrs[i]->atttypid = NAMEOID; + + /* + * We set attlen to match the type OID just in case anything looks + * at it. Note that this is safe only because StoreIndexTuple + * will insert the data as a virtual tuple, and we don't expect + * anything will try to materialize the scan tuple slot. + */ + tupdesc->attrs[i]->attlen = NAMEDATALEN; + } + } + + return tupdesc; +} + +/* + * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual + * + * This can't really happen, since an index can't supply CTID which would + * be necessary data for any potential EvalPlanQual target relation. If it + * did happen, the EPQ code would pass us the wrong data, namely a heap + * tuple not an index tuple. So throw an error. + */ +static bool +IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot) +{ + elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans"); + return false; /* keep compiler quiet */ +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyScan(node) + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we have runtime keys and they've not already been set up, do it now. 
+ */ + if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady) + ExecReScan((PlanState *) node); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexOnlyNext, + (ExecScanRecheckMtd) IndexOnlyRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanIndexOnlyScan(node) + * + * Recalculates the values of any scan keys whose value depends on + * information known at runtime, then rescans the indexed relation. + * + * Updating the scan key was formerly done separately in + * ExecUpdateIndexScanKeys. Integrating it into ReScan makes + * rescans of indices and relations/general streams more uniform. + * ---------------------------------------------------------------- + */ +void +ExecReScanIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we are doing runtime key calculations (ie, any of the index key + * values weren't simple Consts), compute the new key values. But first, + * reset the context so we don't leak memory as each outer tuple is + * scanned. Note this assumes that we will recalculate *all* runtime keys + * on each call. 
+ */ + if (node->ioss_NumRuntimeKeys != 0) + { + ExprContext *econtext = node->ioss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, + node->ioss_RuntimeKeys, + node->ioss_NumRuntimeKeys); + } + node->ioss_RuntimeKeysReady = true; + + /* reset index scan */ + index_rescan(node->ioss_ScanDesc, + node->ioss_ScanKeys, node->ioss_NumScanKeys, + node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + + +/* ---------------------------------------------------------------- + * ExecEndIndexOnlyScan + * ---------------------------------------------------------------- + */ +void +ExecEndIndexOnlyScan(IndexOnlyScanState *node) +{ + Relation indexRelationDesc; + IndexScanDesc indexScanDesc; + Relation relation; + + /* + * extract information from the node + */ + indexRelationDesc = node->ioss_RelationDesc; + indexScanDesc = node->ioss_ScanDesc; + relation = node->ss.ss_currentRelation; + + /* Release VM buffer pin, if any. */ + if (node->ioss_VMBuffer != InvalidBuffer) + { + ReleaseBuffer(node->ioss_VMBuffer); + node->ioss_VMBuffer = InvalidBuffer; + } + + /* + * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext + */ +#ifdef NOT_USED + ExecFreeExprContext(&node->ss.ps); + if (node->ioss_RuntimeContext) + FreeExprContext(node->ioss_RuntimeContext, true); +#endif + + /* + * clear out tuple table slots + */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close the index relation (no-op if we didn't open it) + */ + if (indexScanDesc) + index_endscan(indexScanDesc); + if (indexRelationDesc) + index_close(indexRelationDesc, NoLock); + + /* + * close the heap relation. 
+ */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyMarkPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyMarkPos(IndexOnlyScanState *node) +{ + index_markpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyRestrPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyRestrPos(IndexOnlyScanState *node) +{ + index_restrpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecInitIndexOnlyScan + * + * Initializes the index scan's state information, creates + * scan keys, and opens the base and index relations. + * + * Note: index scans have 2 sets of state information because + * we have to keep track of the base relation and the + * index relation. + * ---------------------------------------------------------------- + */ +IndexOnlyScanState * +ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) +{ + IndexOnlyScanState *indexstate; + Relation currentRelation; + bool relistarget; + TupleDesc tupDesc; + + /* + * create state structure + */ + indexstate = makeNode(IndexOnlyScanState); + indexstate->ss.ps.plan = (Plan *) node; + indexstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &indexstate->ss.ps); + + indexstate->ss.ps.ps_TupFromTlist = false; + + /* + * initialize child expressions + * + * Note: we don't initialize all of the indexorderby expression, only the + * sub-parts corresponding to runtime keys (see below). 
+ */ + indexstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) indexstate); + indexstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) indexstate); + indexstate->indexqual = (List *) + ExecInitExpr((Expr *) node->indexqual, + (PlanState *) indexstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &indexstate->ss.ps); + ExecInitScanTupleSlot(estate, &indexstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); + + indexstate->ss.ss_currentRelation = currentRelation; + indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * Initialize result tuple type. + */ + ExecAssignResultTypeFromTL(&indexstate->ss.ps); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return indexstate; + + /* + * Open the index relation. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the index, so we can avoid + * taking another lock here. Otherwise we need a normal reader's lock. + */ + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + indexstate->ioss_RelationDesc = index_open(node->indexid, + relistarget ? NoLock : AccessShareLock); + + /* + * Now we can get the scan tuple's type (which is the index's rowtype, + * not the heap's) and initialize result projection info. 
+ */ + tupDesc = index_descriptor_hack(indexstate->ioss_RelationDesc); + ExecAssignScanType(&indexstate->ss, tupDesc); + ExecAssignScanProjectionInfo(&indexstate->ss); + + /* + * Initialize index-specific scan state + */ + indexstate->ioss_RuntimeKeysReady = false; + indexstate->ioss_RuntimeKeys = NULL; + indexstate->ioss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexqual, + false, + &indexstate->ioss_ScanKeys, + &indexstate->ioss_NumScanKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexorderby, + true, + &indexstate->ioss_OrderByKeys, + &indexstate->ioss_NumOrderByKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... + * -tgl 7/11/00 + */ + if (indexstate->ioss_NumRuntimeKeys != 0) + { + ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &indexstate->ss.ps); + indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; + indexstate->ss.ps.ps_ExprContext = stdecontext; + } + else + { + indexstate->ioss_RuntimeContext = NULL; + } + + /* + * Initialize scan descriptor. 
+ */ + indexstate->ioss_ScanDesc = index_beginscan(currentRelation, + indexstate->ioss_RelationDesc, + estate->es_snapshot, + indexstate->ioss_NumScanKeys, + indexstate->ioss_NumOrderByKeys); + + /* Set it up for index-only scan */ + indexstate->ioss_ScanDesc->xs_want_itup = true; + indexstate->ioss_VMBuffer = InvalidBuffer; + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (indexstate->ioss_NumRuntimeKeys == 0) + index_rescan(indexstate->ioss_ScanDesc, + indexstate->ioss_ScanKeys, + indexstate->ioss_NumScanKeys, + indexstate->ioss_OrderByKeys, + indexstate->ioss_NumOrderByKeys); + + /* + * all done. + */ + return indexstate; +} diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 56b9855094..6d073bf5fd 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -14,8 +14,8 @@ */ /* * INTERFACE ROUTINES - * ExecIndexScan scans a relation using indices - * ExecIndexNext using index to retrieve next tuple + * ExecIndexScan scans a relation using an index + * IndexNext retrieve next tuple using index * ExecInitIndexScan creates and initializes state info. * ExecReScanIndexScan rescans the indexed relation. * ExecEndIndexScan releases all storage. 
@@ -26,7 +26,6 @@ #include "access/nbtree.h" #include "access/relscan.h" -#include "access/visibilitymap.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" #include "optimizer/clauses.h" @@ -37,7 +36,6 @@ static TupleTableSlot *IndexNext(IndexScanState *node); -static void IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc); /* ---------------------------------------------------------------- @@ -56,7 +54,6 @@ IndexNext(IndexScanState *node) IndexScanDesc scandesc; HeapTuple tuple; TupleTableSlot *slot; - ItemPointer tid; /* * extract necessary information from index scan node @@ -76,67 +73,23 @@ IndexNext(IndexScanState *node) slot = node->ss.ss_ScanTupleSlot; /* - * OK, now that we have what we need, fetch the next TID. + * ok, now that we have what we need, fetch the next tuple. */ - while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + while ((tuple = index_getnext(scandesc, direction)) != NULL) { /* - * Attempt index-only scan, if possible. For this, we need to have - * gotten an index tuple from the AM, and we need the TID to reference - * a heap page on which all tuples are known visible to everybody. - * If that's the case, we don't need to visit the heap page for tuple - * visibility testing, and we don't need any column values that are - * not available from the index. - * - * Note: in the index-only path, we are still holding pin on the - * scan's xs_cbuf, ie, the previously visited heap page. It's not - * clear whether it'd be better to release that pin. + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree()'d. */ - if (scandesc->xs_want_itup && - visibilitymap_test(scandesc->heapRelation, - ItemPointerGetBlockNumber(tid), - &node->iss_VMBuffer)) - { - /* - * Convert index tuple to look like a heap tuple, and store the - * results in the scan tuple slot. 
- */ - IndexStoreHeapTuple(slot, scandesc); - } - else - { - /* Index-only approach not possible, so fetch heap tuple. */ - tuple = index_fetch_heap(scandesc); - - /* Tuple might not be visible. */ - if (tuple == NULL) - continue; - - /* - * Only MVCC snapshots are supported here, so there should be no - * need to keep following the HOT chain once a visible entry has - * been found. If we did want to allow that, we'd need to keep - * more state to remember not to call index_getnext_tid next time. - */ - if (scandesc->xs_continue_hot) - elog(ERROR, "unsupported use of non-MVCC snapshot in executor"); - - /* - * Store the scanned tuple in the scan tuple slot of the scan - * state. - * - * Note: we pass 'false' because tuples returned by amgetnext are - * pointers onto disk pages and must not be pfree()'d. - */ - ExecStoreTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf, /* buffer containing tuple */ - false); /* don't pfree */ - } + ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + scandesc->xs_cbuf, /* buffer containing tuple */ + false); /* don't pfree */ /* * If the index was lossy, we have to recheck the index quals using - * the real tuple. + * the fetched tuple. */ if (scandesc->xs_recheck) { @@ -160,53 +113,6 @@ IndexNext(IndexScanState *node) return ExecClearTuple(slot); } -/* - * IndexStoreHeapTuple - * - * When performing an index-only scan, we build a faux heap tuple - * from the index tuple. Columns not present in the index are set to - * NULL, which is OK because we know they won't be referenced. - * - * The faux tuple is built as a virtual tuple that depends on the - * scandesc's xs_itup, so that must remain valid for as long as we - * need the slot contents. 
- */ -static void -IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc) -{ - Form_pg_index indexForm = scandesc->indexRelation->rd_index; - TupleDesc indexDesc = RelationGetDescr(scandesc->indexRelation); - int nindexatts = indexDesc->natts; - int nheapatts = slot->tts_tupleDescriptor->natts; - Datum *values = slot->tts_values; - bool *isnull = slot->tts_isnull; - int i; - - /* We must first set the slot to empty, and mark all columns as null */ - ExecClearTuple(slot); - - memset(isnull, true, nheapatts * sizeof(bool)); - - /* Transpose index tuple into heap tuple. */ - for (i = 0; i < nindexatts; i++) - { - int indexatt = indexForm->indkey.values[i]; - - /* Ignore expression columns, as well as system attributes */ - if (indexatt <= 0) - continue; - - Assert(indexatt <= nheapatts); - - values[indexatt - 1] = index_getattr(scandesc->xs_itup, i + 1, - indexDesc, - &isnull[indexatt - 1]); - } - - /* And now we can mark the slot as holding a virtual tuple. */ - ExecStoreVirtualTuple(slot); -} - /* * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual */ @@ -493,13 +399,6 @@ ExecEndIndexScan(IndexScanState *node) indexScanDesc = node->iss_ScanDesc; relation = node->ss.ss_currentRelation; - /* Release VM buffer pin, if any. */ - if (node->iss_VMBuffer != InvalidBuffer) - { - ReleaseBuffer(node->iss_VMBuffer); - node->iss_VMBuffer = InvalidBuffer; - } - /* * Free the exprcontext(s) ... 
now dead code, see ExecFreeExprContext */ @@ -659,7 +558,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->iss_ScanKeys, @@ -674,7 +572,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexorderby, true, &indexstate->iss_OrderByKeys, @@ -712,10 +609,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_NumScanKeys, indexstate->iss_NumOrderByKeys); - /* Prepare for possible index-only scan */ - indexstate->iss_ScanDesc->xs_want_itup = node->indexonly; - indexstate->iss_VMBuffer = InvalidBuffer; - /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. @@ -772,7 +665,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * * planstate: executor state node we are working for * index: the index we are building scan keys for - * scanrelid: varno of the index's relation within current query * quals: indexquals (or indexorderbys) expressions * isorderby: true if processing ORDER BY exprs, false if processing quals * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none @@ -791,7 +683,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * ScalarArrayOpExpr quals are not supported. 
*/ void -ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, +ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, @@ -865,7 +757,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -979,7 +871,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1107,7 +999,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1172,7 +1064,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "NullTest indexqual has wrong key"); varattno = ((Var *) leftop)->varattno; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 49b880d0ca..d6433c7f53 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -147,8 +147,8 @@ ExecNestLoop(NestLoopState *node) ParamExecData *prm; prm = &(econtext->ecxt_param_exec_vals[paramno]); - /* Param value should be an OUTER var */ - Assert(nlp->paramval->varno == OUTER); + /* Param value 
should be an OUTER_VAR var */ + Assert(nlp->paramval->varno == OUTER_VAR); Assert(nlp->paramval->varattno > 0); prm->value = slot_getattr(outerTupleSlot, nlp->paramval->varattno, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 5100642dd6..24ac5295f6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -370,7 +370,31 @@ _copyIndexScan(IndexScan *from) COPY_NODE_FIELD(indexorderby); COPY_NODE_FIELD(indexorderbyorig); COPY_SCALAR_FIELD(indexorderdir); - COPY_SCALAR_FIELD(indexonly); + + return newnode; +} + +/* + * _copyIndexOnlyScan + */ +static IndexOnlyScan * +_copyIndexOnlyScan(IndexOnlyScan *from) +{ + IndexOnlyScan *newnode = makeNode(IndexOnlyScan); + + /* + * copy node superclass fields + */ + CopyScanFields((Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(indexid); + COPY_NODE_FIELD(indexqual); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indextlist); + COPY_SCALAR_FIELD(indexorderdir); return newnode; } @@ -3871,6 +3895,9 @@ copyObject(void *from) case T_IndexScan: retval = _copyIndexScan(from); break; + case T_IndexOnlyScan: + retval = _copyIndexOnlyScan(from); + break; case T_BitmapIndexScan: retval = _copyBitmapIndexScan(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9f56427774..eba3d6d579 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -447,7 +447,20 @@ _outIndexScan(StringInfo str, IndexScan *node) WRITE_NODE_FIELD(indexorderby); WRITE_NODE_FIELD(indexorderbyorig); WRITE_ENUM_FIELD(indexorderdir, ScanDirection); - WRITE_BOOL_FIELD(indexonly); +} + +static void +_outIndexOnlyScan(StringInfo str, IndexOnlyScan *node) +{ + WRITE_NODE_TYPE("INDEXONLYSCAN"); + + _outScanInfo(str, (Scan *) node); + + WRITE_OID_FIELD(indexid); + WRITE_NODE_FIELD(indexqual); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indextlist); + WRITE_ENUM_FIELD(indexorderdir, ScanDirection); } 
static void @@ -1501,7 +1514,6 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexorderbys); WRITE_BOOL_FIELD(isjoininner); WRITE_ENUM_FIELD(indexscandir, ScanDirection); - WRITE_BOOL_FIELD(indexonly); WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); WRITE_FLOAT_FIELD(indexselectivity, "%.4f"); WRITE_FLOAT_FIELD(rows, "%.0f"); @@ -1752,8 +1764,9 @@ _outIndexOptInfo(StringInfo str, IndexOptInfo *node) WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_INT_FIELD(ncolumns); WRITE_OID_FIELD(relam); - WRITE_NODE_FIELD(indexprs); + /* indexprs is redundant since we print indextlist */ WRITE_NODE_FIELD(indpred); + WRITE_NODE_FIELD(indextlist); WRITE_BOOL_FIELD(predOK); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(hypothetical); @@ -2707,6 +2720,9 @@ _outNode(StringInfo str, void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_IndexOnlyScan: + _outIndexOnlyScan(str, obj); + break; case T_BitmapIndexScan: _outBitmapIndexScan(str, obj); break; diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 0b0cf38482..5fe4fd5520 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -320,14 +320,18 @@ print_expr(Node *expr, List *rtable) switch (var->varno) { - case INNER: + case INNER_VAR: relname = "INNER"; attname = "?"; break; - case OUTER: + case OUTER_VAR: relname = "OUTER"; attname = "?"; break; + case INDEX_VAR: + relname = "INDEX"; + attname = "?"; + break; default: { RangeTblEntry *rte; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 7090a7e0c0..9ab146a1f7 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -199,14 +199,15 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) true, NULL, SAOP_FORBID, ST_ANYSCAN); /* - * Submit all the ones that can form plain IndexScan plans to add_path. 
(A - * plain IndexPath always represents a plain IndexScan plan; however some - * of the indexes might support only bitmap scans, and those we mustn't - * submit to add_path here.) Also, pick out the ones that might be useful - * as bitmap scans. For that, we must discard indexes that don't support - * bitmap scans, and we also are only interested in paths that have some - * selectivity; we should discard anything that was generated solely for - * ordering purposes. + * Submit all the ones that can form plain IndexScan plans to add_path. + * (A plain IndexPath might represent either a plain IndexScan or an + * IndexOnlyScan, but for our purposes here the distinction does not + * matter. However, some of the indexes might support only bitmap scans, + * and those we mustn't submit to add_path here.) Also, pick out the ones + * that might be useful as bitmap scans. For that, we must discard + * indexes that don't support bitmap scans, and we also are only + * interested in paths that have some selectivity; we should discard + * anything that was generated solely for ordering purposes. */ bitindexpaths = NIL; foreach(l, indexpaths) @@ -1107,11 +1108,9 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index) /* * For the moment, we just ignore index expressions. It might be nice - * to do something with them, later. We also ignore index columns - * that are system columns (such as OID), because the virtual-tuple - * coding used by IndexStoreHeapTuple() can't deal with them. + * to do something with them, later. 
*/ - if (attno <= 0) + if (attno == 0) continue; index_attrs = diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index e5228a81c6..d32fbba237 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,7 +25,6 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" -#include "parser/parsetree.h" #include "utils/lsyscache.h" @@ -35,8 +34,6 @@ static PathKey *make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); -static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, - AttrNumber varattno); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -504,21 +501,24 @@ build_index_pathkeys(PlannerInfo *root, ScanDirection scandir) { List *retval = NIL; - ListCell *indexprs_item; + ListCell *lc; int i; if (index->sortopfamily == NULL) return NIL; /* non-orderable index */ - indexprs_item = list_head(index->indexprs); - for (i = 0; i < index->ncolumns; i++) + i = 0; + foreach(lc, index->indextlist) { + TargetEntry *indextle = (TargetEntry *) lfirst(lc); + Expr *indexkey; bool reverse_sort; bool nulls_first; - int ikey; - Expr *indexkey; PathKey *cpathkey; + /* We assume we don't need to make a copy of the tlist item */ + indexkey = indextle->expr; + if (ScanDirectionIsBackward(scandir)) { reverse_sort = !index->reverse_sort[i]; @@ -530,21 +530,6 @@ build_index_pathkeys(PlannerInfo *root, nulls_first = index->nulls_first[i]; } - ikey = index->indexkeys[i]; - if (ikey != 0) - { - /* simple index column */ - indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey); - } - else - { - /* expression --- assume we need not copy it */ - if (indexprs_item == NULL) - elog(ERROR, "wrong number of index expressions"); - indexkey = (Expr *) lfirst(indexprs_item); - indexprs_item = lnext(indexprs_item); - 
} - /* OK, try to make a canonical pathkey for this sort key */ cpathkey = make_pathkey_from_sortinfo(root, indexkey, @@ -568,44 +553,11 @@ build_index_pathkeys(PlannerInfo *root, /* Add to list unless redundant */ if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); - } - return retval; -} - -/* - * Find or make a Var node for the specified attribute of the rel. - * - * We first look for the var in the rel's target list, because that's - * easy and fast. But the var might not be there (this should normally - * only happen for vars that are used in WHERE restriction clauses, - * but not in join clauses or in the SELECT target list). In that case, - * gin up a Var node the hard way. - */ -static Var * -find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno) -{ - ListCell *temp; - Index relid; - Oid reloid, - vartypeid, - varcollid; - int32 type_mod; - - foreach(temp, rel->reltargetlist) - { - Var *var = (Var *) lfirst(temp); - - if (IsA(var, Var) && - var->varattno == varattno) - return var; + i++; } - relid = rel->relid; - reloid = getrelid(relid, root->parse->rtable); - get_atttypetypmodcoll(reloid, varattno, &vartypeid, &type_mod, &varcollid); - - return makeVar(relid, varattno, vartypeid, type_mod, varcollid, 0); + return retval; } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 36ee7c5648..a76f2c603c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -53,8 +53,8 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path); static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); -static IndexScan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, - List *tlist, List *scan_clauses); +static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath 
*best_path, + List *tlist, List *scan_clauses, bool indexonly); static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, BitmapHeapPath *best_path, List *tlist, List *scan_clauses); @@ -95,7 +95,12 @@ static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, List *indexorderby, List *indexorderbyorig, - ScanDirection indexscandir, bool indexonly); + ScanDirection indexscandir); +static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, + Index scanrelid, Oid indexid, + List *indexqual, List *indexorderby, + List *indextlist, + ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig); @@ -206,6 +211,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_SubqueryScan: @@ -274,10 +280,18 @@ create_scan_plan(PlannerInfo *root, Path *best_path) */ if (use_physical_tlist(root, rel)) { - tlist = build_physical_tlist(root, rel); - /* if fail because of dropped cols, use regular method */ - if (tlist == NIL) - tlist = build_relation_tlist(rel); + if (best_path->pathtype == T_IndexOnlyScan) + { + /* For index-only scan, the preferred tlist is the index's */ + tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist); + } + else + { + tlist = build_physical_tlist(root, rel); + /* if fail because of dropped cols, use regular method */ + if (tlist == NIL) + tlist = build_relation_tlist(rel); + } } else tlist = build_relation_tlist(rel); @@ -302,7 +316,16 @@ create_scan_plan(PlannerInfo *root, Path *best_path) plan = (Plan *) create_indexscan_plan(root, (IndexPath *) best_path, tlist, - scan_clauses); + scan_clauses, + false); + break; + + case T_IndexOnlyScan: + plan = (Plan *) create_indexscan_plan(root, + 
(IndexPath *) best_path, + tlist, + scan_clauses, + true); break; case T_BitmapHeapScan: @@ -476,6 +499,7 @@ disuse_physical_tlist(Plan *plan, Path *path) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_SubqueryScan: @@ -1044,16 +1068,23 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, * Returns an indexscan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. * + * We use this for both plain IndexScans and IndexOnlyScans, because the + * qual preprocessing work is the same for both. Note that the caller tells + * us which to build --- we don't look at best_path->path.pathtype, because + * create_bitmap_subplan needs to be able to override the prior decision. + * * The indexquals list of the path contains implicitly-ANDed qual conditions. * The list can be empty --- then no index restrictions will be applied during * the scan. */ -static IndexScan * +static Scan * create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, List *tlist, - List *scan_clauses) + List *scan_clauses, + bool indexonly) { + Scan *scan_plan; List *indexquals = best_path->indexquals; List *indexorderbys = best_path->indexorderbys; Index baserelid = best_path->path.parent->relid; @@ -1063,7 +1094,6 @@ create_indexscan_plan(PlannerInfo *root, List *fixed_indexquals; List *fixed_indexorderbys; ListCell *l; - IndexScan *scan_plan; /* it should be a base rel... */ Assert(baserelid > 0); @@ -1077,7 +1107,7 @@ create_indexscan_plan(PlannerInfo *root, /* * The executor needs a copy with the indexkey on the left of each clause - * and with index attr numbers substituted for table ones. + * and with index Vars substituted for table ones. 
*/ fixed_indexquals = fix_indexqual_references(root, best_path, indexquals); @@ -1175,20 +1205,29 @@ create_indexscan_plan(PlannerInfo *root, } /* Finally ready to build the plan node */ - scan_plan = make_indexscan(tlist, - qpqual, - baserelid, - indexoid, - fixed_indexquals, - stripped_indexquals, - fixed_indexorderbys, - indexorderbys, - best_path->indexscandir, - best_path->indexonly); - - copy_path_costsize(&scan_plan->scan.plan, &best_path->path); + if (indexonly) + scan_plan = (Scan *) make_indexonlyscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + fixed_indexorderbys, + best_path->indexinfo->indextlist, + best_path->indexscandir); + else + scan_plan = (Scan *) make_indexscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + best_path->indexscandir); + + copy_path_costsize(&scan_plan->plan, &best_path->path); /* use the indexscan-specific rows estimate, not the parent rel's */ - scan_plan->scan.plan.plan_rows = best_path->rows; + scan_plan->plan.plan_rows = best_path->rows; return scan_plan; } @@ -1440,7 +1479,9 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, ListCell *l; /* Use the regular indexscan plan build machinery... */ - iscan = create_indexscan_plan(root, ipath, NIL, NIL); + iscan = (IndexScan *) create_indexscan_plan(root, ipath, + NIL, NIL, false); + Assert(IsA(iscan, IndexScan)); /* then convert to a bitmap indexscan */ plan = (Plan *) make_bitmap_indexscan(iscan->scan.scanrelid, iscan->indexid, @@ -2549,17 +2590,13 @@ fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, /* * fix_indexqual_operand * Convert an indexqual expression to a Var referencing the index column. + * + * We represent index keys by Var nodes having varno == INDEX_VAR and varattno + * equal to the index's attribute number (index column position). 
*/ static Node * fix_indexqual_operand(Node *node, IndexOptInfo *index) { - /* - * We represent index keys by Var nodes having the varno of the base table - * but varattno equal to the index's attribute number (index column - * position). This is a bit hokey ... would be cleaner to use a - * special-purpose node type that could not be mistaken for a regular Var. - * But it will do for now. - */ Var *result; int pos; ListCell *indexpr_item; @@ -2583,6 +2620,7 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index) if (index->indexkeys[pos] == varatt) { result = (Var *) copyObject(node); + result->varno = INDEX_VAR; result->varattno = pos + 1; return (Node *) result; } @@ -2606,7 +2644,7 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index) if (equal(node, indexkey)) { /* Found a match */ - result = makeVar(index->rel->relid, pos + 1, + result = makeVar(INDEX_VAR, pos + 1, exprType(lfirst(indexpr_item)), -1, exprCollation(lfirst(indexpr_item)), 0); @@ -2842,8 +2880,7 @@ make_indexscan(List *qptlist, List *indexqualorig, List *indexorderby, List *indexorderbyorig, - ScanDirection indexscandir, - bool indexonly) + ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); Plan *plan = &node->scan.plan; @@ -2860,7 +2897,34 @@ make_indexscan(List *qptlist, node->indexorderby = indexorderby; node->indexorderbyorig = indexorderbyorig; node->indexorderdir = indexscandir; - node->indexonly = indexonly; + + return node; +} + +static IndexOnlyScan * +make_indexonlyscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + List *indexqual, + List *indexorderby, + List *indextlist, + ScanDirection indexscandir) +{ + IndexOnlyScan *node = makeNode(IndexOnlyScan); + Plan *plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = indexqual; + 
node->indexorderby = indexorderby; + node->indextlist = indextlist; + node->indexorderdir = indexscandir; return node; } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index d60163379b..493103a1db 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -63,6 +63,7 @@ typedef struct { PlannerInfo *root; indexed_tlist *subplan_itlist; + Index newvarno; int rtoffset; } fix_upper_expr_context; @@ -81,6 +82,9 @@ typedef struct ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset)) static Plan *set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset); +static Plan *set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset); static Plan *set_subqueryscan_references(PlannerInfo *root, SubqueryScan *plan, int rtoffset); @@ -113,6 +117,7 @@ static Node *fix_join_expr_mutator(Node *node, static Node *fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, + Index newvarno, int rtoffset); static Node *fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context); @@ -234,6 +239,16 @@ set_plan_references(PlannerInfo *root, Plan *plan) newrte->relid); } + /* + * Check for RT index overflow; it's very unlikely, but if it did happen, + * the executor would get confused by varnos that match the special varno + * values. 
+ */ + if (IS_SPECIAL_VARNO(list_length(glob->finalrtable))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many range table entries"))); + /* * Adjust RT indexes of PlanRowMarks and add to final rowmarks list */ @@ -305,6 +320,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->indexorderbyorig, rtoffset); } break; + case T_IndexOnlyScan: + { + IndexOnlyScan *splan = (IndexOnlyScan *) plan; + + return set_indexonlyscan_references(root, splan, rtoffset); + } + break; case T_BitmapIndexScan: { BitmapIndexScan *splan = (BitmapIndexScan *) plan; @@ -652,6 +674,49 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) return plan; } +/* + * set_indexonlyscan_references + * Do set_plan_references processing on an IndexOnlyScan + * + * This is unlike the handling of a plain IndexScan because we have to + * convert Vars referencing the heap into Vars referencing the index. + * We can use the fix_upper_expr machinery for that, by working from a + * targetlist describing the index columns. 
+ */ +static Plan * +set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset) +{ + indexed_tlist *index_itlist; + + index_itlist = build_tlist_index(plan->indextlist); + + plan->scan.scanrelid += rtoffset; + plan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.targetlist, + index_itlist, + INDEX_VAR, + rtoffset); + plan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.qual, + index_itlist, + INDEX_VAR, + rtoffset); + /* indexqual is already transformed to reference index columns */ + plan->indexqual = fix_scan_list(root, plan->indexqual, rtoffset); + /* indexorderby is already transformed to reference index columns */ + plan->indexorderby = fix_scan_list(root, plan->indexorderby, rtoffset); + /* indextlist must NOT be transformed to reference index columns */ + plan->indextlist = fix_scan_list(root, plan->indextlist, rtoffset); + + pfree(index_itlist); + + return (Plan *) plan; +} + /* * set_subqueryscan_references * Do set_plan_references processing on a SubqueryScan @@ -919,11 +984,13 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) Assert(var->varlevelsup == 0); /* - * We should not see any Vars marked INNER or OUTER. + * We should not see any Vars marked INNER_VAR or OUTER_VAR. But an + * indexqual expression could contain INDEX_VAR Vars. 
*/ - Assert(var->varno != INNER); - Assert(var->varno != OUTER); - var->varno += context->rtoffset; + Assert(var->varno != INNER_VAR); + Assert(var->varno != OUTER_VAR); + if (!IS_SPECIAL_VARNO(var->varno)) + var->varno += context->rtoffset; if (var->varnoold > 0) var->varnoold += context->rtoffset; return (Node *) var; @@ -932,9 +999,10 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) { CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); - Assert(cexpr->cvarno != INNER); - Assert(cexpr->cvarno != OUTER); - cexpr->cvarno += context->rtoffset; + Assert(cexpr->cvarno != INNER_VAR); + Assert(cexpr->cvarno != OUTER_VAR); + if (!IS_SPECIAL_VARNO(cexpr->cvarno)) + cexpr->cvarno += context->rtoffset; return (Node *) cexpr; } if (IsA(node, PlaceHolderVar)) @@ -963,9 +1031,9 @@ fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) /* * set_join_references * Modify the target list and quals of a join node to reference its - * subplans, by setting the varnos to OUTER or INNER and setting attno - * values to the result domain number of either the corresponding outer - * or inner join tuple item. Also perform opcode lookup for these + * subplans, by setting the varnos to OUTER_VAR or INNER_VAR and setting + * attno values to the result domain number of either the corresponding + * outer or inner join tuple item. Also perform opcode lookup for these * expressions. and add regclass OIDs to root->glob->relationOids. 
*/ static void @@ -1012,6 +1080,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) nlp->paramval = (Var *) fix_upper_expr(root, (Node *) nlp->paramval, outer_itlist, + OUTER_VAR, rtoffset); } } @@ -1083,17 +1152,19 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) search_indexed_tlist_for_sortgroupref((Node *) tle->expr, tle->ressortgroupref, subplan_itlist, - OUTER); + OUTER_VAR); if (!newexpr) newexpr = fix_upper_expr(root, (Node *) tle->expr, subplan_itlist, + OUTER_VAR, rtoffset); } else newexpr = fix_upper_expr(root, (Node *) tle->expr, subplan_itlist, + OUTER_VAR, rtoffset); tle = flatCopyTargetEntry(tle); tle->expr = (Expr *) newexpr; @@ -1105,6 +1176,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) fix_upper_expr(root, (Node *) plan->qual, subplan_itlist, + OUTER_VAR, rtoffset); pfree(subplan_itlist); @@ -1113,7 +1185,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) /* * set_dummy_tlist_references * Replace the targetlist of an upper-level plan node with a simple - * list of OUTER references to its child. + * list of OUTER_VAR references to its child. * * This is used for plan types like Sort and Append that don't evaluate * their targetlists. Although the executor doesn't care at all what's in @@ -1136,7 +1208,7 @@ set_dummy_tlist_references(Plan *plan, int rtoffset) Var *oldvar = (Var *) tle->expr; Var *newvar; - newvar = makeVar(OUTER, + newvar = makeVar(OUTER_VAR, tle->resno, exprType((Node *) oldvar), exprTypmod((Node *) oldvar), @@ -1382,11 +1454,12 @@ search_indexed_tlist_for_sortgroupref(Node *node, * relation target lists. Also perform opcode lookup and add * regclass OIDs to root->glob->relationOids. * - * This is used in two different scenarios: a normal join clause, where - * all the Vars in the clause *must* be replaced by OUTER or INNER references; - * and a RETURNING clause, which may contain both Vars of the target relation - * and Vars of other relations. 
In the latter case we want to replace the - * other-relation Vars by OUTER references, while leaving target Vars alone. + * This is used in two different scenarios: a normal join clause, where all + * the Vars in the clause *must* be replaced by OUTER_VAR or INNER_VAR + * references; and a RETURNING clause, which may contain both Vars of the + * target relation and Vars of other relations. In the latter case we want + * to replace the other-relation Vars by OUTER_VAR references, while leaving + * target Vars alone. * * For a normal join, acceptable_rel should be zero so that any failure to * match a Var will be reported as an error. For the RETURNING case, pass @@ -1435,7 +1508,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) /* First look for the var in the input tlists */ newvar = search_indexed_tlist_for_var(var, context->outer_itlist, - OUTER, + OUTER_VAR, context->rtoffset); if (newvar) return (Node *) newvar; @@ -1443,7 +1516,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_var(var, context->inner_itlist, - INNER, + INNER_VAR, context->rtoffset); if (newvar) return (Node *) newvar; @@ -1470,7 +1543,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->outer_itlist, - OUTER); + OUTER_VAR); if (newvar) return (Node *) newvar; } @@ -1478,7 +1551,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->inner_itlist, - INNER); + INNER_VAR); if (newvar) return (Node *) newvar; } @@ -1491,7 +1564,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var(node, context->outer_itlist, - OUTER); + OUTER_VAR); if (newvar) return (Node *) newvar; } @@ -1499,7 +1572,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = 
search_indexed_tlist_for_non_var(node, context->inner_itlist, - INNER); + INNER_VAR); if (newvar) return (Node *) newvar; } @@ -1516,7 +1589,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * root->glob->relationOids. * * This is used to fix up target and qual expressions of non-join upper-level - * plan nodes. + * plan nodes, as well as index-only scan nodes. * * An error is raised if no matching var can be found in the subplan tlist * --- so this routine should only be applied to nodes whose subplans' @@ -1529,23 +1602,26 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * subplan tlist is just a flattened list of Vars.) * * 'node': the tree to be fixed (a target item or qual) - * 'subplan_itlist': indexed target list for subplan + * 'subplan_itlist': indexed target list for subplan (or index) + * 'newvarno': varno to use for Vars referencing tlist elements * 'rtoffset': how much to increment varnoold by * * The resulting tree is a copy of the original in which all Var nodes have - * varno = OUTER, varattno = resno of corresponding subplan target. + * varno = newvarno, varattno = resno of corresponding targetlist element. * The original tree is not modified. 
*/ static Node * fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, + Index newvarno, int rtoffset) { fix_upper_expr_context context; context.root = root; context.subplan_itlist = subplan_itlist; + context.newvarno = newvarno; context.rtoffset = rtoffset; return fix_upper_expr_mutator(node, &context); } @@ -1563,7 +1639,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) newvar = search_indexed_tlist_for_var(var, context->subplan_itlist, - OUTER, + context->newvarno, context->rtoffset); if (!newvar) elog(ERROR, "variable not found in subplan target list"); @@ -1578,7 +1654,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->subplan_itlist, - OUTER); + context->newvarno); if (newvar) return (Node *) newvar; } @@ -1590,7 +1666,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) { newvar = search_indexed_tlist_for_non_var(node, context->subplan_itlist, - OUTER); + context->newvarno); if (newvar) return (Node *) newvar; } @@ -1610,7 +1686,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) * table should be left alone, however (the executor will evaluate them * using the actual heap tuple, after firing triggers if any). In the * adjusted RETURNING list, result-table Vars will still have their - * original varno, but Vars for other rels will have varno OUTER. + * original varno, but Vars for other rels will have varno OUTER_VAR. * * We also must perform opcode lookup and add regclass OIDs to * root->glob->relationOids. 
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 2e308c625a..c4046ca534 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1974,6 +1974,18 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_IndexOnlyScan: + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby, + &context); + + /* + * we need not look at indextlist, since it cannot contain Params. + */ + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_BitmapIndexScan: finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual, &context); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8ed55a3d0e..6aa34412de 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -452,7 +452,7 @@ create_index_path(PlannerInfo *root, indexscandir = NoMovementScanDirection; } - pathnode->path.pathtype = T_IndexScan; + pathnode->path.pathtype = indexonly ? 
T_IndexOnlyScan : T_IndexScan; pathnode->path.parent = rel; pathnode->path.pathkeys = pathkeys; @@ -470,7 +470,6 @@ create_index_path(PlannerInfo *root, pathnode->isjoininner = (outer_rel != NULL); pathnode->indexscandir = indexscandir; - pathnode->indexonly = indexonly; if (outer_rel != NULL) { diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 742e7a880a..0b3675f146 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "access/transam.h" #include "catalog/catalog.h" +#include "catalog/heap.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" @@ -49,6 +50,8 @@ static int32 get_rel_data_width(Relation rel, int32 *attr_widths); static List *get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, bool include_notnull); +static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, + Relation heapRelation); /* @@ -314,6 +317,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); if (info->indpred && varno != 1) ChangeVarNodes((Node *) info->indpred, 1, varno, 0); + + /* Build targetlist using the completed indexprs data */ + info->indextlist = build_index_tlist(root, info, relation); + info->predOK = false; /* set later in indxpath.c */ info->unique = index->indisunique; info->hypothetical = false; @@ -900,6 +907,70 @@ build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) return tlist; } +/* + * build_index_tlist + * + * Build a targetlist representing the columns of the specified index. + * Each column is represented by a Var for the corresponding base-relation + * column, or an expression in base-relation Vars, as appropriate. + * + * There are never any dropped columns in indexes, so unlike + * build_physical_tlist, we need no failure case. 
+ */ +static List * +build_index_tlist(PlannerInfo *root, IndexOptInfo *index, + Relation heapRelation) +{ + List *tlist = NIL; + Index varno = index->rel->relid; + ListCell *indexpr_item; + int i; + + indexpr_item = list_head(index->indexprs); + for (i = 0; i < index->ncolumns; i++) + { + int indexkey = index->indexkeys[i]; + Expr *indexvar; + + if (indexkey != 0) + { + /* simple column */ + Form_pg_attribute att_tup; + + if (indexkey < 0) + att_tup = SystemAttributeDefinition(indexkey, + heapRelation->rd_rel->relhasoids); + else + att_tup = heapRelation->rd_att->attrs[indexkey - 1]; + + indexvar = (Expr *) makeVar(varno, + indexkey, + att_tup->atttypid, + att_tup->atttypmod, + att_tup->attcollation, + 0); + } + else + { + /* expression column */ + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + indexvar = (Expr *) lfirst(indexpr_item); + indexpr_item = lnext(indexpr_item); + } + + tlist = lappend(tlist, + makeTargetEntry(indexvar, + i + 1, + NULL, + false)); + } + if (indexpr_item != NULL) + elog(ERROR, "wrong number of index expressions"); + + return tlist; +} + /* * restriction_selectivity * diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index c112a9cc16..75923a6f2e 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -107,9 +107,11 @@ typedef struct * deparse_namespace list (since a plan tree never contains Vars with * varlevelsup > 0). We store the PlanState node that is the immediate * parent of the expression to be deparsed, as well as a list of that - * PlanState's ancestors. In addition, we store the outer and inner - * subplan nodes, whose targetlists are used to resolve OUTER and INNER Vars. - * (Note: these could be derived on-the-fly from the planstate instead.) + * PlanState's ancestors. 
In addition, we store its outer and inner subplan + * state nodes, as well as their plan nodes' targetlists, and the indextlist + * if the current PlanState is an IndexOnlyScanState. (These fields could + * be derived on-the-fly from the current PlanState, but it seems notationally + * clearer to set them up as separate fields.) */ typedef struct { @@ -118,10 +120,11 @@ typedef struct /* Remaining fields are used only when deparsing a Plan tree: */ PlanState *planstate; /* immediate parent of current expression */ List *ancestors; /* ancestors of planstate */ - PlanState *outer_planstate; /* OUTER subplan state, or NULL if none */ - PlanState *inner_planstate; /* INNER subplan state, or NULL if none */ - Plan *outer_plan; /* OUTER subplan, or NULL if none */ - Plan *inner_plan; /* INNER subplan, or NULL if none */ + PlanState *outer_planstate; /* outer subplan state, or NULL if none */ + PlanState *inner_planstate; /* inner subplan state, or NULL if none */ + List *outer_tlist; /* referent for OUTER_VAR Vars */ + List *inner_tlist; /* referent for INNER_VAR Vars */ + List *index_tlist; /* referent for INDEX_VAR Vars */ } deparse_namespace; @@ -2162,9 +2165,14 @@ deparse_context_for(const char *aliasname, Oid relid) * deparse_context_for_planstate - Build deparse context for a plan * * When deparsing an expression in a Plan tree, we might have to resolve - * OUTER or INNER references. To do this, the caller must provide the - * parent PlanState node. Then OUTER and INNER references can be resolved - * by drilling down into the left and right child plans. + * OUTER_VAR, INNER_VAR, or INDEX_VAR references. To do this, the caller must + * provide the parent PlanState node. Then OUTER_VAR and INNER_VAR references + * can be resolved by drilling down into the left and right child plans. + * Similarly, INDEX_VAR references can be resolved by reference to the + * indextlist given in the parent IndexOnlyScan node. 
(Note that we don't + * currently support deparsing of indexquals in regular IndexScan or + * BitmapIndexScan nodes; for those, we can only deparse the indexqualorig + * fields, which won't contain INDEX_VAR Vars.) * * Note: planstate really ought to be declared as "PlanState *", but we use * "Node *" to avoid having to include execnodes.h in builtins.h. @@ -2175,7 +2183,7 @@ deparse_context_for(const char *aliasname, Oid relid) * * The plan's rangetable list must also be passed. We actually prefer to use * the rangetable to resolve simple Vars, but the plan inputs are necessary - * for Vars that reference expressions computed in subplan target lists. + * for Vars with special varnos. */ List * deparse_context_for_planstate(Node *planstate, List *ancestors, @@ -2201,10 +2209,11 @@ deparse_context_for_planstate(Node *planstate, List *ancestors, * set_deparse_planstate: set up deparse_namespace to parse subexpressions * of a given PlanState node * - * This sets the planstate, outer_planstate, inner_planstate, outer_plan, and - * inner_plan fields. Caller is responsible for adjusting the ancestors list - * if necessary. Note that the rtable and ctes fields do not need to change - * when shifting attention to different plan nodes in a single plan tree. + * This sets the planstate, outer_planstate, inner_planstate, outer_tlist, + * inner_tlist, and index_tlist fields. Caller is responsible for adjusting + * the ancestors list if necessary. Note that the rtable and ctes fields do + * not need to change when shifting attention to different plan nodes in a + * single plan tree. 
*/ static void set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) @@ -2229,9 +2238,9 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) dpns->outer_planstate = outerPlanState(ps); if (dpns->outer_planstate) - dpns->outer_plan = dpns->outer_planstate->plan; + dpns->outer_tlist = dpns->outer_planstate->plan->targetlist; else - dpns->outer_plan = NULL; + dpns->outer_tlist = NIL; /* * For a SubqueryScan, pretend the subplan is INNER referent. (We don't @@ -2246,18 +2255,25 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) dpns->inner_planstate = innerPlanState(ps); if (dpns->inner_planstate) - dpns->inner_plan = dpns->inner_planstate->plan; + dpns->inner_tlist = dpns->inner_planstate->plan->targetlist; else - dpns->inner_plan = NULL; + dpns->inner_tlist = NIL; + + /* index_tlist is set only if it's an IndexOnlyScan */ + if (IsA(ps->plan, IndexOnlyScan)) + dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist; + else + dpns->index_tlist = NIL; } /* * push_child_plan: temporarily transfer deparsing attention to a child plan * - * When expanding an OUTER or INNER reference, we must adjust the deparse - * context in case the referenced expression itself uses OUTER/INNER. We - * modify the top stack entry in-place to avoid affecting levelsup issues - * (although in a Plan tree there really shouldn't be any). + * When expanding an OUTER_VAR or INNER_VAR reference, we must adjust the + * deparse context in case the referenced expression itself uses + * OUTER_VAR/INNER_VAR. We modify the top stack entry in-place to avoid + * affecting levelsup issues (although in a Plan tree there really shouldn't + * be any). * * Caller must provide a local deparse_namespace variable to save the * previous state for pop_child_plan. 
@@ -2271,10 +2287,11 @@ push_child_plan(deparse_namespace *dpns, PlanState *ps, /* * Currently we don't bother to adjust the ancestors list, because an - * OUTER or INNER reference really shouldn't contain any Params that would - * be set by the parent node itself. If we did want to adjust it, - * lcons'ing dpns->planstate onto dpns->ancestors would be the appropriate - * thing --- and pop_child_plan would need to undo the change to the list. + * OUTER_VAR or INNER_VAR reference really shouldn't contain any Params + * that would be set by the parent node itself. If we did want to adjust + * the list, lcons'ing dpns->planstate onto dpns->ancestors would be the + * appropriate thing --- and pop_child_plan would need to undo the change + * to the list. */ /* Set attention on selected child */ @@ -2298,7 +2315,7 @@ pop_child_plan(deparse_namespace *dpns, deparse_namespace *save_dpns) * When expanding a Param reference, we must adjust the deparse context * to match the plan node that contains the expression being printed; * otherwise we'd fail if that expression itself contains a Param or - * OUTER/INNER variables. + * OUTER_VAR/INNER_VAR/INDEX_VAR variable. * * The target ancestor is conveniently identified by the ListCell holding it * in dpns->ancestors. @@ -3716,22 +3733,22 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) /* * Try to find the relevant RTE in this rtable. In a plan tree, it's - * likely that varno is OUTER or INNER, in which case we must dig down - * into the subplans. + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. 
*/ if (var->varno >= 1 && var->varno <= list_length(dpns->rtable)) { rte = rt_fetch(var->varno, dpns->rtable); attnum = var->varattno; } - else if (var->varno == OUTER && dpns->outer_plan) + else if (var->varno == OUTER_VAR && dpns->outer_tlist) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->outer_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->outer_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for OUTER var: %d", var->varattno); + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->outer_planstate, &save_dpns); @@ -3749,14 +3766,14 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) pop_child_plan(dpns, &save_dpns); return NULL; } - else if (var->varno == INNER && dpns->inner_plan) + else if (var->varno == INNER_VAR && dpns->inner_tlist) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for INNER var: %d", var->varattno); + elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->inner_planstate, &save_dpns); @@ -3774,6 +3791,28 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) pop_child_plan(dpns, &save_dpns); return NULL; } + else if (var->varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + + tle = get_tle_by_resno(dpns->index_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno); + + Assert(netlevelsup == 0); + + /* + * Force parentheses because our caller probably assumed a Var is a + * simple expression. 
+ */ + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) tle->expr, context, true); + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, ')'); + + return NULL; + } else { elog(ERROR, "bogus varno: %d", var->varno); @@ -3789,16 +3828,16 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) * no alias. So in that case, drill down to the subplan and print the * contents of the referenced tlist item. This works because in a plan * tree, such Vars can only occur in a SubqueryScan or CteScan node, and - * we'll have set dpns->inner_plan to reference the child plan node. + * we'll have set dpns->inner_planstate to reference the child plan node. */ if ((rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_CTE) && attnum > list_length(rte->eref->colnames) && - dpns->inner_plan) + dpns->inner_planstate) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", var->varattno); @@ -3984,23 +4023,23 @@ get_name_for_var_field(Var *var, int fieldno, /* * Try to find the relevant RTE in this rtable. In a plan tree, it's - * likely that varno is OUTER or INNER, in which case we must dig down - * into the subplans. + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. 
*/ if (var->varno >= 1 && var->varno <= list_length(dpns->rtable)) { rte = rt_fetch(var->varno, dpns->rtable); attnum = var->varattno; } - else if (var->varno == OUTER && dpns->outer_plan) + else if (var->varno == OUTER_VAR && dpns->outer_tlist) { TargetEntry *tle; deparse_namespace save_dpns; const char *result; - tle = get_tle_by_resno(dpns->outer_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->outer_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for OUTER var: %d", var->varattno); + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->outer_planstate, &save_dpns); @@ -4011,15 +4050,15 @@ get_name_for_var_field(Var *var, int fieldno, pop_child_plan(dpns, &save_dpns); return result; } - else if (var->varno == INNER && dpns->inner_plan) + else if (var->varno == INNER_VAR && dpns->inner_tlist) { TargetEntry *tle; deparse_namespace save_dpns; const char *result; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for INNER var: %d", var->varattno); + elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->inner_planstate, &save_dpns); @@ -4030,6 +4069,22 @@ get_name_for_var_field(Var *var, int fieldno, pop_child_plan(dpns, &save_dpns); return result; } + else if (var->varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + const char *result; + + tle = get_tle_by_resno(dpns->index_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno); + + Assert(netlevelsup == 0); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + return result; + } else { elog(ERROR, "bogus varno: %d", var->varno); @@ -4115,11 +4170,10 @@ get_name_for_var_field(Var *var, int fieldno, deparse_namespace save_dpns; const 
char *result; - if (!dpns->inner_plan) + if (!dpns->inner_planstate) elog(ERROR, "failed to find plan for subquery %s", rte->eref->aliasname); - tle = get_tle_by_resno(dpns->inner_plan->targetlist, - attnum); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", attnum); @@ -4232,11 +4286,10 @@ get_name_for_var_field(Var *var, int fieldno, deparse_namespace save_dpns; const char *result; - if (!dpns->inner_plan) + if (!dpns->inner_planstate) elog(ERROR, "failed to find plan for CTE %s", rte->eref->aliasname); - tle = get_tle_by_resno(dpns->inner_plan->targetlist, - attnum); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", attnum); diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 69e89b82c9..b4ac9357fb 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -306,7 +306,7 @@ currtid_for_view(Relation viewrel, ItemPointer tid) Var *var = (Var *) tle->expr; RangeTblEntry *rte; - if (var->varno > 0 && var->varno < INNER && + if (!IS_SPECIAL_VARNO(var->varno) && var->varattno == SelfItemPointerAttributeNumber) { rte = rt_fetch(var->varno, query->rtable); diff --git a/src/include/executor/nodeIndexonlyscan.h b/src/include/executor/nodeIndexonlyscan.h new file mode 100644 index 0000000000..1c59cee5a7 --- /dev/null +++ b/src/include/executor/nodeIndexonlyscan.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * nodeIndexonlyscan.h + * + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeIndexonlyscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEINDEXONLYSCAN_H +#define NODEINDEXONLYSCAN_H + +#include "nodes/execnodes.h" + +extern IndexOnlyScanState 
*ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags); +extern TupleTableSlot *ExecIndexOnlyScan(IndexOnlyScanState *node); +extern void ExecEndIndexOnlyScan(IndexOnlyScanState *node); +extern void ExecIndexOnlyMarkPos(IndexOnlyScanState *node); +extern void ExecIndexOnlyRestrPos(IndexOnlyScanState *node); +extern void ExecReScanIndexOnlyScan(IndexOnlyScanState *node); + +#endif /* NODEINDEXONLYSCAN_H */ diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h index 481a7df70f..4094031c18 100644 --- a/src/include/executor/nodeIndexscan.h +++ b/src/include/executor/nodeIndexscan.h @@ -23,9 +23,12 @@ extern void ExecIndexMarkPos(IndexScanState *node); extern void ExecIndexRestrPos(IndexScanState *node); extern void ExecReScanIndexScan(IndexScanState *node); -/* routines exported to share code with nodeBitmapIndexscan.c */ +/* + * These routines are exported to share code with nodeIndexonlyscan.c and + * nodeBitmapIndexscan.c + */ extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, - Index scanrelid, List *quals, bool isorderby, + List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3885fa0099..0a89f189d7 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1226,7 +1226,6 @@ typedef struct * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor - * VMBuffer buffer in use for visibility map testing, if any * ---------------- */ typedef struct IndexScanState @@ -1243,9 +1242,42 @@ typedef struct IndexScanState ExprContext *iss_RuntimeContext; Relation iss_RelationDesc; IndexScanDesc iss_ScanDesc; - Buffer iss_VMBuffer; } IndexScanState; +/* ---------------- + * IndexOnlyScanState information + * + * 
indexqual execution state for indexqual expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * VMBuffer buffer in use for visibility map testing, if any + * ---------------- + */ +typedef struct IndexOnlyScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *indexqual; + ScanKey ioss_ScanKeys; + int ioss_NumScanKeys; + ScanKey ioss_OrderByKeys; + int ioss_NumOrderByKeys; + IndexRuntimeKeyInfo *ioss_RuntimeKeys; + int ioss_NumRuntimeKeys; + bool ioss_RuntimeKeysReady; + ExprContext *ioss_RuntimeContext; + Relation ioss_RelationDesc; + IndexScanDesc ioss_ScanDesc; + Buffer ioss_VMBuffer; +} IndexOnlyScanState; + /* ---------------- * BitmapIndexScanState information * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ecf62b335b..7aa299485f 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -52,6 +52,7 @@ typedef enum NodeTag T_Scan, T_SeqScan, T_IndexScan, + T_IndexOnlyScan, T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, @@ -97,6 +98,7 @@ typedef enum NodeTag T_ScanState, T_SeqScanState, T_IndexScanState, + T_IndexOnlyScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 60467f5276..ababded845 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -285,11 +285,8 @@ typedef Scan SeqScan; * * indexqual has the same form, but the expressions have been commuted if * necessary to put the indexkeys on the left, and the indexkeys are replaced - * 
by Var nodes identifying the index columns (varattno is the index column - * position, not the base table's column, even though varno is for the base - * table). This is a bit hokey ... would be cleaner to use a special-purpose - * node type that could not be mistaken for a regular Var. But it will do - * for now. + * by Var nodes identifying the index columns (their varno is INDEX_VAR and + * their varattno is the index column number). * * indexorderbyorig is similarly the original form of any ORDER BY expressions * that are being implemented by the index, while indexorderby is modified to @@ -302,8 +299,7 @@ typedef Scan SeqScan; * (Note these fields are used for amcanorderbyop cases, not amcanorder cases.) * * indexorderdir specifies the scan ordering, for indexscans on amcanorder - * indexes (for other indexes it should be "don't care"). indexonly specifies - * an index-only scan, for indexscans on amcanreturn indexes. + * indexes (for other indexes it should be "don't care"). * ---------------- */ typedef struct IndexScan @@ -315,9 +311,35 @@ typedef struct IndexScan List *indexorderby; /* list of index ORDER BY exprs */ List *indexorderbyorig; /* the same in original form */ ScanDirection indexorderdir; /* forward or backward or don't care */ - bool indexonly; /* attempt to skip heap fetches? */ } IndexScan; +/* ---------------- + * index-only scan node + * + * IndexOnlyScan is very similar to IndexScan, but it specifies an + * index-only scan, in which the data comes from the index not the heap. + * Because of this, *all* Vars in the plan node's targetlist, qual, and + * index expressions reference index columns and have varno = INDEX_VAR. + * Hence we do not need separate indexqualorig and indexorderbyorig lists, + * since their contents would be equivalent to indexqual and indexorderby. 
+ * + * To help EXPLAIN interpret the index Vars for display, we provide + * indextlist, which represents the contents of the index as a targetlist + * with one TLE per index column. Vars appearing in this list reference + * the base table, and this is the only field in the plan node that may + * contain such Vars. + * ---------------- + */ +typedef struct IndexOnlyScan +{ + Scan scan; + Oid indexid; /* OID of index to scan */ + List *indexqual; /* list of index quals (usually OpExprs) */ + List *indexorderby; /* list of index ORDER BY exprs */ + List *indextlist; /* TargetEntry list describing index's cols */ + ScanDirection indexorderdir; /* forward or backward or don't care */ +} IndexOnlyScan; + /* ---------------- * bitmap index scan node * diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f1e20ef937..cedf022e17 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -118,15 +118,19 @@ typedef struct Expr * Note: during parsing/planning, varnoold/varoattno are always just copies * of varno/varattno. At the tail end of planning, Var nodes appearing in * upper-level plan nodes are reassigned to point to the outputs of their - * subplans; for example, in a join node varno becomes INNER or OUTER and - * varattno becomes the index of the proper element of that subplan's target - * list. But varnoold/varoattno continue to hold the original values. + * subplans; for example, in a join node varno becomes INNER_VAR or OUTER_VAR + * and varattno becomes the index of the proper element of that subplan's + * target list. But varnoold/varoattno continue to hold the original values. * The code doesn't really need varnoold/varoattno, but they are very useful * for debugging and interpreting completed plans, so we keep them around. 
*/ -#define INNER 65000 -#define OUTER 65001 +#define INNER_VAR 65000 /* reference to inner subplan */ +#define OUTER_VAR 65001 /* reference to outer subplan */ +#define INDEX_VAR 65002 /* reference to index column */ +#define IS_SPECIAL_VARNO(varno) ((varno) >= INNER_VAR) + +/* Symbols for the indexes of the special RTE entries in rules */ #define PRS2_OLD_VARNO 1 #define PRS2_NEW_VARNO 2 @@ -134,7 +138,7 @@ typedef struct Var { Expr xpr; Index varno; /* index of this var's relation in the range - * table (could also be INNER or OUTER) */ + * table, or INNER_VAR/OUTER_VAR/INDEX_VAR */ AttrNumber varattno; /* attribute number of this var, or zero for * all */ Oid vartype; /* pg_type OID for the type of this var */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index cf48ba433c..45ca52e516 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -449,6 +449,10 @@ typedef struct RelOptInfo * The indexprs and indpred expressions have been run through * prepqual.c and eval_const_expressions() for ease of matching to * WHERE clauses. indpred is in implicit-AND form. + * + * indextlist is a TargetEntry list representing the index columns. + * It provides an equivalent base-relation Var for each simple column, + * and links to the matching indexprs element for each expression column. */ typedef struct IndexOptInfo { @@ -478,6 +482,8 @@ typedef struct IndexOptInfo List *indexprs; /* expressions for non-simple index columns */ List *indpred; /* predicate if a partial index, else NIL */ + List *indextlist; /* targetlist representing index columns */ + bool predOK; /* true if predicate matches query */ bool unique; /* true if a unique index */ bool hypothetical; /* true if index doesn't really exist */ @@ -640,6 +646,9 @@ typedef struct Path /*---------- * IndexPath represents an index scan over a single index. 
* + * This struct is used for both regular indexscans and index-only scans; + * path.pathtype is T_IndexScan or T_IndexOnlyScan to show which is meant. + * * 'indexinfo' is the index to be scanned. * * 'indexclauses' is a list of index qualification clauses, with implicit @@ -673,14 +682,10 @@ typedef struct Path * NoMovementScanDirection for an indexscan, but the planner wants to * distinguish ordered from unordered indexes for building pathkeys.) * - * 'indexonly' is TRUE for an index-only scan, that is, the index's access - * method has amcanreturn = TRUE and we only need columns available from the - * index. - * * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that * we need not recompute them when considering using the same index in a * bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath - * itself represent the costs of an IndexScan plan type. + * itself represent the costs of an IndexScan or IndexOnlyScan plan type. * * 'rows' is the estimated result tuple count for the indexscan. This * is the same as path.parent->rows for a simple indexscan, but it is @@ -698,7 +703,6 @@ typedef struct IndexPath List *indexorderbys; bool isjoininner; ScanDirection indexscandir; - bool indexonly; Cost indextotalcost; Selectivity indexselectivity; double rows; /* estimated number of result tuples */ @@ -714,11 +718,12 @@ typedef struct IndexPath * The individual indexscans are represented by IndexPath nodes, and any * logic on top of them is represented by a tree of BitmapAndPath and * BitmapOrPath nodes. Notice that we can use the same IndexPath node both - * to represent a regular IndexScan plan, and as the child of a BitmapHeapPath - * that represents scanning the same index using a BitmapIndexScan. The - * startup_cost and total_cost figures of an IndexPath always represent the - * costs to use it as a regular IndexScan. The costs of a BitmapIndexScan - * can be computed using the IndexPath's indextotalcost and indexselectivity. 
+ * to represent a regular (or index-only) index scan plan, and as the child + * of a BitmapHeapPath that represents scanning the same index using a + * BitmapIndexScan. The startup_cost and total_cost figures of an IndexPath + * always represent the costs to use it as a regular (or index-only) + * IndexScan. The costs of a BitmapIndexScan can be computed using the + * IndexPath's indextotalcost and indexselectivity. * * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and * rows fields serve the same purpose as for plain IndexPaths.