1 /*-------------------------------------------------------------------------
4 * Routines to support index-only scans
6 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/nodeIndexonlyscan.c
13 *-------------------------------------------------------------------------
17 * ExecIndexOnlyScan scans an index
18 * IndexOnlyNext retrieves next tuple
19 * ExecInitIndexOnlyScan creates and initializes state info.
20 * ExecReScanIndexOnlyScan rescans the indexed relation.
21 * ExecEndIndexOnlyScan releases all storage.
22 * ExecIndexOnlyMarkPos marks scan position.
23 * ExecIndexOnlyRestrPos restores scan position.
27 #include "access/relscan.h"
28 #include "access/visibilitymap.h"
29 #include "executor/execdebug.h"
30 #include "executor/nodeIndexonlyscan.h"
31 #include "executor/nodeIndexscan.h"
32 #include "storage/bufmgr.h"
33 #include "storage/predicate.h"
34 #include "utils/memutils.h"
35 #include "utils/rel.h"
38 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
39 static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
// IndexOnlyNext - access method passed to ExecScan by ExecIndexOnlyScan; it
// produces the next scan tuple for an index-only scan node.
//
// node: the IndexOnlyScanState being executed. The function reads its EState,
//   expression context, scan tuple slot, scan descriptor (ioss_ScanDesc) and
//   visibility map buffer (ioss_VMBuffer), and increments ioss_HeapFetches.
//
// Visible flow:
//   1. Take the scan direction from the EState and invert it when the plan
//      has a backward indexorderdir.
//   2. Loop on index_getnext_tid until it returns NULL.
//   3. If visibilitymap_test reports false for the heap block of the TID,
//      count a heap fetch and call index_fetch_heap.
//   4. Raise an ERROR if the scan descriptor has xs_continue_hot set.
//   5. Copy the index tuple into the slot with StoreIndexTuple.
//   6. When xs_recheck is set, evaluate node->indexqual with ExecQual and
//      record a failed recheck with InstrCountFiltered2.
//   7. Take a page-level predicate lock with PredicateLockPage.
//   After the loop the slot is cleared and the result of ExecClearTuple is
//   returned.
//
// NOTE(review): this listing has lost lines (braces, several declarations,
// some conditions and the statement returning a filled slot), so the block
// nesting described above is inferred from the surviving comments - confirm
// against the complete file.
43 /* ----------------------------------------------------------------
46 * Retrieve a tuple from the IndexOnlyScan node's index.
47 * ----------------------------------------------------------------
49 static TupleTableSlot *
50 IndexOnlyNext(IndexOnlyScanState *node)
53 ExprContext *econtext;
54 ScanDirection direction;
55 IndexScanDesc scandesc;
60 * extract necessary information from index scan node
62 estate = node->ss.ps.state;
63 direction = estate->es_direction;
64 /* flip direction if this is an overall backward scan */
65 if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
67 if (ScanDirectionIsForward(direction))
68 direction = BackwardScanDirection;
69 else if (ScanDirectionIsBackward(direction))
70 direction = ForwardScanDirection;
72 scandesc = node->ioss_ScanDesc;
73 econtext = node->ss.ps.ps_ExprContext;
74 slot = node->ss.ss_ScanTupleSlot;
77 * OK, now that we have what we need, fetch the next tuple.
79 while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
81 HeapTuple tuple = NULL;
84 * We can skip the heap fetch if the TID references a heap page on
85 * which all tuples are known visible to everybody. In any case,
86 * we'll use the index tuple not the heap tuple as the data source.
88 * Note on Memory Ordering Effects: visibilitymap_test does not lock
89 * the visibility map buffer, and therefore the result we read here
90 * could be slightly stale. However, it can't be stale enough to
91 * matter. It suffices to show that (1) there is a read barrier
92 * between the time we read the index TID and the time we test the
93 * visibility map; and (2) there is a write barrier between the time
94 * some other concurrent process clears the visibility map bit and the
95 * time it inserts the index TID. Since acquiring or releasing a
96 * LWLock interposes a full barrier, this is easy to show: (1) is
97 * satisfied by the release of the index buffer content lock after
98 * reading the TID; and (2) is satisfied by the acquisition of the
99 * buffer content lock in order to insert the TID.
101 if (!visibilitymap_test(scandesc->heapRelation,
102 ItemPointerGetBlockNumber(tid),
103 &node->ioss_VMBuffer))
106 * Rats, we have to visit the heap to check visibility.
108 node->ioss_HeapFetches++;
109 tuple = index_fetch_heap(scandesc);
// NOTE(review): the test guarding this continue is not visible in this
// listing; the trailing comment suggests it applies only when the heap fetch
// found no visible tuple - confirm.
111 continue; /* no visible tuple, try next index entry */
114 * Only MVCC snapshots are supported here, so there should be no
115 * need to keep following the HOT chain once a visible entry has
116 * been found. If we did want to allow that, we'd need to keep
117 * more state to remember not to call index_getnext_tid next time.
119 if (scandesc->xs_continue_hot)
120 elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
123 * Note: at this point we are holding a pin on the heap page, as
124 * recorded in scandesc->xs_cbuf. We could release that pin now,
125 * but it's not clear whether it's a win to do so. The next index
126 * entry might require a visit to the same heap page.
131 * Fill the scan tuple slot with data from the index.
133 StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
136 * If the index was lossy, we have to recheck the index quals.
137 * (Currently, this can never happen, but we should support the case
138 * for possible future use, eg with GiST indexes.)
140 if (scandesc->xs_recheck)
142 econtext->ecxt_scantuple = slot;
143 ResetExprContext(econtext);
144 if (!ExecQual(node->indexqual, econtext, false))
146 /* Fails recheck, so drop it and loop back for another */
147 InstrCountFiltered2(node, 1);
// NOTE(review): the comment below is cut off in this listing and any
// condition around PredicateLockPage is not visible; the text implies the
// page lock is skipped when the heap tuple was fetched - confirm.
153 * Predicate locks for index-only scans must be acquired at the page
154 * level when the heap is not accessed, since tuple-level predicate
155 * locks need the tuple's xmin value. If we had to visit the tuple
156 * anyway, then we already have the tuple-level lock and can skip the
160 PredicateLockPage(scandesc->heapRelation,
161 ItemPointerGetBlockNumber(tid),
162 estate->es_snapshot);
168 * if we get here it means the index scan failed so we are at the end of
171 return ExecClearTuple(slot);
// StoreIndexTuple - load every attribute of an index tuple into a tuple
// table slot as a virtual tuple.
//
// slot:     destination slot; its tts_values and tts_isnull arrays are
//           written directly.
// itup:     index tuple whose attributes are read with index_getattr.
// itupdesc: descriptor handed to index_getattr; its natts field sets how
//           many attributes are copied.
//
// Visible steps: assert that the slot descriptor has the same natts as
// itupdesc, clear the slot, fill values[i] and isnull[i] for each position
// i (attribute number i + 1 is passed to index_getattr), then call
// ExecStoreVirtualTuple on the slot.
//
// NOTE(review): the return-type line and the declaration of i are not
// visible in this listing - confirm against the complete file.
176 * Fill the slot with data from the index tuple.
178 * At some point this might be generally-useful functionality, but
179 * right now we don't need it elsewhere.
182 StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
184 int nindexatts = itupdesc->natts;
185 Datum *values = slot->tts_values;
186 bool *isnull = slot->tts_isnull;
190 * Note: we must use the tupdesc supplied by the AM in index_getattr, not
191 * the slot's tupdesc, in case the latter has different datatypes (this
192 * happens for btree name_ops in particular). They'd better have the same
193 * number of columns though, as well as being datatype-compatible which is
194 * something we can't so easily check.
196 Assert(slot->tts_tupleDescriptor->natts == nindexatts);
198 ExecClearTuple(slot);
199 for (i = 0; i < nindexatts; i++)
200 values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
201 ExecStoreVirtualTuple(slot);
// IndexOnlyRecheck - recheck method passed to ExecScan by ExecIndexOnlyScan.
//
// node, slot: neither is read in the visible body.
//
// The visible body always raises an ERROR through elog. The return false
// that follows exists only to keep the compiler quiet, as its trailing
// comment states.
//
// NOTE(review): the return-type line is not visible in this listing.
205 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
207 * This can't really happen, since an index can't supply CTID which would
208 * be necessary data for any potential EvalPlanQual target relation. If it
209 * did happen, the EPQ code would pass us the wrong data, namely a heap
210 * tuple not an index tuple. So throw an error.
213 IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
215 elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
216 return false; /* keep compiler quiet */
// ExecIndexOnlyScan - run one step of the index-only scan node.
//
// node: the IndexOnlyScanState to execute.
//
// If the node has runtime keys (ioss_NumRuntimeKeys != 0) that are not yet
// marked ready (ioss_RuntimeKeysReady is false), ExecReScan is called on the
// node first. The call then delegates to ExecScan with IndexOnlyNext as the
// access method and IndexOnlyRecheck as the recheck method, and returns
// whatever ExecScan returns.
//
// NOTE(review): the return-type line is not visible in this listing.
219 /* ----------------------------------------------------------------
220 * ExecIndexOnlyScan(node)
221 * ----------------------------------------------------------------
224 ExecIndexOnlyScan(IndexOnlyScanState *node)
227 * If we have runtime keys and they've not already been set up, do it now.
229 if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
230 ExecReScan((PlanState *) node);
232 return ExecScan(&node->ss,
233 (ExecScanAccessMtd) IndexOnlyNext,
234 (ExecScanRecheckMtd) IndexOnlyRecheck);
// ExecReScanIndexOnlyScan - prepare the node for a fresh scan.
//
// node: the IndexOnlyScanState to rescan.
//
// Visible steps:
//   1. When ioss_NumRuntimeKeys is nonzero, reset ioss_RuntimeContext and
//      re-evaluate the runtime keys with ExecIndexEvalRuntimeKeys.
//   2. Set ioss_RuntimeKeysReady to true.
//   3. Call index_rescan with the scan keys and order-by keys of the node.
//   4. Call ExecScanReScan on the embedded scan state.
//
// NOTE(review): brace lines are missing from this listing, so whether step 2
// sits inside the runtime-key branch cannot be seen here; the comment ending
// at original line 255 is also cut off - confirm against the complete file.
237 /* ----------------------------------------------------------------
238 * ExecReScanIndexOnlyScan(node)
240 * Recalculates the values of any scan keys whose value depends on
241 * information known at runtime, then rescans the indexed relation.
243 * Updating the scan key was formerly done separately in
244 * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
245 * rescans of indices and relations/general streams more uniform.
246 * ----------------------------------------------------------------
249 ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
252 * If we are doing runtime key calculations (ie, any of the index key
253 * values weren't simple Consts), compute the new key values. But first,
254 * reset the context so we don't leak memory as each outer tuple is
255 * scanned. Note this assumes that we will recalculate *all* runtime keys
258 if (node->ioss_NumRuntimeKeys != 0)
260 ExprContext *econtext = node->ioss_RuntimeContext;
262 ResetExprContext(econtext);
263 ExecIndexEvalRuntimeKeys(econtext,
264 node->ioss_RuntimeKeys,
265 node->ioss_NumRuntimeKeys);
267 node->ioss_RuntimeKeysReady = true;
269 /* reset index scan */
270 index_rescan(node->ioss_ScanDesc,
271 node->ioss_ScanKeys, node->ioss_NumScanKeys,
272 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
274 ExecScanReScan(&node->ss);
// ExecEndIndexOnlyScan - shut down the node and release what it holds.
//
// node: the IndexOnlyScanState being torn down.
//
// Visible steps, in order:
//   1. Release the visibility map buffer pin when ioss_VMBuffer is not
//      InvalidBuffer, then reset the field to InvalidBuffer.
//   2. Free the standard expression context with ExecFreeExprContext and,
//      when ioss_RuntimeContext is set, free it with FreeExprContext.
//   3. Clear the result tuple slot and the scan tuple slot.
//   4. End the index scan with index_endscan and close the index relation
//      with index_close (NoLock) when ioss_RelationDesc is non-NULL.
//   5. Close the heap relation with ExecCloseScanRelation.
//
// NOTE(review): the declaration of relation and any guard around the
// index_endscan call are not visible in this listing; the nearby comment
// says closing is a no-op when the index was never opened, which suggests
// such a guard exists - confirm against the complete file.
278 /* ----------------------------------------------------------------
279 * ExecEndIndexOnlyScan
280 * ----------------------------------------------------------------
283 ExecEndIndexOnlyScan(IndexOnlyScanState *node)
285 Relation indexRelationDesc;
286 IndexScanDesc indexScanDesc;
290 * extract information from the node
292 indexRelationDesc = node->ioss_RelationDesc;
293 indexScanDesc = node->ioss_ScanDesc;
294 relation = node->ss.ss_currentRelation;
296 /* Release VM buffer pin, if any. */
297 if (node->ioss_VMBuffer != InvalidBuffer)
299 ReleaseBuffer(node->ioss_VMBuffer);
300 node->ioss_VMBuffer = InvalidBuffer;
304 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
307 ExecFreeExprContext(&node->ss.ps);
308 if (node->ioss_RuntimeContext)
309 FreeExprContext(node->ioss_RuntimeContext, true);
313 * clear out tuple table slots
315 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
316 ExecClearTuple(node->ss.ss_ScanTupleSlot);
319 * close the index relation (no-op if we didn't open it)
322 index_endscan(indexScanDesc);
323 if (indexRelationDesc)
324 index_close(indexRelationDesc, NoLock);
327 * close the heap relation.
329 ExecCloseScanRelation(relation);
// ExecIndexOnlyMarkPos - mark the current scan position.
//
// node: the IndexOnlyScanState whose position is marked.
//
// The visible body only forwards to index_markpos on the scan descriptor
// held in node->ioss_ScanDesc.
//
// NOTE(review): the return-type line is not visible in this listing.
332 /* ----------------------------------------------------------------
333 * ExecIndexOnlyMarkPos
334 * ----------------------------------------------------------------
337 ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
339 index_markpos(node->ioss_ScanDesc);
// ExecIndexOnlyRestrPos - restore the scan position.
//
// node: the IndexOnlyScanState whose position is restored.
//
// The visible body only forwards to index_restrpos on the scan descriptor
// held in node->ioss_ScanDesc.
//
// NOTE(review): the return-type line is not visible in this listing.
342 /* ----------------------------------------------------------------
343 * ExecIndexOnlyRestrPos
344 * ----------------------------------------------------------------
347 ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
349 index_restrpos(node->ioss_ScanDesc);
352 /* ----------------------------------------------------------------
353 * ExecInitIndexOnlyScan
355 * Initializes the index scan's state information, creates
356 * scan keys, and opens the base and index relations.
358 * Note: index scans have 2 sets of state information because
359 * we have to keep track of the base relation and the
361 * ----------------------------------------------------------------
364 ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
366 IndexOnlyScanState *indexstate;
367 Relation currentRelation;
372 * create state structure
374 indexstate = makeNode(IndexOnlyScanState);
375 indexstate->ss.ps.plan = (Plan *) node;
376 indexstate->ss.ps.state = estate;
377 indexstate->ioss_HeapFetches = 0;
380 * Miscellaneous initialization
382 * create expression context for node
384 ExecAssignExprContext(estate, &indexstate->ss.ps);
386 indexstate->ss.ps.ps_TupFromTlist = false;
389 * initialize child expressions
391 * Note: we don't initialize all of the indexorderby expression, only the
392 * sub-parts corresponding to runtime keys (see below).
394 indexstate->ss.ps.targetlist = (List *)
395 ExecInitExpr((Expr *) node->scan.plan.targetlist,
396 (PlanState *) indexstate);
397 indexstate->ss.ps.qual = (List *)
398 ExecInitExpr((Expr *) node->scan.plan.qual,
399 (PlanState *) indexstate);
400 indexstate->indexqual = (List *)
401 ExecInitExpr((Expr *) node->indexqual,
402 (PlanState *) indexstate);
405 * tuple table initialization
407 ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
408 ExecInitScanTupleSlot(estate, &indexstate->ss);
411 * open the base relation and acquire appropriate lock on it.
413 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
415 indexstate->ss.ss_currentRelation = currentRelation;
416 indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
419 * Build the scan tuple type using the indextlist generated by the
420 * planner. We use this, rather than the index's physical tuple
421 * descriptor, because the latter contains storage column types not the
422 * types of the original datums. (It's the AM's responsibility to return
423 * suitable data anyway.)
425 tupDesc = ExecTypeFromTL(node->indextlist, false);
426 ExecAssignScanType(&indexstate->ss, tupDesc);
429 * Initialize result tuple type and projection info.
431 ExecAssignResultTypeFromTL(&indexstate->ss.ps);
432 ExecAssignScanProjectionInfo(&indexstate->ss);
435 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
436 * here. This allows an index-advisor plugin to EXPLAIN a plan containing
437 * references to nonexistent indexes.
439 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
443 * Open the index relation.
445 * If the parent table is one of the target relations of the query, then
446 * InitPlan already opened and write-locked the index, so we can avoid
447 * taking another lock here. Otherwise we need a normal reader's lock.
449 relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
450 indexstate->ioss_RelationDesc = index_open(node->indexid,
451 relistarget ? NoLock : AccessShareLock);
454 * Initialize index-specific scan state
456 indexstate->ioss_RuntimeKeysReady = false;
457 indexstate->ioss_RuntimeKeys = NULL;
458 indexstate->ioss_NumRuntimeKeys = 0;
461 * build the index scan keys from the index qualification
463 ExecIndexBuildScanKeys((PlanState *) indexstate,
464 indexstate->ioss_RelationDesc,
467 &indexstate->ioss_ScanKeys,
468 &indexstate->ioss_NumScanKeys,
469 &indexstate->ioss_RuntimeKeys,
470 &indexstate->ioss_NumRuntimeKeys,
471 NULL, /* no ArrayKeys */
475 * any ORDER BY exprs have to be turned into scankeys in the same way
477 ExecIndexBuildScanKeys((PlanState *) indexstate,
478 indexstate->ioss_RelationDesc,
481 &indexstate->ioss_OrderByKeys,
482 &indexstate->ioss_NumOrderByKeys,
483 &indexstate->ioss_RuntimeKeys,
484 &indexstate->ioss_NumRuntimeKeys,
485 NULL, /* no ArrayKeys */
489 * If we have runtime keys, we need an ExprContext to evaluate them. The
490 * node's standard context won't do because we want to reset that context
491 * for every tuple. So, build another context just like the other one...
494 if (indexstate->ioss_NumRuntimeKeys != 0)
496 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
498 ExecAssignExprContext(estate, &indexstate->ss.ps);
499 indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
500 indexstate->ss.ps.ps_ExprContext = stdecontext;
504 indexstate->ioss_RuntimeContext = NULL;
508 * Initialize scan descriptor.
510 indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
511 indexstate->ioss_RelationDesc,
513 indexstate->ioss_NumScanKeys,
514 indexstate->ioss_NumOrderByKeys);
516 /* Set it up for index-only scan */
517 indexstate->ioss_ScanDesc->xs_want_itup = true;
518 indexstate->ioss_VMBuffer = InvalidBuffer;
521 * If no run-time keys to calculate, go ahead and pass the scankeys to the
524 if (indexstate->ioss_NumRuntimeKeys == 0)
525 index_rescan(indexstate->ioss_ScanDesc,
526 indexstate->ioss_ScanKeys,
527 indexstate->ioss_NumScanKeys,
528 indexstate->ioss_OrderByKeys,
529 indexstate->ioss_NumOrderByKeys);