]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeIndexonlyscan.c
2f30c55c54a46798c963e020be6ff0764edbe0e4
[postgresql] / src / backend / executor / nodeIndexonlyscan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexonlyscan.c
4  *        Routines to support index-only scans
5  *
6  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/executor/nodeIndexonlyscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              ExecIndexOnlyScan                       scans an index
18  *              IndexOnlyNext                           retrieve next tuple
19  *              ExecInitIndexOnlyScan           creates and initializes state info.
20  *              ExecReScanIndexOnlyScan         rescans the indexed relation.
21  *              ExecEndIndexOnlyScan            releases all storage.
22  *              ExecIndexOnlyMarkPos            marks scan position.
23  *              ExecIndexOnlyRestrPos           restores scan position.
24  */
25 #include "postgres.h"
26
27 #include "access/relscan.h"
28 #include "access/visibilitymap.h"
29 #include "executor/execdebug.h"
30 #include "executor/nodeIndexonlyscan.h"
31 #include "executor/nodeIndexscan.h"
32 #include "storage/bufmgr.h"
33 #include "storage/predicate.h"
34 #include "utils/memutils.h"
35 #include "utils/rel.h"
36
37
38 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
39 static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
40                                 TupleDesc itupdesc);
41
42
43 /* ----------------------------------------------------------------
44  *              IndexOnlyNext
45  *
46  *              Retrieve a tuple from the IndexOnlyScan node's index.
47  * ----------------------------------------------------------------
48  */
49 static TupleTableSlot *
50 IndexOnlyNext(IndexOnlyScanState *node)
51 {
52         EState     *estate;
53         ExprContext *econtext;
54         ScanDirection direction;
55         IndexScanDesc scandesc;
56         TupleTableSlot *slot;
57         ItemPointer tid;
58
59         /*
60          * extract necessary information from index scan node
61          */
62         estate = node->ss.ps.state;
63         direction = estate->es_direction;
64         /* flip direction if this is an overall backward scan */
65         if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
66         {
67                 if (ScanDirectionIsForward(direction))
68                         direction = BackwardScanDirection;
69                 else if (ScanDirectionIsBackward(direction))
70                         direction = ForwardScanDirection;
71         }
72         scandesc = node->ioss_ScanDesc;
73         econtext = node->ss.ps.ps_ExprContext;
74         slot = node->ss.ss_ScanTupleSlot;
75
76         /*
77          * OK, now that we have what we need, fetch the next tuple.
78          */
79         while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
80         {
81                 HeapTuple       tuple = NULL;
82
83                 /*
84                  * We can skip the heap fetch if the TID references a heap page on
85                  * which all tuples are known visible to everybody.  In any case,
86                  * we'll use the index tuple not the heap tuple as the data source.
87                  *
88                  * Note on Memory Ordering Effects: visibilitymap_test does not lock
89                  * the visibility map buffer, and therefore the result we read here
90                  * could be slightly stale.  However, it can't be stale enough to
91                  * matter.      It suffices to show that (1) there is a read barrier
92                  * between the time we read the index TID and the time we test the
93                  * visibility map; and (2) there is a write barrier between the time
94                  * some other concurrent process clears the visibility map bit and the
95                  * time it inserts the index TID.  Since acquiring or releasing a
96                  * LWLock interposes a full barrier, this is easy to show: (1) is
97                  * satisfied by the release of the index buffer content lock after
98                  * reading the TID; and (2) is satisfied by the acquisition of the
99                  * buffer content lock in order to insert the TID.
100                  */
101                 if (!visibilitymap_test(scandesc->heapRelation,
102                                                                 ItemPointerGetBlockNumber(tid),
103                                                                 &node->ioss_VMBuffer))
104                 {
105                         /*
106                          * Rats, we have to visit the heap to check visibility.
107                          */
108                         node->ioss_HeapFetches++;
109                         tuple = index_fetch_heap(scandesc);
110                         if (tuple == NULL)
111                                 continue;               /* no visible tuple, try next index entry */
112
113                         /*
114                          * Only MVCC snapshots are supported here, so there should be no
115                          * need to keep following the HOT chain once a visible entry has
116                          * been found.  If we did want to allow that, we'd need to keep
117                          * more state to remember not to call index_getnext_tid next time.
118                          */
119                         if (scandesc->xs_continue_hot)
120                                 elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
121
122                         /*
123                          * Note: at this point we are holding a pin on the heap page, as
124                          * recorded in scandesc->xs_cbuf.  We could release that pin now,
125                          * but it's not clear whether it's a win to do so.      The next index
126                          * entry might require a visit to the same heap page.
127                          */
128                 }
129
130                 /*
131                  * Fill the scan tuple slot with data from the index.
132                  */
133                 StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
134
135                 /*
136                  * If the index was lossy, we have to recheck the index quals.
137                  * (Currently, this can never happen, but we should support the case
138                  * for possible future use, eg with GiST indexes.)
139                  */
140                 if (scandesc->xs_recheck)
141                 {
142                         econtext->ecxt_scantuple = slot;
143                         ResetExprContext(econtext);
144                         if (!ExecQual(node->indexqual, econtext, false))
145                         {
146                                 /* Fails recheck, so drop it and loop back for another */
147                                 InstrCountFiltered2(node, 1);
148                                 continue;
149                         }
150                 }
151
152                 /*
153                  * Predicate locks for index-only scans must be acquired at the page
154                  * level when the heap is not accessed, since tuple-level predicate
155                  * locks need the tuple's xmin value.  If we had to visit the tuple
156                  * anyway, then we already have the tuple-level lock and can skip the
157                  * page lock.
158                  */
159                 if (tuple == NULL)
160                         PredicateLockPage(scandesc->heapRelation,
161                                                           ItemPointerGetBlockNumber(tid),
162                                                           estate->es_snapshot);
163
164                 return slot;
165         }
166
167         /*
168          * if we get here it means the index scan failed so we are at the end of
169          * the scan..
170          */
171         return ExecClearTuple(slot);
172 }
173
174 /*
175  * StoreIndexTuple
176  *              Fill the slot with data from the index tuple.
177  *
178  * At some point this might be generally-useful functionality, but
179  * right now we don't need it elsewhere.
180  */
181 static void
182 StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
183 {
184         int                     nindexatts = itupdesc->natts;
185         Datum      *values = slot->tts_values;
186         bool       *isnull = slot->tts_isnull;
187         int                     i;
188
189         /*
190          * Note: we must use the tupdesc supplied by the AM in index_getattr, not
191          * the slot's tupdesc, in case the latter has different datatypes (this
192          * happens for btree name_ops in particular).  They'd better have the same
193          * number of columns though, as well as being datatype-compatible which is
194          * something we can't so easily check.
195          */
196         Assert(slot->tts_tupleDescriptor->natts == nindexatts);
197
198         ExecClearTuple(slot);
199         for (i = 0; i < nindexatts; i++)
200                 values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
201         ExecStoreVirtualTuple(slot);
202 }
203
204 /*
205  * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
206  *
207  * This can't really happen, since an index can't supply CTID which would
208  * be necessary data for any potential EvalPlanQual target relation.  If it
209  * did happen, the EPQ code would pass us the wrong data, namely a heap
210  * tuple not an index tuple.  So throw an error.
211  */
212 static bool
213 IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
214 {
215         elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
216         return false;                           /* keep compiler quiet */
217 }
218
219 /* ----------------------------------------------------------------
220  *              ExecIndexOnlyScan(node)
221  * ----------------------------------------------------------------
222  */
223 TupleTableSlot *
224 ExecIndexOnlyScan(IndexOnlyScanState *node)
225 {
226         /*
227          * If we have runtime keys and they've not already been set up, do it now.
228          */
229         if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
230                 ExecReScan((PlanState *) node);
231
232         return ExecScan(&node->ss,
233                                         (ExecScanAccessMtd) IndexOnlyNext,
234                                         (ExecScanRecheckMtd) IndexOnlyRecheck);
235 }
236
237 /* ----------------------------------------------------------------
238  *              ExecReScanIndexOnlyScan(node)
239  *
240  *              Recalculates the values of any scan keys whose value depends on
241  *              information known at runtime, then rescans the indexed relation.
242  *
243  *              Updating the scan key was formerly done separately in
244  *              ExecUpdateIndexScanKeys. Integrating it into ReScan makes
245  *              rescans of indices and relations/general streams more uniform.
246  * ----------------------------------------------------------------
247  */
248 void
249 ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
250 {
251         /*
252          * If we are doing runtime key calculations (ie, any of the index key
253          * values weren't simple Consts), compute the new key values.  But first,
254          * reset the context so we don't leak memory as each outer tuple is
255          * scanned.  Note this assumes that we will recalculate *all* runtime keys
256          * on each call.
257          */
258         if (node->ioss_NumRuntimeKeys != 0)
259         {
260                 ExprContext *econtext = node->ioss_RuntimeContext;
261
262                 ResetExprContext(econtext);
263                 ExecIndexEvalRuntimeKeys(econtext,
264                                                                  node->ioss_RuntimeKeys,
265                                                                  node->ioss_NumRuntimeKeys);
266         }
267         node->ioss_RuntimeKeysReady = true;
268
269         /* reset index scan */
270         index_rescan(node->ioss_ScanDesc,
271                                  node->ioss_ScanKeys, node->ioss_NumScanKeys,
272                                  node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
273
274         ExecScanReScan(&node->ss);
275 }
276
277
278 /* ----------------------------------------------------------------
279  *              ExecEndIndexOnlyScan
280  * ----------------------------------------------------------------
281  */
282 void
283 ExecEndIndexOnlyScan(IndexOnlyScanState *node)
284 {
285         Relation        indexRelationDesc;
286         IndexScanDesc indexScanDesc;
287         Relation        relation;
288
289         /*
290          * extract information from the node
291          */
292         indexRelationDesc = node->ioss_RelationDesc;
293         indexScanDesc = node->ioss_ScanDesc;
294         relation = node->ss.ss_currentRelation;
295
296         /* Release VM buffer pin, if any. */
297         if (node->ioss_VMBuffer != InvalidBuffer)
298         {
299                 ReleaseBuffer(node->ioss_VMBuffer);
300                 node->ioss_VMBuffer = InvalidBuffer;
301         }
302
303         /*
304          * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
305          */
306 #ifdef NOT_USED
307         ExecFreeExprContext(&node->ss.ps);
308         if (node->ioss_RuntimeContext)
309                 FreeExprContext(node->ioss_RuntimeContext, true);
310 #endif
311
312         /*
313          * clear out tuple table slots
314          */
315         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
316         ExecClearTuple(node->ss.ss_ScanTupleSlot);
317
318         /*
319          * close the index relation (no-op if we didn't open it)
320          */
321         if (indexScanDesc)
322                 index_endscan(indexScanDesc);
323         if (indexRelationDesc)
324                 index_close(indexRelationDesc, NoLock);
325
326         /*
327          * close the heap relation.
328          */
329         ExecCloseScanRelation(relation);
330 }
331
332 /* ----------------------------------------------------------------
333  *              ExecIndexOnlyMarkPos
334  * ----------------------------------------------------------------
335  */
336 void
337 ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
338 {
339         index_markpos(node->ioss_ScanDesc);
340 }
341
342 /* ----------------------------------------------------------------
343  *              ExecIndexOnlyRestrPos
344  * ----------------------------------------------------------------
345  */
346 void
347 ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
348 {
349         index_restrpos(node->ioss_ScanDesc);
350 }
351
352 /* ----------------------------------------------------------------
353  *              ExecInitIndexOnlyScan
354  *
355  *              Initializes the index scan's state information, creates
356  *              scan keys, and opens the base and index relations.
357  *
358  *              Note: index scans have 2 sets of state information because
359  *                        we have to keep track of the base relation and the
360  *                        index relation.
361  * ----------------------------------------------------------------
362  */
363 IndexOnlyScanState *
364 ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
365 {
366         IndexOnlyScanState *indexstate;
367         Relation        currentRelation;
368         bool            relistarget;
369         TupleDesc       tupDesc;
370
371         /*
372          * create state structure
373          */
374         indexstate = makeNode(IndexOnlyScanState);
375         indexstate->ss.ps.plan = (Plan *) node;
376         indexstate->ss.ps.state = estate;
377         indexstate->ioss_HeapFetches = 0;
378
379         /*
380          * Miscellaneous initialization
381          *
382          * create expression context for node
383          */
384         ExecAssignExprContext(estate, &indexstate->ss.ps);
385
386         indexstate->ss.ps.ps_TupFromTlist = false;
387
388         /*
389          * initialize child expressions
390          *
391          * Note: we don't initialize all of the indexorderby expression, only the
392          * sub-parts corresponding to runtime keys (see below).
393          */
394         indexstate->ss.ps.targetlist = (List *)
395                 ExecInitExpr((Expr *) node->scan.plan.targetlist,
396                                          (PlanState *) indexstate);
397         indexstate->ss.ps.qual = (List *)
398                 ExecInitExpr((Expr *) node->scan.plan.qual,
399                                          (PlanState *) indexstate);
400         indexstate->indexqual = (List *)
401                 ExecInitExpr((Expr *) node->indexqual,
402                                          (PlanState *) indexstate);
403
404         /*
405          * tuple table initialization
406          */
407         ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
408         ExecInitScanTupleSlot(estate, &indexstate->ss);
409
410         /*
411          * open the base relation and acquire appropriate lock on it.
412          */
413         currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
414
415         indexstate->ss.ss_currentRelation = currentRelation;
416         indexstate->ss.ss_currentScanDesc = NULL;       /* no heap scan here */
417
418         /*
419          * Build the scan tuple type using the indextlist generated by the
420          * planner.  We use this, rather than the index's physical tuple
421          * descriptor, because the latter contains storage column types not the
422          * types of the original datums.  (It's the AM's responsibility to return
423          * suitable data anyway.)
424          */
425         tupDesc = ExecTypeFromTL(node->indextlist, false);
426         ExecAssignScanType(&indexstate->ss, tupDesc);
427
428         /*
429          * Initialize result tuple type and projection info.
430          */
431         ExecAssignResultTypeFromTL(&indexstate->ss.ps);
432         ExecAssignScanProjectionInfo(&indexstate->ss);
433
434         /*
435          * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
436          * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
437          * references to nonexistent indexes.
438          */
439         if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
440                 return indexstate;
441
442         /*
443          * Open the index relation.
444          *
445          * If the parent table is one of the target relations of the query, then
446          * InitPlan already opened and write-locked the index, so we can avoid
447          * taking another lock here.  Otherwise we need a normal reader's lock.
448          */
449         relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
450         indexstate->ioss_RelationDesc = index_open(node->indexid,
451                                                                          relistarget ? NoLock : AccessShareLock);
452
453         /*
454          * Initialize index-specific scan state
455          */
456         indexstate->ioss_RuntimeKeysReady = false;
457         indexstate->ioss_RuntimeKeys = NULL;
458         indexstate->ioss_NumRuntimeKeys = 0;
459
460         /*
461          * build the index scan keys from the index qualification
462          */
463         ExecIndexBuildScanKeys((PlanState *) indexstate,
464                                                    indexstate->ioss_RelationDesc,
465                                                    node->indexqual,
466                                                    false,
467                                                    &indexstate->ioss_ScanKeys,
468                                                    &indexstate->ioss_NumScanKeys,
469                                                    &indexstate->ioss_RuntimeKeys,
470                                                    &indexstate->ioss_NumRuntimeKeys,
471                                                    NULL,        /* no ArrayKeys */
472                                                    NULL);
473
474         /*
475          * any ORDER BY exprs have to be turned into scankeys in the same way
476          */
477         ExecIndexBuildScanKeys((PlanState *) indexstate,
478                                                    indexstate->ioss_RelationDesc,
479                                                    node->indexorderby,
480                                                    true,
481                                                    &indexstate->ioss_OrderByKeys,
482                                                    &indexstate->ioss_NumOrderByKeys,
483                                                    &indexstate->ioss_RuntimeKeys,
484                                                    &indexstate->ioss_NumRuntimeKeys,
485                                                    NULL,        /* no ArrayKeys */
486                                                    NULL);
487
488         /*
489          * If we have runtime keys, we need an ExprContext to evaluate them. The
490          * node's standard context won't do because we want to reset that context
491          * for every tuple.  So, build another context just like the other one...
492          * -tgl 7/11/00
493          */
494         if (indexstate->ioss_NumRuntimeKeys != 0)
495         {
496                 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
497
498                 ExecAssignExprContext(estate, &indexstate->ss.ps);
499                 indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
500                 indexstate->ss.ps.ps_ExprContext = stdecontext;
501         }
502         else
503         {
504                 indexstate->ioss_RuntimeContext = NULL;
505         }
506
507         /*
508          * Initialize scan descriptor.
509          */
510         indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
511                                                                                                 indexstate->ioss_RelationDesc,
512                                                                                                 estate->es_snapshot,
513                                                                                                 indexstate->ioss_NumScanKeys,
514                                                                                         indexstate->ioss_NumOrderByKeys);
515
516         /* Set it up for index-only scan */
517         indexstate->ioss_ScanDesc->xs_want_itup = true;
518         indexstate->ioss_VMBuffer = InvalidBuffer;
519
520         /*
521          * If no run-time keys to calculate, go ahead and pass the scankeys to the
522          * index AM.
523          */
524         if (indexstate->ioss_NumRuntimeKeys == 0)
525                 index_rescan(indexstate->ioss_ScanDesc,
526                                          indexstate->ioss_ScanKeys,
527                                          indexstate->ioss_NumScanKeys,
528                                          indexstate->ioss_OrderByKeys,
529                                          indexstate->ioss_NumOrderByKeys);
530
531         /*
532          * all done.
533          */
534         return indexstate;
535 }