]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeBitmapHeapscan.c
pgindent run for 8.3.
[postgresql] / src / backend / executor / nodeBitmapHeapscan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  *        Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotNow or one of the other
8  * special snapshots).  The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned.
15  * With SnapshotNow we might return a tuple that doesn't meet the required
16  * index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  *        $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.21 2007/11/15 21:14:34 momjian Exp $
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  *              ExecBitmapHeapScan                      scans a relation using bitmap info
31  *              ExecBitmapHeapNext                      workhorse for above
32  *              ExecInitBitmapHeapScan          creates and initializes state info.
33  *              ExecBitmapHeapReScan            prepares to rescan the plan.
34  *              ExecEndBitmapHeapScan           releases all storage.
35  */
36 #include "postgres.h"
37
38 #include "access/heapam.h"
39 #include "executor/execdebug.h"
40 #include "executor/nodeBitmapHeapscan.h"
41 #include "pgstat.h"
42 #include "utils/memutils.h"
43
44
45 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
46 static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
47
48
49 /* ----------------------------------------------------------------
50  *              BitmapHeapNext
51  *
52  *              Retrieve next tuple from the BitmapHeapScan node's currentRelation
53  * ----------------------------------------------------------------
54  */
55 static TupleTableSlot *
56 BitmapHeapNext(BitmapHeapScanState *node)
57 {
58         EState     *estate;
59         ExprContext *econtext;
60         HeapScanDesc scan;
61         Index           scanrelid;
62         TIDBitmap  *tbm;
63         TBMIterateResult *tbmres;
64         OffsetNumber targoffset;
65         TupleTableSlot *slot;
66
67         /*
68          * extract necessary information from index scan node
69          */
70         estate = node->ss.ps.state;
71         econtext = node->ss.ps.ps_ExprContext;
72         slot = node->ss.ss_ScanTupleSlot;
73         scan = node->ss.ss_currentScanDesc;
74         scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
75         tbm = node->tbm;
76         tbmres = node->tbmres;
77
78         /*
79          * Check if we are evaluating PlanQual for tuple of this relation.
80          * Additional checking is not good, but no other way for now. We could
81          * introduce new nodes for this case and handle IndexScan --> NewNode
82          * switching in Init/ReScan plan...
83          */
84         if (estate->es_evTuple != NULL &&
85                 estate->es_evTuple[scanrelid - 1] != NULL)
86         {
87                 if (estate->es_evTupleNull[scanrelid - 1])
88                         return ExecClearTuple(slot);
89
90                 ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
91                                            slot, InvalidBuffer, false);
92
93                 /* Does the tuple meet the original qual conditions? */
94                 econtext->ecxt_scantuple = slot;
95
96                 ResetExprContext(econtext);
97
98                 if (!ExecQual(node->bitmapqualorig, econtext, false))
99                         ExecClearTuple(slot);           /* would not be returned by scan */
100
101                 /* Flag for the next call that no more tuples */
102                 estate->es_evTupleNull[scanrelid - 1] = true;
103
104                 return slot;
105         }
106
107         /*
108          * If we haven't yet performed the underlying index scan, do it, and
109          * prepare the bitmap to be iterated over.
110          */
111         if (tbm == NULL)
112         {
113                 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
114
115                 if (!tbm || !IsA(tbm, TIDBitmap))
116                         elog(ERROR, "unrecognized result from subplan");
117
118                 node->tbm = tbm;
119                 node->tbmres = tbmres = NULL;
120
121                 tbm_begin_iterate(tbm);
122         }
123
124         for (;;)
125         {
126                 Page            dp;
127                 ItemId          lp;
128
129                 /*
130                  * Get next page of results if needed
131                  */
132                 if (tbmres == NULL)
133                 {
134                         node->tbmres = tbmres = tbm_iterate(tbm);
135                         if (tbmres == NULL)
136                         {
137                                 /* no more entries in the bitmap */
138                                 break;
139                         }
140
141                         /*
142                          * Ignore any claimed entries past what we think is the end of the
143                          * relation.  (This is probably not necessary given that we got at
144                          * least AccessShareLock on the table before performing any of the
145                          * indexscans, but let's be safe.)
146                          */
147                         if (tbmres->blockno >= scan->rs_nblocks)
148                         {
149                                 node->tbmres = tbmres = NULL;
150                                 continue;
151                         }
152
153                         /*
154                          * Fetch the current heap page and identify candidate tuples.
155                          */
156                         bitgetpage(scan, tbmres);
157
158                         /*
159                          * Set rs_cindex to first slot to examine
160                          */
161                         scan->rs_cindex = 0;
162                 }
163                 else
164                 {
165                         /*
166                          * Continuing in previously obtained page; advance rs_cindex
167                          */
168                         scan->rs_cindex++;
169                 }
170
171                 /*
172                  * Out of range?  If so, nothing more to look at on this page
173                  */
174                 if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
175                 {
176                         node->tbmres = tbmres = NULL;
177                         continue;
178                 }
179
180                 /*
181                  * Okay to fetch the tuple
182                  */
183                 targoffset = scan->rs_vistuples[scan->rs_cindex];
184                 dp = (Page) BufferGetPage(scan->rs_cbuf);
185                 lp = PageGetItemId(dp, targoffset);
186                 Assert(ItemIdIsNormal(lp));
187
188                 scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
189                 scan->rs_ctup.t_len = ItemIdGetLength(lp);
190                 ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
191
192                 pgstat_count_heap_fetch(scan->rs_rd);
193
194                 /*
195                  * Set up the result slot to point to this tuple. Note that the slot
196                  * acquires a pin on the buffer.
197                  */
198                 ExecStoreTuple(&scan->rs_ctup,
199                                            slot,
200                                            scan->rs_cbuf,
201                                            false);
202
203                 /*
204                  * If we are using lossy info, we have to recheck the qual conditions
205                  * at every tuple.
206                  */
207                 if (tbmres->ntuples < 0)
208                 {
209                         econtext->ecxt_scantuple = slot;
210                         ResetExprContext(econtext);
211
212                         if (!ExecQual(node->bitmapqualorig, econtext, false))
213                         {
214                                 /* Fails recheck, so drop it and loop back for another */
215                                 ExecClearTuple(slot);
216                                 continue;
217                         }
218                 }
219
220                 /* OK to return this tuple */
221                 return slot;
222         }
223
224         /*
225          * if we get here it means we are at the end of the scan..
226          */
227         return ExecClearTuple(slot);
228 }
229
230 /*
231  * bitgetpage - subroutine for BitmapHeapNext()
232  *
233  * This routine reads and pins the specified page of the relation, then
234  * builds an array indicating which tuples on the page are both potentially
235  * interesting according to the bitmap, and visible according to the snapshot.
236  */
237 static void
238 bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
239 {
240         BlockNumber page = tbmres->blockno;
241         Buffer          buffer;
242         Snapshot        snapshot;
243         int                     ntup;
244
245         /*
246          * Acquire pin on the target heap page, trading in any pin we held before.
247          */
248         Assert(page < scan->rs_nblocks);
249
250         scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
251                                                                                  scan->rs_rd,
252                                                                                  page);
253         buffer = scan->rs_cbuf;
254         snapshot = scan->rs_snapshot;
255
256         ntup = 0;
257
258         /*
259          * Prune and repair fragmentation for the whole page, if possible.
260          */
261         heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
262
263         /*
264          * We must hold share lock on the buffer content while examining tuple
265          * visibility.  Afterwards, however, the tuples we have found to be
266          * visible are guaranteed good as long as we hold the buffer pin.
267          */
268         LockBuffer(buffer, BUFFER_LOCK_SHARE);
269
270         /*
271          * We need two separate strategies for lossy and non-lossy cases.
272          */
273         if (tbmres->ntuples >= 0)
274         {
275                 /*
276                  * Bitmap is non-lossy, so we just look through the offsets listed in
277                  * tbmres; but we have to follow any HOT chain starting at each such
278                  * offset.
279                  */
280                 int                     curslot;
281
282                 for (curslot = 0; curslot < tbmres->ntuples; curslot++)
283                 {
284                         OffsetNumber offnum = tbmres->offsets[curslot];
285                         ItemPointerData tid;
286
287                         ItemPointerSet(&tid, page, offnum);
288                         if (heap_hot_search_buffer(&tid, buffer, snapshot, NULL))
289                                 scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
290                 }
291         }
292         else
293         {
294                 /*
295                  * Bitmap is lossy, so we must examine each item pointer on the page.
296                  * But we can ignore HOT chains, since we'll check each tuple anyway.
297                  */
298                 Page            dp = (Page) BufferGetPage(buffer);
299                 OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
300                 OffsetNumber offnum;
301
302                 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
303                 {
304                         ItemId          lp;
305                         HeapTupleData loctup;
306
307                         lp = PageGetItemId(dp, offnum);
308                         if (!ItemIdIsNormal(lp))
309                                 continue;
310                         loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
311                         loctup.t_len = ItemIdGetLength(lp);
312                         if (HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer))
313                                 scan->rs_vistuples[ntup++] = offnum;
314                 }
315         }
316
317         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
318
319         Assert(ntup <= MaxHeapTuplesPerPage);
320         scan->rs_ntuples = ntup;
321 }
322
323 /* ----------------------------------------------------------------
324  *              ExecBitmapHeapScan(node)
325  * ----------------------------------------------------------------
326  */
327 TupleTableSlot *
328 ExecBitmapHeapScan(BitmapHeapScanState *node)
329 {
330         /*
331          * use BitmapHeapNext as access method
332          */
333         return ExecScan(&node->ss, (ExecScanAccessMtd) BitmapHeapNext);
334 }
335
336 /* ----------------------------------------------------------------
337  *              ExecBitmapHeapReScan(node)
338  * ----------------------------------------------------------------
339  */
340 void
341 ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
342 {
343         EState     *estate;
344         Index           scanrelid;
345
346         estate = node->ss.ps.state;
347         scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
348
349         node->ss.ps.ps_TupFromTlist = false;
350
351         /*
352          * If we are being passed an outer tuple, link it into the "regular"
353          * per-tuple econtext for possible qual eval.
354          */
355         if (exprCtxt != NULL)
356         {
357                 ExprContext *stdecontext;
358
359                 stdecontext = node->ss.ps.ps_ExprContext;
360                 stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
361         }
362
363         /* If this is re-scanning of PlanQual ... */
364         if (estate->es_evTuple != NULL &&
365                 estate->es_evTuple[scanrelid - 1] != NULL)
366         {
367                 estate->es_evTupleNull[scanrelid - 1] = false;
368         }
369
370         /* rescan to release any page pin */
371         heap_rescan(node->ss.ss_currentScanDesc, NULL);
372
373         if (node->tbm)
374                 tbm_free(node->tbm);
375         node->tbm = NULL;
376         node->tbmres = NULL;
377
378         /*
379          * Always rescan the input immediately, to ensure we can pass down any
380          * outer tuple that might be used in index quals.
381          */
382         ExecReScan(outerPlanState(node), exprCtxt);
383 }
384
385 /* ----------------------------------------------------------------
386  *              ExecEndBitmapHeapScan
387  * ----------------------------------------------------------------
388  */
389 void
390 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
391 {
392         Relation        relation;
393         HeapScanDesc scanDesc;
394
395         /*
396          * extract information from the node
397          */
398         relation = node->ss.ss_currentRelation;
399         scanDesc = node->ss.ss_currentScanDesc;
400
401         /*
402          * Free the exprcontext
403          */
404         ExecFreeExprContext(&node->ss.ps);
405
406         /*
407          * clear out tuple table slots
408          */
409         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
410         ExecClearTuple(node->ss.ss_ScanTupleSlot);
411
412         /*
413          * close down subplans
414          */
415         ExecEndNode(outerPlanState(node));
416
417         /*
418          * release bitmap if any
419          */
420         if (node->tbm)
421                 tbm_free(node->tbm);
422
423         /*
424          * close heap scan
425          */
426         heap_endscan(scanDesc);
427
428         /*
429          * close the heap relation.
430          */
431         ExecCloseScanRelation(relation);
432 }
433
434 /* ----------------------------------------------------------------
435  *              ExecInitBitmapHeapScan
436  *
437  *              Initializes the scan's state information.
438  * ----------------------------------------------------------------
439  */
440 BitmapHeapScanState *
441 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
442 {
443         BitmapHeapScanState *scanstate;
444         Relation        currentRelation;
445
446         /* check for unsupported flags */
447         Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
448
449         /*
450          * Assert caller didn't ask for an unsafe snapshot --- see comments at
451          * head of file.
452          */
453         Assert(IsMVCCSnapshot(estate->es_snapshot));
454
455         /*
456          * create state structure
457          */
458         scanstate = makeNode(BitmapHeapScanState);
459         scanstate->ss.ps.plan = (Plan *) node;
460         scanstate->ss.ps.state = estate;
461
462         scanstate->tbm = NULL;
463         scanstate->tbmres = NULL;
464
465         /*
466          * Miscellaneous initialization
467          *
468          * create expression context for node
469          */
470         ExecAssignExprContext(estate, &scanstate->ss.ps);
471
472         scanstate->ss.ps.ps_TupFromTlist = false;
473
474         /*
475          * initialize child expressions
476          */
477         scanstate->ss.ps.targetlist = (List *)
478                 ExecInitExpr((Expr *) node->scan.plan.targetlist,
479                                          (PlanState *) scanstate);
480         scanstate->ss.ps.qual = (List *)
481                 ExecInitExpr((Expr *) node->scan.plan.qual,
482                                          (PlanState *) scanstate);
483         scanstate->bitmapqualorig = (List *)
484                 ExecInitExpr((Expr *) node->bitmapqualorig,
485                                          (PlanState *) scanstate);
486
487 #define BITMAPHEAPSCAN_NSLOTS 2
488
489         /*
490          * tuple table initialization
491          */
492         ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
493         ExecInitScanTupleSlot(estate, &scanstate->ss);
494
495         /*
496          * open the base relation and acquire appropriate lock on it.
497          */
498         currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
499
500         scanstate->ss.ss_currentRelation = currentRelation;
501
502         /*
503          * Even though we aren't going to do a conventional seqscan, it is useful
504          * to create a HeapScanDesc --- most of the fields in it are usable.
505          */
506         scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
507                                                                                                                  estate->es_snapshot,
508                                                                                                                  0,
509                                                                                                                  NULL);
510
511         /*
512          * get the scan type from the relation descriptor.
513          */
514         ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
515
516         /*
517          * Initialize result tuple type and projection info.
518          */
519         ExecAssignResultTypeFromTL(&scanstate->ss.ps);
520         ExecAssignScanProjectionInfo(&scanstate->ss);
521
522         /*
523          * initialize child nodes
524          *
525          * We do this last because the child nodes will open indexscans on our
526          * relation's indexes, and we want to be sure we have acquired a lock on
527          * the relation first.
528          */
529         outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
530
531         /*
532          * all done.
533          */
534         return scanstate;
535 }
536
537 int
538 ExecCountSlotsBitmapHeapScan(BitmapHeapScan *node)
539 {
540         return ExecCountSlotsNode(outerPlan((Plan *) node)) +
541                 ExecCountSlotsNode(innerPlan((Plan *) node)) + BITMAPHEAPSCAN_NSLOTS;
542 }