]> granicus.if.org Git - postgresql/blob - src/backend/access/index/indexam.c
Fix up pgstats counting of live and dead tuples to recognize that committed
[postgresql] / src / backend / access / index / indexam.c
1 /*-------------------------------------------------------------------------
2  *
3  * indexam.c
4  *        general index access method routines
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.98 2007/05/27 03:50:38 tgl Exp $
12  *
13  * INTERFACE ROUTINES
14  *              index_open              - open an index relation by relation OID
15  *              index_close             - close an index relation
16  *              index_beginscan - start a scan of an index with amgettuple
17  *              index_beginscan_multi - start a scan of an index with amgetmulti
18  *              index_rescan    - restart a scan of an index
19  *              index_endscan   - end a scan
20  *              index_insert    - insert an index tuple into a relation
21  *              index_markpos   - mark a scan position
22  *              index_restrpos  - restore a scan position
23  *              index_getnext   - get the next tuple from a scan
24  *              index_getmulti  - get multiple tuples from a scan
25  *              index_bulk_delete       - bulk deletion of index tuples
26  *              index_vacuum_cleanup    - post-deletion cleanup of an index
27  *              index_getprocid - get a support procedure OID
28  *              index_getprocinfo - get a support procedure's lookup info
29  *
30  * NOTES
31  *              This file contains the index_ routines which used
32  *              to be a scattered collection of stuff in access/genam.
33  *
34  *
35  * old comments
36  *              Scans are implemented as follows:
37  *
38  *              `0' represents an invalid item pointer.
39  *              `-' represents an unknown item pointer.
40  *              `X' represents a known item pointer.
41  *              `+' represents known or invalid item pointers.
42  *              `*' represents any item pointer.
43  *
44  *              State is represented by a triple of these symbols in the order of
45  *              previous, current, next.  Note that the case of reverse scans works
46  *              identically.
47  *
48  *                              State   Result
49  *              (1)             + + -   + 0 0                   (if the next item pointer is invalid)
50  *              (2)                             + X -                   (otherwise)
51  *              (3)             * 0 0   * 0 0                   (no change)
52  *              (4)             + X 0   X 0 0                   (shift)
53  *              (5)             * + X   + X -                   (shift, add unknown)
54  *
55  *              All other states cannot occur.
56  *
57  *              Note: It would be possible to cache the status of the previous and
58  *                        next item pointer using the flags.
59  *
60  *-------------------------------------------------------------------------
61  */
62
63 #include "postgres.h"
64
65 #include "access/genam.h"
66 #include "access/heapam.h"
67 #include "pgstat.h"
68 #include "utils/relcache.h"
69
70
71 /* ----------------------------------------------------------------
72  *                                      macros used in index_ routines
73  * ----------------------------------------------------------------
74  */
/*
 * RELATION_CHECKS
 *
 * Assertion-only sanity check, written as a single comma expression so it
 * can be used as a statement.  It relies on the calling function having a
 * local variable named "indexRelation", and asserts that this relation is
 * valid and that its rd_am pointer is valid.
 */
#define RELATION_CHECKS \
( \
	AssertMacro(RelationIsValid(indexRelation)), \
	AssertMacro(PointerIsValid(indexRelation->rd_am)) \
)
80
/*
 * SCAN_CHECKS
 *
 * Assertion-only sanity check, written as a single comma expression so it
 * can be used as a statement.  It relies on the calling function having a
 * local variable named "scan", and asserts in turn that the scan descriptor
 * is valid, that its indexRelation is valid, and that the index's rd_am
 * pointer is valid.
 */
#define SCAN_CHECKS \
( \
	AssertMacro(IndexScanIsValid(scan)), \
	AssertMacro(RelationIsValid(scan->indexRelation)), \
	AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
)
87
/*
 * GET_REL_PROCEDURE(pname)
 *
 * Set the caller's local "procedure" to the FmgrInfo slot named pname in
 * indexRelation->rd_aminfo.  If that slot does not yet hold a valid
 * function OID, it is filled in here from the regproc of the same name in
 * indexRelation->rd_am, passing indexRelation->rd_indexcxt to
 * fmgr_info_cxt.  An invalid regproc is reported with elog(ERROR).
 *
 * The caller must declare both "indexRelation" and "procedure".
 */
#define GET_REL_PROCEDURE(pname) \
do { \
	procedure = &indexRelation->rd_aminfo->pname; \
	if (!OidIsValid(procedure->fn_oid)) \
	{ \
		RegProcedure	amProcOid = indexRelation->rd_am->pname; \
		if (!RegProcedureIsValid(amProcOid)) \
			elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
		fmgr_info_cxt(amProcOid, procedure, indexRelation->rd_indexcxt); \
	} \
} while(0)
99
/*
 * GET_SCAN_PROCEDURE(pname)
 *
 * Same job as GET_REL_PROCEDURE, but the index is reached through the
 * caller's local "scan" (scan->indexRelation).  Sets the caller's local
 * "procedure" to the FmgrInfo slot named pname in the index's rd_aminfo,
 * filling the slot in from the regproc of the same name in rd_am if it does
 * not yet hold a valid function OID.  An invalid regproc is reported with
 * elog(ERROR).
 *
 * The caller must declare both "scan" and "procedure".
 */
#define GET_SCAN_PROCEDURE(pname) \
do { \
	procedure = &scan->indexRelation->rd_aminfo->pname; \
	if (!OidIsValid(procedure->fn_oid)) \
	{ \
		RegProcedure	amProcOid = scan->indexRelation->rd_am->pname; \
		if (!RegProcedureIsValid(amProcOid)) \
			elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
		fmgr_info_cxt(amProcOid, procedure, scan->indexRelation->rd_indexcxt); \
	} \
} while(0)
111
112 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
113                                                  int nkeys, ScanKey key);
114
115
116 /* ----------------------------------------------------------------
117  *                                 index_ interface functions
118  * ----------------------------------------------------------------
119  */
120
121 /* ----------------
122  *              index_open - open an index relation by relation OID
123  *
124  *              If lockmode is not "NoLock", the specified kind of lock is
125  *              obtained on the index.  (Generally, NoLock should only be
126  *              used if the caller knows it has some appropriate lock on the
127  *              index already.)
128  *
129  *              An error is raised if the index does not exist.
130  *
131  *              This is a convenience routine adapted for indexscan use.
132  *              Some callers may prefer to use relation_open directly.
133  * ----------------
134  */
135 Relation
136 index_open(Oid relationId, LOCKMODE lockmode)
137 {
138         Relation        r;
139
140         r = relation_open(relationId, lockmode);
141
142         if (r->rd_rel->relkind != RELKIND_INDEX)
143                 ereport(ERROR,
144                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
145                                  errmsg("\"%s\" is not an index",
146                                                 RelationGetRelationName(r))));
147
148         return r;
149 }
150
151 /* ----------------
152  *              index_close - close an index relation
153  *
154  *              If lockmode is not "NoLock", we then release the specified lock.
155  *
156  *              Note that it is often sensible to hold a lock beyond index_close;
157  *              in that case, the lock is released automatically at xact end.
158  * ----------------
159  */
160 void
161 index_close(Relation relation, LOCKMODE lockmode)
162 {
163         LockRelId       relid = relation->rd_lockInfo.lockRelId;
164
165         Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
166
167         /* The relcache does the real work... */
168         RelationClose(relation);
169
170         if (lockmode != NoLock)
171                 UnlockRelationId(&relid, lockmode);
172 }
173
174 /* ----------------
175  *              index_insert - insert an index tuple into a relation
176  * ----------------
177  */
178 bool
179 index_insert(Relation indexRelation,
180                          Datum *values,
181                          bool *isnull,
182                          ItemPointer heap_t_ctid,
183                          Relation heapRelation,
184                          bool check_uniqueness)
185 {
186         FmgrInfo   *procedure;
187
188         RELATION_CHECKS;
189         GET_REL_PROCEDURE(aminsert);
190
191         /*
192          * have the am's insert proc do all the work.
193          */
194         return DatumGetBool(FunctionCall6(procedure,
195                                                                           PointerGetDatum(indexRelation),
196                                                                           PointerGetDatum(values),
197                                                                           PointerGetDatum(isnull),
198                                                                           PointerGetDatum(heap_t_ctid),
199                                                                           PointerGetDatum(heapRelation),
200                                                                           BoolGetDatum(check_uniqueness)));
201 }
202
203 /*
204  * index_beginscan - start a scan of an index with amgettuple
205  *
206  * Note: heapRelation may be NULL if there is no intention of calling
207  * index_getnext on this scan; index_getnext_indexitem will not use the
208  * heapRelation link (nor the snapshot).  However, the caller had better
209  * be holding some kind of lock on the heap relation in any case, to ensure
210  * no one deletes it (or the index) out from under us.  Caller must also
211  * be holding a lock on the index.
212  */
213 IndexScanDesc
214 index_beginscan(Relation heapRelation,
215                                 Relation indexRelation,
216                                 Snapshot snapshot,
217                                 int nkeys, ScanKey key)
218 {
219         IndexScanDesc scan;
220
221         scan = index_beginscan_internal(indexRelation, nkeys, key);
222
223         /*
224          * Save additional parameters into the scandesc.  Everything else was set
225          * up by RelationGetIndexScan.
226          */
227         scan->is_multiscan = false;
228         scan->heapRelation = heapRelation;
229         scan->xs_snapshot = snapshot;
230
231         return scan;
232 }
233
234 /*
235  * index_beginscan_multi - start a scan of an index with amgetmulti
236  *
237  * As above, caller had better be holding some lock on the parent heap
238  * relation, even though it's not explicitly mentioned here.
239  */
240 IndexScanDesc
241 index_beginscan_multi(Relation indexRelation,
242                                           Snapshot snapshot,
243                                           int nkeys, ScanKey key)
244 {
245         IndexScanDesc scan;
246
247         scan = index_beginscan_internal(indexRelation, nkeys, key);
248
249         /*
250          * Save additional parameters into the scandesc.  Everything else was set
251          * up by RelationGetIndexScan.
252          */
253         scan->is_multiscan = true;
254         scan->xs_snapshot = snapshot;
255
256         return scan;
257 }
258
259 /*
260  * index_beginscan_internal --- common code for index_beginscan variants
261  */
262 static IndexScanDesc
263 index_beginscan_internal(Relation indexRelation,
264                                                  int nkeys, ScanKey key)
265 {
266         IndexScanDesc scan;
267         FmgrInfo   *procedure;
268
269         RELATION_CHECKS;
270         GET_REL_PROCEDURE(ambeginscan);
271
272         /*
273          * We hold a reference count to the relcache entry throughout the scan.
274          */
275         RelationIncrementReferenceCount(indexRelation);
276
277         /*
278          * Tell the AM to open a scan.
279          */
280         scan = (IndexScanDesc)
281                 DatumGetPointer(FunctionCall3(procedure,
282                                                                           PointerGetDatum(indexRelation),
283                                                                           Int32GetDatum(nkeys),
284                                                                           PointerGetDatum(key)));
285
286         return scan;
287 }
288
289 /* ----------------
290  *              index_rescan  - (re)start a scan of an index
291  *
292  * The caller may specify a new set of scankeys (but the number of keys
293  * cannot change).      To restart the scan without changing keys, pass NULL
294  * for the key array.
295  *
296  * Note that this is also called when first starting an indexscan;
297  * see RelationGetIndexScan.  Keys *must* be passed in that case,
298  * unless scan->numberOfKeys is zero.
299  * ----------------
300  */
301 void
302 index_rescan(IndexScanDesc scan, ScanKey key)
303 {
304         FmgrInfo   *procedure;
305
306         SCAN_CHECKS;
307         GET_SCAN_PROCEDURE(amrescan);
308
309         /* Release any held pin on a heap page */
310         if (BufferIsValid(scan->xs_cbuf))
311         {
312                 ReleaseBuffer(scan->xs_cbuf);
313                 scan->xs_cbuf = InvalidBuffer;
314         }
315
316         scan->kill_prior_tuple = false;         /* for safety */
317
318         FunctionCall2(procedure,
319                                   PointerGetDatum(scan),
320                                   PointerGetDatum(key));
321 }
322
323 /* ----------------
324  *              index_endscan - end a scan
325  * ----------------
326  */
327 void
328 index_endscan(IndexScanDesc scan)
329 {
330         FmgrInfo   *procedure;
331
332         SCAN_CHECKS;
333         GET_SCAN_PROCEDURE(amendscan);
334
335         /* Release any held pin on a heap page */
336         if (BufferIsValid(scan->xs_cbuf))
337         {
338                 ReleaseBuffer(scan->xs_cbuf);
339                 scan->xs_cbuf = InvalidBuffer;
340         }
341
342         /* End the AM's scan */
343         FunctionCall1(procedure, PointerGetDatum(scan));
344
345         /* Release index refcount acquired by index_beginscan */
346         RelationDecrementReferenceCount(scan->indexRelation);
347
348         /* Release the scan data structure itself */
349         IndexScanEnd(scan);
350 }
351
352 /* ----------------
353  *              index_markpos  - mark a scan position
354  * ----------------
355  */
356 void
357 index_markpos(IndexScanDesc scan)
358 {
359         FmgrInfo   *procedure;
360
361         SCAN_CHECKS;
362         GET_SCAN_PROCEDURE(ammarkpos);
363
364         FunctionCall1(procedure, PointerGetDatum(scan));
365 }
366
367 /* ----------------
368  *              index_restrpos  - restore a scan position
369  *
370  * NOTE: this only restores the internal scan state of the index AM.
371  * The current result tuple (scan->xs_ctup) doesn't change.  See comments
372  * for ExecRestrPos().
373  * ----------------
374  */
375 void
376 index_restrpos(IndexScanDesc scan)
377 {
378         FmgrInfo   *procedure;
379
380         SCAN_CHECKS;
381         GET_SCAN_PROCEDURE(amrestrpos);
382
383         scan->kill_prior_tuple = false;         /* for safety */
384
385         FunctionCall1(procedure, PointerGetDatum(scan));
386 }
387
388 /* ----------------
389  *              index_getnext - get the next heap tuple from a scan
390  *
391  * The result is the next heap tuple satisfying the scan keys and the
392  * snapshot, or NULL if no more matching tuples exist.  On success,
393  * the buffer containing the heap tuple is pinned (the pin will be dropped
394  * at the next index_getnext or index_endscan).
395  * ----------------
396  */
397 HeapTuple
398 index_getnext(IndexScanDesc scan, ScanDirection direction)
399 {
400         HeapTuple       heapTuple = &scan->xs_ctup;
401         FmgrInfo   *procedure;
402
403         SCAN_CHECKS;
404         GET_SCAN_PROCEDURE(amgettuple);
405
406         /* just make sure this is false... */
407         scan->kill_prior_tuple = false;
408
409         for (;;)
410         {
411                 bool            found;
412
413                 /*
414                  * The AM's gettuple proc finds the next tuple matching the scan keys.
415                  */
416                 found = DatumGetBool(FunctionCall2(procedure,
417                                                                                    PointerGetDatum(scan),
418                                                                                    Int32GetDatum(direction)));
419
420                 /* Reset kill flag immediately for safety */
421                 scan->kill_prior_tuple = false;
422
423                 if (!found)
424                 {
425                         /* Release any held pin on a heap page */
426                         if (BufferIsValid(scan->xs_cbuf))
427                         {
428                                 ReleaseBuffer(scan->xs_cbuf);
429                                 scan->xs_cbuf = InvalidBuffer;
430                         }
431                         return NULL;            /* failure exit */
432                 }
433
434                 pgstat_count_index_tuples(scan->indexRelation, 1);
435
436                 /*
437                  * Fetch the heap tuple and see if it matches the snapshot.
438                  */
439                 if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
440                                                            heapTuple, &scan->xs_cbuf, true,
441                                                            scan->indexRelation))
442                         break;
443
444                 /* Skip if no undeleted tuple at this location */
445                 if (heapTuple->t_data == NULL)
446                         continue;
447
448                 /*
449                  * If we can't see it, maybe no one else can either.  Check to see if
450                  * the tuple is dead to all transactions.  If so, signal the index AM
451                  * to not return it on future indexscans.
452                  *
453                  * We told heap_release_fetch to keep a pin on the buffer, so we can
454                  * re-access the tuple here.  But we must re-lock the buffer first.
455                  */
456                 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
457
458                 if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
459                                                                          scan->xs_cbuf) == HEAPTUPLE_DEAD)
460                         scan->kill_prior_tuple = true;
461
462                 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
463         }
464
465         /* Success exit */
466         return heapTuple;
467 }
468
469 /* ----------------
470  *              index_getnext_indexitem - get the next index tuple from a scan
471  *
472  * Finds the next index tuple satisfying the scan keys.  Note that the
473  * corresponding heap tuple is not accessed, and thus no time qual (snapshot)
474  * check is done, other than the index AM's internal check for killed tuples
475  * (which most callers of this routine will probably want to suppress by
476  * setting scan->ignore_killed_tuples = false).
477  *
478  * On success (TRUE return), the heap TID of the found index entry is in
479  * scan->xs_ctup.t_self.  scan->xs_cbuf is untouched.
480  * ----------------
481  */
482 bool
483 index_getnext_indexitem(IndexScanDesc scan,
484                                                 ScanDirection direction)
485 {
486         FmgrInfo   *procedure;
487         bool            found;
488
489         SCAN_CHECKS;
490         GET_SCAN_PROCEDURE(amgettuple);
491
492         /* just make sure this is false... */
493         scan->kill_prior_tuple = false;
494
495         /*
496          * have the am's gettuple proc do all the work.
497          */
498         found = DatumGetBool(FunctionCall2(procedure,
499                                                                            PointerGetDatum(scan),
500                                                                            Int32GetDatum(direction)));
501
502         if (found)
503                 pgstat_count_index_tuples(scan->indexRelation, 1);
504
505         return found;
506 }
507
508 /* ----------------
509  *              index_getmulti - get multiple tuples from an index scan
510  *
511  * Collects the TIDs of multiple heap tuples satisfying the scan keys.
512  * Since there's no interlock between the index scan and the eventual heap
513  * access, this is only safe to use with MVCC-based snapshots: the heap
514  * item slot could have been replaced by a newer tuple by the time we get
515  * to it.
516  *
517  * A TRUE result indicates more calls should occur; a FALSE result says the
518  * scan is done.  *returned_tids could be zero or nonzero in either case.
519  * ----------------
520  */
521 bool
522 index_getmulti(IndexScanDesc scan,
523                            ItemPointer tids, int32 max_tids,
524                            int32 *returned_tids)
525 {
526         FmgrInfo   *procedure;
527         bool            found;
528
529         SCAN_CHECKS;
530         GET_SCAN_PROCEDURE(amgetmulti);
531
532         /* just make sure this is false... */
533         scan->kill_prior_tuple = false;
534
535         /*
536          * have the am's getmulti proc do all the work.
537          */
538         found = DatumGetBool(FunctionCall4(procedure,
539                                                                            PointerGetDatum(scan),
540                                                                            PointerGetDatum(tids),
541                                                                            Int32GetDatum(max_tids),
542                                                                            PointerGetDatum(returned_tids)));
543
544         pgstat_count_index_tuples(scan->indexRelation, *returned_tids);
545
546         return found;
547 }
548
549 /* ----------------
550  *              index_bulk_delete - do mass deletion of index entries
551  *
552  *              callback routine tells whether a given main-heap tuple is
553  *              to be deleted
554  *
555  *              return value is an optional palloc'd struct of statistics
556  * ----------------
557  */
558 IndexBulkDeleteResult *
559 index_bulk_delete(IndexVacuumInfo *info,
560                                   IndexBulkDeleteResult *stats,
561                                   IndexBulkDeleteCallback callback,
562                                   void *callback_state)
563 {
564         Relation        indexRelation = info->index;
565         FmgrInfo   *procedure;
566         IndexBulkDeleteResult *result;
567
568         RELATION_CHECKS;
569         GET_REL_PROCEDURE(ambulkdelete);
570
571         result = (IndexBulkDeleteResult *)
572                 DatumGetPointer(FunctionCall4(procedure,
573                                                                           PointerGetDatum(info),
574                                                                           PointerGetDatum(stats),
575                                                                           PointerGetDatum((Pointer) callback),
576                                                                           PointerGetDatum(callback_state)));
577
578         return result;
579 }
580
581 /* ----------------
582  *              index_vacuum_cleanup - do post-deletion cleanup of an index
583  *
584  *              return value is an optional palloc'd struct of statistics
585  * ----------------
586  */
587 IndexBulkDeleteResult *
588 index_vacuum_cleanup(IndexVacuumInfo *info,
589                                          IndexBulkDeleteResult *stats)
590 {
591         Relation        indexRelation = info->index;
592         FmgrInfo   *procedure;
593         IndexBulkDeleteResult *result;
594
595         RELATION_CHECKS;
596         GET_REL_PROCEDURE(amvacuumcleanup);
597
598         result = (IndexBulkDeleteResult *)
599                 DatumGetPointer(FunctionCall2(procedure,
600                                                                           PointerGetDatum(info),
601                                                                           PointerGetDatum(stats)));
602
603         return result;
604 }
605
606 /* ----------------
607  *              index_getprocid
608  *
609  *              Index access methods typically require support routines that are
610  *              not directly the implementation of any WHERE-clause query operator
611  *              and so cannot be kept in pg_amop.  Instead, such routines are kept
612  *              in pg_amproc.  These registered procedure OIDs are assigned numbers
613  *              according to a convention established by the access method.
614  *              The general index code doesn't know anything about the routines
615  *              involved; it just builds an ordered list of them for
616  *              each attribute on which an index is defined.
617  *
618  *              As of Postgres 8.3, support routines within an operator family
619  *              are further subdivided by the "left type" and "right type" of the
620  *              query operator(s) that they support.  The "default" functions for a
621  *              particular indexed attribute are those with both types equal to
622  *              the index opclass' opcintype (note that this is subtly different
623  *              from the indexed attribute's own type: it may be a binary-compatible
624  *              type instead).  Only the default functions are stored in relcache
625  *              entries --- access methods can use the syscache to look up non-default
626  *              functions.
627  *
628  *              This routine returns the requested default procedure OID for a
629  *              particular indexed attribute.
630  * ----------------
631  */
632 RegProcedure
633 index_getprocid(Relation irel,
634                                 AttrNumber attnum,
635                                 uint16 procnum)
636 {
637         RegProcedure *loc;
638         int                     nproc;
639         int                     procindex;
640
641         nproc = irel->rd_am->amsupport;
642
643         Assert(procnum > 0 && procnum <= (uint16) nproc);
644
645         procindex = (nproc * (attnum - 1)) + (procnum - 1);
646
647         loc = irel->rd_support;
648
649         Assert(loc != NULL);
650
651         return loc[procindex];
652 }
653
654 /* ----------------
655  *              index_getprocinfo
656  *
657  *              This routine allows index AMs to keep fmgr lookup info for
658  *              support procs in the relcache.  As above, only the "default"
659  *              functions for any particular indexed attribute are cached.
660  *
661  * Note: the return value points into cached data that will be lost during
662  * any relcache rebuild!  Therefore, either use the callinfo right away,
663  * or save it only after having acquired some type of lock on the index rel.
664  * ----------------
665  */
666 FmgrInfo *
667 index_getprocinfo(Relation irel,
668                                   AttrNumber attnum,
669                                   uint16 procnum)
670 {
671         FmgrInfo   *locinfo;
672         int                     nproc;
673         int                     procindex;
674
675         nproc = irel->rd_am->amsupport;
676
677         Assert(procnum > 0 && procnum <= (uint16) nproc);
678
679         procindex = (nproc * (attnum - 1)) + (procnum - 1);
680
681         locinfo = irel->rd_supportinfo;
682
683         Assert(locinfo != NULL);
684
685         locinfo += procindex;
686
687         /* Initialize the lookup info if first time through */
688         if (locinfo->fn_oid == InvalidOid)
689         {
690                 RegProcedure *loc = irel->rd_support;
691                 RegProcedure procId;
692
693                 Assert(loc != NULL);
694
695                 procId = loc[procindex];
696
697                 /*
698                  * Complain if function was not found during IndexSupportInitialize.
699                  * This should not happen unless the system tables contain bogus
700                  * entries for the index opclass.  (If an AM wants to allow a support
701                  * function to be optional, it can use index_getprocid.)
702                  */
703                 if (!RegProcedureIsValid(procId))
704                         elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
705                                  procnum, attnum, RelationGetRelationName(irel));
706
707                 fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);
708         }
709
710         return locinfo;
711 }