1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/index/genam.c
14 * many of the old access method routines have been turned into
15 * macros and moved to genam.h -cim 4/30/91
17 *-------------------------------------------------------------------------
22 #include "access/relscan.h"
23 #include "access/transam.h"
24 #include "catalog/index.h"
25 #include "lib/stringinfo.h"
26 #include "miscadmin.h"
27 #include "storage/bufmgr.h"
28 #include "utils/acl.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/rel.h"
32 #include "utils/rls.h"
33 #include "utils/ruleutils.h"
34 #include "utils/snapmgr.h"
35 #include "utils/syscache.h"
36 #include "utils/tqual.h"
39 /* ----------------------------------------------------------------
40 * general access method routines
42 * All indexed access methods use an identical scan structure.
43 * We don't know how the various AMs do locking, however, so we don't
44 * do anything about that here.
46 * The intent is that an AM implementor will define a beginscan routine
47 * that calls RelationGetIndexScan, to fill in the scan, and then does
48 * whatever kind of locking is needed.
50 * At the end of a scan, the AM's endscan routine undoes the locking,
51 * but does *not* call IndexScanEnd --- the higher-level index_endscan
52 * routine does that. (We can't do it in the AM because index_endscan
53 * still needs to touch the IndexScanDesc after calling the AM.)
55 * Because of this, the AM does not have a choice whether to call
56 * RelationGetIndexScan or not; its beginscan routine must return an
57 * object made by RelationGetIndexScan. This is kinda ugly but not
58 * worth cleaning up now.
59 * ----------------------------------------------------------------
63 * RelationGetIndexScan -- Create and fill an IndexScanDesc.
65 * This routine creates an index scan structure and sets up its initial contents.
69 * indexRelation -- index relation for scan.
70 * nkeys -- count of scan keys (index qual conditions).
71 * norderbys -- count of index order-by operators.
74 * An initialized IndexScanDesc.
/*
 * Allocate an IndexScanDescData with palloc and initialize its fields.  The
 * index relation and the key/order-by counts are taken from the arguments;
 * the heap relation and the snapshot are left for the caller to fill in.
 */
78 RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
82 scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
84 scan->heapRelation = NULL; /* may be set later */
85 scan->indexRelation = indexRelation;
86 scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
87 scan->numberOfKeys = nkeys;
88 scan->numberOfOrderBys = norderbys;
91 * We allocate key workspace here, but it won't get filled until amrescan.
/* Workspace for nkeys scan keys. */
94 scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
/* Workspace for norderbys ordering keys. */
98 scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
/*
 * NOTE(review): the condition that selects between the allocation above and
 * the NULL assignment below is not visible here -- presumably the NULL case
 * applies when norderbys is zero; confirm against the full source.
 */
100 scan->orderByData = NULL;
102 scan->xs_want_itup = false; /* may be set later */
105 * During recovery we ignore killed tuples and don't bother to kill them
106 * either. We do this because the xmin on the primary node could easily be
107 * later than the xmin on the standby node, so that what the primary
108 * thinks is killed is supposed to be visible on standby. So for correct
109 * MVCC for queries during recovery we must ignore these hints and check
110 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
111 * transaction that was started during recovery. xactStartedInRecovery
112 * should not be altered by index AMs.
/*
 * ignore_killed_tuples is simply the negation of xactStartedInRecovery, which
 * is sampled once here from TransactionStartedDuringRecovery().
 */
114 scan->kill_prior_tuple = false;
115 scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
116 scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
/* Index-tuple return fields start out empty. */
120 scan->xs_itup = NULL;
121 scan->xs_itupdesc = NULL;
/* Heap-tuple state starts out invalid: no TID, no tuple data, no buffer. */
123 ItemPointerSetInvalid(&scan->xs_ctup.t_self);
124 scan->xs_ctup.t_data = NULL;
125 scan->xs_cbuf = InvalidBuffer;
126 scan->xs_continue_hot = false;
132 * IndexScanEnd -- End an index scan.
134 * This routine just releases the storage acquired by
135 * RelationGetIndexScan(). Any AM-level resources are
136 * assumed to already have been released by the AM's endscan routine.
/*
 * Free the scan-key and order-by workspaces attached to the scan descriptor.
 * Either pointer may be NULL, in which case it is skipped.
 */
144 IndexScanEnd(IndexScanDesc scan)
146 if (scan->keyData != NULL)
147 pfree(scan->keyData);
148 if (scan->orderByData != NULL)
149 pfree(scan->orderByData);
155 * BuildIndexValueDescription
157 * Construct a string describing the contents of an index entry, in the
158 * form "(key_name, ...)=(key_value, ...)". This is currently used
159 * for building unique-constraint and exclusion-constraint error messages.
161 * Note that if the user does not have permissions to view all of the
162 * columns involved then a NULL is returned. Returning a partial key seems
163 * unlikely to be useful and we have no way to know which of the columns the
164 * user provided (unlike in ExecBuildSlotValueDescription).
166 * The passed-in values/nulls arrays are the "raw" input to the index AM,
167 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
168 * in the index, but it's what the user perceives to be stored.
/*
 * Look up the index's pg_index row, apply the row-level-security and SELECT
 * privilege checks against the underlying table, and then format the index
 * column names followed by the supplied values into a StringInfo buffer.
 *
 * indexRelation -- the index whose entry is being described.
 * values, isnull -- per-column input data; values[i] is rendered with the
 *                   output function of the opclass input type of column i.
 */
171 BuildIndexValueDescription(Relation indexRelation,
172 Datum *values, bool *isnull)
175 Form_pg_index idxrec;
/* Number of values to print, taken from the index relation's relnatts. */
177 int natts = indexRelation->rd_rel->relnatts;
180 Oid indexrelid = RelationGetRelid(indexRelation);
185 * Check permissions- if the user does not have access to view all of the
186 * key columns then return NULL to avoid leaking data.
188 * First check if RLS is enabled for the relation. If so, return NULL to
189 * avoid leaking data.
191 * Next we need to check table-level SELECT access and then, if there is
192 * no access there, check column-level permissions.
196 * Fetch the pg_index tuple by the Oid of the index
198 ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid));
199 if (!HeapTupleIsValid(ht_idx))
200 elog(ERROR, "cache lookup failed for index %u", indexrelid);
201 idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
/* indrelid is the relation that the RLS and ACL checks below are made on. */
203 indrelid = idxrec->indrelid;
204 Assert(indexrelid == idxrec->indexrelid);
206 /* RLS check- if RLS is enabled then we don't return anything. */
207 if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
/* Drop the syscache reference on this early-exit path. */
209 ReleaseSysCache(ht_idx);
213 /* Table-level SELECT is enough, if the user has it */
214 aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
215 if (aclresult != ACLCHECK_OK)
218 * No table-level access, so step through the columns in the index and
219 * make sure the user has SELECT rights on all of them.
221 for (keyno = 0; keyno < idxrec->indnatts; keyno++)
223 AttrNumber attnum = idxrec->indkey.values[keyno];
226 * Note that if attnum == InvalidAttrNumber, then this is an index
227 * based on an expression and we return no detail rather than try
228 * to figure out what column(s) the expression includes and if the
229 * user has SELECT rights on them.
231 if (attnum == InvalidAttrNumber ||
232 pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
233 ACL_SELECT) != ACLCHECK_OK)
235 /* No access, so clean up and return */
236 ReleaseSysCache(ht_idx);
/* Permission checks are done; the pg_index row is not used past this point. */
241 ReleaseSysCache(ht_idx);
/* Start the description with the column names of the index. */
243 initStringInfo(&buf);
244 appendStringInfo(&buf, "(%s)=(",
245 pg_get_indexdef_columns(indexrelid, true));
/* Then append one textual value per index column. */
247 for (i = 0; i < natts; i++)
259 * The provided data is not necessarily of the type stored in the
260 * index; rather it is of the index opclass's input type. So look
261 * at rd_opcintype not the index tupdesc.
263 * Note: this is a bit shaky for opclasses that have pseudotype
264 * input types such as ANYARRAY or RECORD. Currently, the
265 * typoutput functions associated with the pseudotypes will work
266 * okay, but we might have to try harder in future.
268 getTypeOutputInfo(indexRelation->rd_opcintype[i],
269 &foutoid, &typisvarlena);
/* Convert the datum to text with that type's output function. */
270 val = OidOutputFunctionCall(foutoid, values[i]);
/* Separator between values, then the value itself. */
274 appendStringInfoString(&buf, ", ");
275 appendStringInfoString(&buf, val);
/* Close the value list. */
278 appendStringInfoChar(&buf, ')');
284 /* ----------------------------------------------------------------
285 * heap-or-index-scan access to system catalogs
287 * These functions support system catalog accesses that normally use
288 * an index but need to be capable of being switched to heap scans
289 * if the system indexes are unavailable.
291 * The specified scan keys must be compatible with the named index.
292 * Generally this means that they must constrain either all columns
293 * of the index, or the first K columns of an N-column index.
295 * These routines could work with non-system tables, actually,
296 * but they're only useful when there is a known index to use with
297 * the given scan keys; so in practice they're only good for
298 * predetermined types of scans of system catalogs.
299 * ----------------------------------------------------------------
303 * systable_beginscan --- set up for heap-or-index scan
305 * rel: catalog to scan, already opened and suitably locked
306 * indexId: OID of index to conditionally use
307 * indexOK: if false, forces a heap scan (see notes below)
308 * snapshot: time qual to use (NULL for a recent catalog snapshot)
309 * nkeys, key: scan keys
311 * The attribute numbers in the scan key should be set for the heap case.
312 * If we choose to index, we reset them to 1..n to reference the index
313 * columns. Note this means there must be one scankey qualification per
314 * index column! This is checked by the Asserts in the normal, index-using
315 * case, but won't be checked if the heapscan path is taken.
317 * The routine checks the normal cases for whether an indexscan is safe,
318 * but caller can make additional checks and pass indexOK=false if needed.
319 * In the standard case indexOK can simply be the constant true.
/*
 * Build a SysScanDesc for scanning heapRelation, either through the index
 * identified by indexId or with a plain heap scan.  The descriptor records
 * the heap relation, the opened index (if any), and any snapshot that was
 * registered here.
 */
322 systable_beginscan(Relation heapRelation,
326 int nkeys, ScanKey key)
/*
 * The index is opened (AccessShareLock) only when system indexes are not
 * being ignored and this index is not currently being reindexed.
 */
332 !IgnoreSystemIndexes &&
333 !ReindexIsProcessingIndex(indexId))
334 irel = index_open(indexId, AccessShareLock);
338 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
340 sysscan->heap_rel = heapRelation;
341 sysscan->irel = irel;
/*
 * With no caller-supplied snapshot, register a catalog snapshot for this
 * relation and remember it in the descriptor; systable_endscan unregisters
 * whatever is stored there.
 */
343 if (snapshot == NULL)
345 Oid relid = RelationGetRelid(heapRelation);
347 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
348 sysscan->snapshot = snapshot;
352 /* Caller is responsible for any snapshot. */
353 sysscan->snapshot = NULL;
/*
 * Note that this rewrites sk_attno in the caller's key array: each heap
 * attribute number is replaced by the 1-based position of the matching
 * column in the index.  A key naming a column the index lacks is an error.
 */
360 /* Change attribute numbers to be index column numbers. */
361 for (i = 0; i < nkeys; i++)
365 for (j = 0; j < irel->rd_index->indnatts; j++)
367 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
369 key[i].sk_attno = j + 1;
373 if (j == irel->rd_index->indnatts)
374 elog(ERROR, "column is not in index");
/* Index path: start the index scan and hand it the keys; no heap scan. */
377 sysscan->iscan = index_beginscan(heapRelation, irel,
379 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
380 sysscan->scan = NULL;
385 * We disallow synchronized scans when forced to use a heapscan on a
386 * catalog. In most cases the desired rows are near the front, so
387 * that the unpredictable start point of a syncscan is a serious
388 * disadvantage; and there are no compensating advantages, because
389 * it's unlikely that such scans will occur in parallel.
/* Heap path: start the heap scan; no index scan. */
391 sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
394 sysscan->iscan = NULL;
401 * systable_getnext --- get next tuple in a heap-or-index scan
403 * Returns NULL if no more tuples available.
405 * Note that returned tuple is a reference to data in a disk buffer;
406 * it must not be modified, and should be presumed inaccessible after
407 * next getnext() or endscan() call.
/*
 * Fetch the next tuple, always in the forward direction, from whichever of
 * the two underlying scans (index or heap) the descriptor is using.
 */
410 systable_getnext(SysScanDesc sysscan)
/* Index-scan case. */
416 htup = index_getnext(sysscan->iscan, ForwardScanDirection);
419 * We currently don't need to support lossy index operators for any
420 * system catalog scan. It could be done here, using the scan keys to
421 * drive the operator calls, if we arranged to save the heap attnums
422 * during systable_beginscan(); this is practical because we still
423 * wouldn't need to support indexes on expressions.
/* A tuple flagged for recheck by the index scan is rejected with an error. */
425 if (htup && sysscan->iscan->xs_recheck)
426 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
/* Heap-scan case. */
429 htup = heap_getnext(sysscan->scan, ForwardScanDirection);
435 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
437 * In particular, determine if this tuple would be visible to a catalog scan
438 * that started now. We don't handle the case of a non-MVCC scan snapshot,
439 * because no caller needs that yet.
441 * This is useful to test whether an object was deleted while we waited to
442 * acquire lock on it.
444 * Note: we don't actually *need* the tuple to be passed in, but it's a
445 * good crosscheck that the caller is interested in the right tuple.
/*
 * Test the given tuple against a freshly obtained catalog snapshot for the
 * scanned relation, holding a share lock on the tuple's buffer for the
 * duration of the visibility test.
 */
448 systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
454 * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
455 * acquire snapshots, so we need not register the snapshot. Those
456 * facilities are too low-level to have any business scanning tables.
458 freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
/*
 * Index-scan case: the tuple must be the one held in the IndexScanDesc, and
 * the buffer to lock is the scan's current heap buffer.
 */
462 IndexScanDesc scan = sysscan->iscan;
464 Assert(IsMVCCSnapshot(scan->xs_snapshot));
465 Assert(tup == &scan->xs_ctup);
466 Assert(BufferIsValid(scan->xs_cbuf));
467 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
468 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
469 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
470 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
/*
 * Heap-scan case: the same checks and test, using the HeapScanDesc's tuple
 * and buffer.
 */
474 HeapScanDesc scan = sysscan->scan;
476 Assert(IsMVCCSnapshot(scan->rs_snapshot));
477 Assert(tup == &scan->rs_ctup);
478 Assert(BufferIsValid(scan->rs_cbuf));
479 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
480 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
481 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
482 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
488 * systable_endscan --- close scan, release resources
490 * Note that it's still up to the caller to close the heap relation.
/*
 * Shut down whichever underlying scan was in use and release the snapshot
 * if the descriptor holds one.
 */
493 systable_endscan(SysScanDesc sysscan)
/* Index-scan case: end the scan and close the index with the same lock mode it was opened with. */
497 index_endscan(sysscan->iscan);
498 index_close(sysscan->irel, AccessShareLock);
/* Heap-scan case. */
501 heap_endscan(sysscan->scan);
/* sysscan->snapshot is non-NULL only when the snapshot was registered by systable_beginscan. */
503 if (sysscan->snapshot)
504 UnregisterSnapshot(sysscan->snapshot);
511 * systable_beginscan_ordered --- set up for ordered catalog scan
513 * These routines have essentially the same API as systable_beginscan etc,
514 * except that they guarantee to return multiple matching tuples in
515 * index order. Also, for largely historical reasons, the index to use
516 * is opened and locked by the caller, not here.
518 * Currently we do not support non-index-based scans here. (In principle
519 * we could do a heapscan and sort, but the uses are in places that
520 * probably don't need to still work with corrupted catalog indexes.)
521 * For the moment, therefore, these functions are merely the thinnest of
522 * wrappers around index_beginscan/index_getnext. The main reason for their
523 * existence is to centralize possible future support of lossy operators in catalog scans.
/*
 * Build a SysScanDesc that scans heapRelation through the already-opened
 * indexRelation.  There is no heap-scan fallback: an index that is being
 * reindexed is an error, and IgnoreSystemIndexes only produces a warning.
 */
527 systable_beginscan_ordered(Relation heapRelation,
528 Relation indexRelation,
530 int nkeys, ScanKey key)
535 /* REINDEX can probably be a hard error here ... */
536 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
537 elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
538 RelationGetRelationName(indexRelation));
539 /* ... but we only throw a warning about violating IgnoreSystemIndexes */
540 if (IgnoreSystemIndexes)
541 elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
542 RelationGetRelationName(indexRelation));
544 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
546 sysscan->heap_rel = heapRelation;
547 sysscan->irel = indexRelation;
/*
 * With no caller-supplied snapshot, register a catalog snapshot for this
 * relation and remember it so that systable_endscan_ordered can unregister
 * it.
 */
549 if (snapshot == NULL)
551 Oid relid = RelationGetRelid(heapRelation);
553 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
554 sysscan->snapshot = snapshot;
558 /* Caller is responsible for any snapshot. */
559 sysscan->snapshot = NULL;
/*
 * Note that this rewrites sk_attno in the caller's key array: each heap
 * attribute number is replaced by the 1-based position of the matching
 * column in the index.  A key naming a column the index lacks is an error.
 */
562 /* Change attribute numbers to be index column numbers. */
563 for (i = 0; i < nkeys; i++)
567 for (j = 0; j < indexRelation->rd_index->indnatts; j++)
569 if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
571 key[i].sk_attno = j + 1;
575 if (j == indexRelation->rd_index->indnatts)
576 elog(ERROR, "column is not in index");
/* Start the index scan and hand it the keys; the heap-scan slot stays empty. */
579 sysscan->iscan = index_beginscan(heapRelation, indexRelation,
581 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
582 sysscan->scan = NULL;
588 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
/*
 * Fetch the next tuple from the index scan in the requested direction.
 * The descriptor must have an index attached.
 */
591 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
595 Assert(sysscan->irel);
596 htup = index_getnext(sysscan->iscan, direction);
597 /* See notes in systable_getnext */
598 if (htup && sysscan->iscan->xs_recheck)
599 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
605 * systable_endscan_ordered --- close scan, release resources
/*
 * End the index scan and release the snapshot if the descriptor holds one.
 * No index_close call is visible here; the index is opened and locked by the
 * caller of systable_beginscan_ordered.
 */
608 systable_endscan_ordered(SysScanDesc sysscan)
610 Assert(sysscan->irel);
611 index_endscan(sysscan->iscan);
/* sysscan->snapshot is non-NULL only when the snapshot was registered by systable_beginscan_ordered. */
612 if (sysscan->snapshot)
613 UnregisterSnapshot(sysscan->snapshot);