From: Robert Haas Date: Tue, 2 Jul 2013 13:47:01 +0000 (-0400) Subject: Use an MVCC snapshot, rather than SnapshotNow, for catalog scans. X-Git-Tag: REL9_4_BETA1~1394 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=568d4138c646cd7cd8a837ac244ef2caf27c6bb8;p=postgresql Use an MVCC snapshot, rather than SnapshotNow, for catalog scans. SnapshotNow scans have the undesirable property that, in the face of concurrent updates, the scan can fail to see either the old or the new versions of the row. In many cases, we work around this by requiring DDL operations to hold AccessExclusiveLock on the object being modified; in some cases, the existing locking is inadequate and random failures occur as a result. This commit doesn't change anything related to locking, but will hopefully pave the way to allowing lock strength reductions in the future. The major issue has held us back from making this change in the past is that taking an MVCC snapshot is significantly more expensive than using a static special snapshot such as SnapshotNow. However, testing of various worst-case scenarios reveals that this problem is not severe except under fairly extreme workloads. To mitigate those problems, we avoid retaking the MVCC snapshot for each new scan; instead, we take a new snapshot only when invalidation messages have been processed. The catcache machinery already requires that invalidation messages be sent before releasing the related heavyweight lock; else other backends might rely on locally-cached data rather than scanning the catalog at all. Thus, making snapshot reuse dependent on the same guarantees shouldn't break anything that wasn't already subtly broken. Patch by me. Review by Michael Paquier and Andres Freund. --- diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index e617f9b399..111071940b 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -2046,7 +2046,7 @@ get_pkey_attnames(Relation rel, int16 *numatts) ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(indexRelation, IndexIndrelidIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid(indexTuple = systable_getnext(scan))) { diff --git a/contrib/sepgsql/label.c b/contrib/sepgsql/label.c index 17b832efe2..81ab972e04 100644 --- a/contrib/sepgsql/label.c +++ b/contrib/sepgsql/label.c @@ -727,7 +727,7 @@ exec_object_restorecon(struct selabel_handle * sehnd, Oid catalogId) rel = heap_open(catalogId, AccessShareLock); sscan = systable_beginscan(rel, InvalidOid, false, - SnapshotNow, 0, NULL); + NULL, 0, NULL); while (HeapTupleIsValid(tuple = systable_getnext(sscan))) { Form_pg_database datForm; diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 570ee90dcd..fd1168991a 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -713,7 +713,7 @@ amrestrpos (IndexScanDesc scan); When using an MVCC-compliant snapshot, there is no problem because the new occupant of the slot is certain to be too new to pass the snapshot test. However, with a non-MVCC-compliant snapshot (such as - SnapshotNow), it would be possible to accept and return + SnapshotAny), it would be possible to accept and return a row that does not in fact match the scan keys. We could defend against this scenario by requiring the scan keys to be rechecked against the heap row in all cases, but that is too expensive. Instead, diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1531f3b479..5bcbc92bfa 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -80,7 +80,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync, - bool is_bitmapscan); + bool is_bitmapscan, bool temp_snap); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, @@ -1286,7 +1286,17 @@ heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - true, true, false); + true, true, false, false); +} + +HeapScanDesc +heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key) +{ + Oid relid = RelationGetRelid(relation); + Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + + return heap_beginscan_internal(relation, snapshot, nkeys, key, + true, true, false, true); } HeapScanDesc @@ -1295,7 +1305,7 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot, bool allow_strat, bool allow_sync) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - allow_strat, allow_sync, false); + allow_strat, allow_sync, false, false); } HeapScanDesc @@ -1303,14 +1313,14 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot, int nkeys, ScanKey key) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - false, false, true); + false, false, true, false); } static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync, - bool is_bitmapscan) + bool is_bitmapscan, bool temp_snap) { HeapScanDesc scan; @@ -1335,6 +1345,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, scan->rs_strategy = NULL; /* set in initscan */ scan->rs_allow_strat = allow_strat; scan->rs_allow_sync = allow_sync; + scan->rs_temp_snap = temp_snap; /* * we can use page-at-a-time mode if it's an MVCC-safe snapshot @@ -1421,6 +1432,9 @@ heap_endscan(HeapScanDesc scan) if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); + if (scan->rs_temp_snap) + UnregisterSnapshot(scan->rs_snapshot); + pfree(scan); } diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 31a419b841..2bfe78a92a 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -28,6 +28,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/snapmgr.h" #include "utils/tqual.h" @@ -231,7 +232,7 @@ BuildIndexValueDescription(Relation indexRelation, * rel: catalog to scan, already opened and suitably locked * indexId: OID of index to conditionally use * indexOK: if false, forces a heap scan (see notes below) - * snapshot: time qual to use (usually should be SnapshotNow) + * snapshot: time qual to use (NULL for a recent catalog snapshot) * nkeys, key: scan keys * * The attribute numbers in the scan key should be set for the heap case. @@ -266,6 +267,19 @@ systable_beginscan(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = irel; + if (snapshot == NULL) + { + Oid relid = RelationGetRelid(heapRelation); + + snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + sysscan->snapshot = snapshot; + } + else + { + /* Caller is responsible for any snapshot. */ + sysscan->snapshot = NULL; + } + if (irel) { int i; @@ -401,6 +415,9 @@ systable_endscan(SysScanDesc sysscan) else heap_endscan(sysscan->scan); + if (sysscan->snapshot) + UnregisterSnapshot(sysscan->snapshot); + pfree(sysscan); } @@ -444,6 +461,19 @@ systable_beginscan_ordered(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; + if (snapshot == NULL) + { + Oid relid = RelationGetRelid(heapRelation); + + snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + sysscan->snapshot = snapshot; + } + else + { + /* Caller is responsible for any snapshot. */ + sysscan->snapshot = NULL; + } + /* Change attribute numbers to be index column numbers. */ for (i = 0; i < nkeys; i++) { @@ -494,5 +524,7 @@ systable_endscan_ordered(SysScanDesc sysscan) { Assert(sysscan->irel); index_endscan(sysscan->iscan); + if (sysscan->snapshot) + UnregisterSnapshot(sysscan->snapshot); pfree(sysscan); } diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index fcf1a95140..40f09e397e 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -141,9 +141,10 @@ deletes index entries before deleting tuples, the super-exclusive lock guarantees that VACUUM can't delete any heap tuple that an indexscanning process might be about to visit. (This guarantee works only for simple indexscans that visit the heap in sync with the index scan, not for bitmap -scans. We only need the guarantee when using non-MVCC snapshot rules such -as SnapshotNow, so in practice this is only important for system catalog -accesses.) +scans. We only need the guarantee when using non-MVCC snapshot rules; in +an MVCC snapshot, it wouldn't matter if the heap tuple were replaced with +an unrelated tuple at the same TID, because the new tuple wouldn't be +visible to our scan anyway.) Because a page can be split even while someone holds a pin on it, it is possible that an indexscan will return items that are no longer stored on diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index c9c7b4a208..7670b60f53 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -69,11 +69,14 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) appendStringInfo(buf, " catalog %u", msg->cat.catId); else if (msg->id == SHAREDINVALRELCACHE_ID) appendStringInfo(buf, " relcache %u", msg->rc.relId); - /* remaining cases not expected, but print something anyway */ + /* not expected, but print something anyway */ else if (msg->id == SHAREDINVALSMGR_ID) appendStringInfo(buf, " smgr"); + /* not expected, but print something anyway */ else if (msg->id == SHAREDINVALRELMAP_ID) appendStringInfo(buf, " relmap"); + else if (msg->id == SHAREDINVALSNAPSHOT_ID) + appendStringInfo(buf, " snapshot %u", msg->sn.relId); else appendStringInfo(buf, " unknown id %d", msg->id); } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 8905596c0b..d23dc4504a 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -611,7 +611,7 @@ boot_openrel(char *relname) { /* We can now load the pg_type data */ rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); i = 0; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) ++i; @@ -620,7 +620,7 @@ boot_openrel(char *relname) while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); app = Typ; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -918,7 +918,7 @@ gettype(char *type) } elog(DEBUG4, "external type: %s", type); rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); i = 0; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) ++i; @@ -927,7 +927,7 @@ gettype(char *type) while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); app = Typ; while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index ced66b127b..e0dcf05687 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -788,7 +788,7 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames) ObjectIdGetDatum(namespaceId)); rel = heap_open(ProcedureRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 1, key); + scan = heap_beginscan_catalog(rel, 1, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -833,7 +833,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind) CharGetDatum(relkind)); rel = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 2, key); + scan = heap_beginscan_catalog(rel, 2, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -1332,7 +1332,7 @@ RemoveRoleFromObjectACL(Oid roleid, Oid classid, Oid objid) ObjectIdGetDatum(objid)); scan = systable_beginscan(rel, DefaultAclOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tuple = systable_getnext(scan); @@ -1452,7 +1452,7 @@ RemoveDefaultACLById(Oid defaclOid) ObjectIdGetDatum(defaclOid)); scan = systable_beginscan(rel, DefaultAclOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tuple = systable_getnext(scan); @@ -2705,7 +2705,7 @@ ExecGrant_Largeobject(InternalGrant *istmt) scan = systable_beginscan(relation, LargeObjectMetadataOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) @@ -3468,7 +3468,7 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid, return pg_language_aclmask(table_oid, roleid, mask, how); case ACL_KIND_LARGEOBJECT: return pg_largeobject_aclmask_snapshot(table_oid, roleid, - mask, how, SnapshotNow); + mask, how, NULL); case ACL_KIND_NAMESPACE: return pg_namespace_aclmask(table_oid, roleid, mask, how); case ACL_KIND_TABLESPACE: @@ -3856,10 +3856,13 @@ pg_language_aclmask(Oid lang_oid, Oid roleid, * Exported routine for examining a user's privileges for a largeobject * * When a large object is opened for reading, it is opened relative to the - * caller's snapshot, but when it is opened for writing, it is always relative - * to SnapshotNow, as documented in doc/src/sgml/lobj.sgml. This function - * takes a snapshot argument so that the permissions check can be made relative - * to the same snapshot that will be used to read the underlying data. + * caller's snapshot, but when it is opened for writing, a current + * MVCC snapshot will be used. See doc/src/sgml/lobj.sgml. This function + * takes a snapshot argument so that the permissions check can be made + * relative to the same snapshot that will be used to read the underlying + * data. The caller will actually pass NULL for an instantaneous MVCC + * snapshot, since all we do with the snapshot argument is pass it through + * to systable_beginscan(). */ AclMode pg_largeobject_aclmask_snapshot(Oid lobj_oid, Oid roleid, @@ -4644,7 +4647,7 @@ pg_language_ownercheck(Oid lan_oid, Oid roleid) * Ownership check for a largeobject (specified by OID) * * This is only used for operations like ALTER LARGE OBJECT that are always - * relative to SnapshotNow. + * relative to an up-to-date snapshot. */ bool pg_largeobject_ownercheck(Oid lobj_oid, Oid roleid) @@ -4670,7 +4673,7 @@ pg_largeobject_ownercheck(Oid lobj_oid, Oid roleid) scan = systable_beginscan(pg_lo_meta, LargeObjectMetadataOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) @@ -5032,7 +5035,7 @@ pg_extension_ownercheck(Oid ext_oid, Oid roleid) scan = systable_beginscan(pg_extension, ExtensionOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 41a5da0bd2..c1287a77df 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -218,20 +218,16 @@ IsReservedName(const char *name) * Given the OID of a relation, determine whether it's supposed to be * shared across an entire database cluster. * - * Hard-wiring this list is pretty grotty, but we really need it so that - * we can compute the locktag for a relation (and then lock it) without - * having already read its pg_class entry. If we try to retrieve relisshared - * from pg_class with no pre-existing lock, there is a race condition against - * anyone who is concurrently committing a change to the pg_class entry: - * since we read system catalog entries under SnapshotNow, it's possible - * that both the old and new versions of the row are invalid at the instants - * we scan them. We fix this by insisting that updaters of a pg_class - * row must hold exclusive lock on the corresponding rel, and that users - * of a relation must hold at least AccessShareLock on the rel *before* - * trying to open its relcache entry. But to lock a rel, you have to - * know if it's shared. Fortunately, the set of shared relations is - * fairly static, so a hand-maintained list of their OIDs isn't completely - * impractical. + * In older releases, this had to be hard-wired so that we could compute the + * locktag for a relation and lock it before examining its catalog entry. + * Since we now have MVCC catalog access, the race conditions that made that + * a hard requirement are gone, so we could look at relaxing this restriction. + * However, if we scanned the pg_class entry to find relisshared, and only + * then locked the relation, pg_class could get updated in the meantime, + * forcing us to scan the relation again, which would definitely be complex + * and might have undesirable performance consequences. Fortunately, the set + * of shared relations is fairly static, so a hand-maintained list of their + * OIDs isn't completely impractical. */ bool IsSharedRelation(Oid relationId) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 69171f8311..fe17c96f12 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -558,7 +558,7 @@ findDependentObjects(const ObjectAddress *object, nkeys = 2; scan = systable_beginscan(*depRel, DependDependerIndexId, true, - SnapshotNow, nkeys, key); + NULL, nkeys, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -733,7 +733,7 @@ findDependentObjects(const ObjectAddress *object, nkeys = 2; scan = systable_beginscan(*depRel, DependReferenceIndexId, true, - SnapshotNow, nkeys, key); + NULL, nkeys, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -1069,7 +1069,7 @@ deleteOneObject(const ObjectAddress *object, Relation *depRel, int flags) nkeys = 2; scan = systable_beginscan(*depRel, DependDependerIndexId, true, - SnapshotNow, nkeys, key); + NULL, nkeys, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 45a84e44a1..4fd42ed1af 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1386,7 +1386,7 @@ RelationRemoveInheritance(Oid relid) ObjectIdGetDatum(relid)); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, - SnapshotNow, 1, &key); + NULL, 1, &key); while (HeapTupleIsValid(tuple = systable_getnext(scan))) simple_heap_delete(catalogRelation, &tuple->t_self); @@ -1450,7 +1450,7 @@ DeleteAttributeTuples(Oid relid) ObjectIdGetDatum(relid)); scan = systable_beginscan(attrel, AttributeRelidNumIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); /* Delete all the matching tuples */ while ((atttup = systable_getnext(scan)) != NULL) @@ -1491,7 +1491,7 @@ DeleteSystemAttributeTuples(Oid relid) Int16GetDatum(0)); scan = systable_beginscan(attrel, AttributeRelidNumIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); /* Delete all the matching tuples */ while ((atttup = systable_getnext(scan)) != NULL) @@ -1623,7 +1623,7 @@ RemoveAttrDefault(Oid relid, AttrNumber attnum, Int16GetDatum(attnum)); scan = systable_beginscan(attrdef_rel, AttrDefaultIndexId, true, - SnapshotNow, 2, scankeys); + NULL, 2, scankeys); /* There should be at most one matching tuple, but we loop anyway */ while (HeapTupleIsValid(tuple = systable_getnext(scan))) @@ -1677,7 +1677,7 @@ RemoveAttrDefaultById(Oid attrdefId) ObjectIdGetDatum(attrdefId)); scan = systable_beginscan(attrdef_rel, AttrDefaultOidIndexId, true, - SnapshotNow, 1, scankeys); + NULL, 1, scankeys); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) @@ -2374,7 +2374,7 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr, ObjectIdGetDatum(RelationGetNamespace(rel))); conscan = systable_beginscan(conDesc, ConstraintNameNspIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while (HeapTupleIsValid(tup = systable_getnext(conscan))) { @@ -2640,7 +2640,7 @@ RemoveStatistics(Oid relid, AttrNumber attnum) } scan = systable_beginscan(pgstatistic, StatisticRelidAttnumInhIndexId, true, - SnapshotNow, nkeys, key); + NULL, nkeys, key); /* we must loop even when attnum != 0, in case of inherited stats */ while (HeapTupleIsValid(tuple = systable_getnext(scan))) @@ -2885,7 +2885,7 @@ heap_truncate_find_FKs(List *relationIds) fkeyRel = heap_open(ConstraintRelationId, AccessShareLock); fkeyScan = systable_beginscan(fkeyRel, InvalidOid, false, - SnapshotNow, 0, NULL); + NULL, 0, NULL); while (HeapTupleIsValid(tuple = systable_getnext(fkeyScan))) { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5f61ecbfdf..ca0c672c38 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1845,7 +1845,7 @@ index_update_stats(Relation rel, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); - pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key); + pg_class_scan = heap_beginscan_catalog(pg_class, 1, key); tuple = heap_getnext(pg_class_scan, ForwardScanDirection); tuple = heap_copytuple(tuple); heap_endscan(pg_class_scan); @@ -2181,15 +2181,10 @@ IndexBuildHeapScan(Relation heapRelation, * Prepare for scan of the base relation. In a normal index build, we use * SnapshotAny because we must retrieve all tuples and do our own time * qual checks (because we have to index RECENTLY_DEAD tuples). In a - * concurrent build, we take a regular MVCC snapshot and index whatever's - * live according to that. During bootstrap we just use SnapshotNow. + * concurrent build, or during bootstrap, we take a regular MVCC snapshot + * and index whatever's live according to that. */ - if (IsBootstrapProcessingMode()) - { - snapshot = SnapshotNow; - OldestXmin = InvalidTransactionId; /* not used */ - } - else if (indexInfo->ii_Concurrent) + if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent) { snapshot = RegisterSnapshot(GetTransactionSnapshot()); OldestXmin = InvalidTransactionId; /* not used */ @@ -2500,7 +2495,7 @@ IndexBuildHeapScan(Relation heapRelation, heap_endscan(scan); /* we can now forget our snapshot, if set */ - if (indexInfo->ii_Concurrent) + if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent) UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); @@ -2520,10 +2515,10 @@ IndexBuildHeapScan(Relation heapRelation, * * When creating an exclusion constraint, we first build the index normally * and then rescan the heap to check for conflicts. We assume that we only - * need to validate tuples that are live according to SnapshotNow, and that - * these were correctly indexed even in the presence of broken HOT chains. - * This should be OK since we are holding at least ShareLock on the table, - * meaning there can be no uncommitted updates from other transactions. + * need to validate tuples that are live according to an up-to-date snapshot, + * and that these were correctly indexed even in the presence of broken HOT + * chains. This should be OK since we are holding at least ShareLock on the + * table, meaning there can be no uncommitted updates from other transactions. * (Note: that wouldn't necessarily work for system catalogs, since many * operations release write lock early on the system catalogs.) */ @@ -2540,6 +2535,7 @@ IndexCheckExclusion(Relation heapRelation, TupleTableSlot *slot; EState *estate; ExprContext *econtext; + Snapshot snapshot; /* * If we are reindexing the target index, mark it as no longer being @@ -2568,8 +2564,9 @@ IndexCheckExclusion(Relation heapRelation, /* * Scan all live tuples in the base relation. */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); scan = heap_beginscan_strat(heapRelation, /* relation */ - SnapshotNow, /* snapshot */ + snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ @@ -2612,6 +2609,7 @@ IndexCheckExclusion(Relation heapRelation, } heap_endscan(scan); + UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 23943ff9ce..4434dd61f3 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -4013,8 +4013,8 @@ fetch_search_path_array(Oid *sarray, int sarray_len) * a nonexistent object OID, rather than failing. This is to avoid race * condition errors when a query that's scanning a catalog using an MVCC * snapshot uses one of these functions. The underlying IsVisible functions - * operate on SnapshotNow semantics and so might see the object as already - * gone when it's still visible to the MVCC snapshot. (There is no race + * always use an up-to-date snapshot and so might see the object as already + * gone when it's still visible to the transaction snapshot. (There is no race * condition in the current coding because we don't accept sinval messages * between the SearchSysCacheExists test and the subsequent lookup.) */ diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 215eaf53e6..4d22f3a9ce 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -1481,7 +1481,7 @@ get_catalog_object_by_oid(Relation catalog, Oid objectId) ObjectIdGetDatum(objectId)); scan = systable_beginscan(catalog, oidIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) { @@ -1544,7 +1544,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); rcscan = systable_beginscan(castDesc, CastOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(rcscan); @@ -1644,7 +1644,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); adscan = systable_beginscan(attrdefDesc, AttrDefaultOidIndexId, - true, SnapshotNow, 1, skey); + true, NULL, 1, skey); tup = systable_getnext(adscan); @@ -1750,7 +1750,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); amscan = systable_beginscan(amopDesc, AccessMethodOperatorOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(amscan); @@ -1800,7 +1800,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); amscan = systable_beginscan(amprocDesc, AccessMethodProcedureOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(amscan); @@ -1848,7 +1848,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); rcscan = systable_beginscan(ruleDesc, RewriteOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(rcscan); @@ -1883,7 +1883,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); tgscan = systable_beginscan(trigDesc, TriggerOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(tgscan); @@ -2064,7 +2064,7 @@ getObjectDescription(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); rcscan = systable_beginscan(defaclrel, DefaultAclOidIndexId, - true, SnapshotNow, 1, skey); + true, NULL, 1, skey); tup = systable_getnext(rcscan); @@ -2816,7 +2816,7 @@ getObjectIdentity(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); adscan = systable_beginscan(attrdefDesc, AttrDefaultOidIndexId, - true, SnapshotNow, 1, skey); + true, NULL, 1, skey); tup = systable_getnext(adscan); @@ -2921,7 +2921,7 @@ getObjectIdentity(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); amscan = systable_beginscan(amopDesc, AccessMethodOperatorOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(amscan); @@ -2965,7 +2965,7 @@ getObjectIdentity(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); amscan = systable_beginscan(amprocDesc, AccessMethodProcedureOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(amscan); @@ -3218,7 +3218,7 @@ getObjectIdentity(const ObjectAddress *object) ObjectIdGetDatum(object->objectId)); rcscan = systable_beginscan(defaclrel, DefaultAclOidIndexId, - true, SnapshotNow, 1, skey); + true, NULL, 1, skey); tup = systable_getnext(rcscan); diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index dd0050267f..99f4be5056 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -166,7 +166,7 @@ RemoveCollationById(Oid collationOid) ObjectIdGetDatum(collationOid)); scandesc = systable_beginscan(rel, CollationOidIndexId, true, - SnapshotNow, 1, &scanKeyData); + NULL, 1, &scanKeyData); tuple = systable_getnext(scandesc); diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index a8eb4cbc45..5021420a72 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -412,7 +412,7 @@ ConstraintNameIsUsed(ConstraintCategory conCat, Oid objId, ObjectIdGetDatum(objNamespace)); conscan = systable_beginscan(conDesc, ConstraintNameNspIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while (HeapTupleIsValid(tup = systable_getnext(conscan))) { @@ -506,7 +506,7 @@ ChooseConstraintName(const char *name1, const char *name2, ObjectIdGetDatum(namespaceid)); conscan = systable_beginscan(conDesc, ConstraintNameNspIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); found = (HeapTupleIsValid(systable_getnext(conscan))); @@ -699,7 +699,7 @@ AlterConstraintNamespaces(Oid ownerId, Oid oldNspId, ObjectIdGetDatum(ownerId)); scan = systable_beginscan(conRel, ConstraintTypidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); } else { @@ -709,7 +709,7 @@ AlterConstraintNamespaces(Oid ownerId, Oid oldNspId, ObjectIdGetDatum(ownerId)); scan = systable_beginscan(conRel, ConstraintRelidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); } while (HeapTupleIsValid((tup = systable_getnext(scan)))) @@ -778,7 +778,7 @@ get_relation_constraint_oid(Oid relid, const char *conname, bool missing_ok) ObjectIdGetDatum(relid)); scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { @@ -836,7 +836,7 @@ get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok) ObjectIdGetDatum(typid)); scan = systable_beginscan(pg_constraint, ConstraintTypidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { @@ -903,7 +903,7 @@ check_functional_grouping(Oid relid, ObjectIdGetDatum(relid)); scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index 45d8e62808..08b2a99bfb 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -166,8 +166,7 @@ RemoveConversionById(Oid conversionOid) /* open pg_conversion */ rel = heap_open(ConversionRelationId, RowExclusiveLock); - scan = heap_beginscan(rel, SnapshotNow, - 1, &scanKeyData); + scan = heap_beginscan_catalog(rel, 1, &scanKeyData); /* search for the target tuple */ if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection))) diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index 45949129cf..6e19736de0 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -43,7 +43,7 @@ AlterSetting(Oid databaseid, Oid roleid, VariableSetStmt *setstmt) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(roleid)); scan = systable_beginscan(rel, DbRoleSettingDatidRolidIndexId, true, - SnapshotNow, 2, scankey); + NULL, 2, scankey); tuple = systable_getnext(scan); /* @@ -205,7 +205,7 @@ DropSetting(Oid databaseid, Oid roleid) numkeys++; } - scan = heap_beginscan(relsetting, SnapshotNow, numkeys, keys); + scan = heap_beginscan_catalog(relsetting, numkeys, keys); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { simple_heap_delete(relsetting, &tup->t_self); @@ -226,7 +226,8 @@ DropSetting(Oid databaseid, Oid roleid) * databaseid/roleid. */ void -ApplySetting(Oid databaseid, Oid roleid, Relation relsetting, GucSource source) +ApplySetting(Snapshot snapshot, Oid databaseid, Oid roleid, + Relation relsetting, GucSource source) { SysScanDesc scan; ScanKeyData keys[2]; @@ -244,7 +245,7 @@ ApplySetting(Oid databaseid, Oid roleid, Relation relsetting, GucSource source) ObjectIdGetDatum(roleid)); scan = systable_beginscan(relsetting, DbRoleSettingDatidRolidIndexId, true, - SnapshotNow, 2, keys); + snapshot, 2, keys); while (HeapTupleIsValid(tup = systable_getnext(scan))) { bool isnull; diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 9535fba21e..bd5cd99732 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -211,7 +211,7 @@ deleteDependencyRecordsFor(Oid classId, Oid objectId, ObjectIdGetDatum(objectId)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -261,7 +261,7 @@ deleteDependencyRecordsForClass(Oid classId, Oid objectId, ObjectIdGetDatum(objectId)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -343,7 +343,7 @@ changeDependencyFor(Oid classId, Oid objectId, ObjectIdGetDatum(objectId)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid((tup = systable_getnext(scan)))) { @@ -407,7 +407,7 @@ isObjectPinned(const ObjectAddress *object, Relation rel) ObjectIdGetDatum(object->objectId)); scan = systable_beginscan(rel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); /* * Since we won't generate additional pg_depend entries for pinned @@ -467,7 +467,7 @@ getExtensionOfObject(Oid classId, Oid objectId) ObjectIdGetDatum(objectId)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid((tup = systable_getnext(scan)))) { @@ -520,7 +520,7 @@ sequenceIsOwned(Oid seqId, Oid *tableId, int32 *colId) ObjectIdGetDatum(seqId)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid((tup = systable_getnext(scan)))) { @@ -580,7 +580,7 @@ getOwnedSequences(Oid relid) ObjectIdGetDatum(relid)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -643,7 +643,7 @@ get_constraint_index(Oid constraintId) Int32GetDatum(0)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -701,7 +701,7 @@ get_index_constraint(Oid indexId) Int32GetDatum(0)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { diff --git a/src/backend/catalog/pg_enum.c b/src/backend/catalog/pg_enum.c index 7e746f9667..a7ef8cda59 100644 --- a/src/backend/catalog/pg_enum.c +++ b/src/backend/catalog/pg_enum.c @@ -156,7 +156,7 @@ EnumValuesDelete(Oid enumTypeOid) ObjectIdGetDatum(enumTypeOid)); scan = systable_beginscan(pg_enum, EnumTypIdLabelIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -483,6 +483,9 @@ restart: * (for example, enum_in and enum_out do so). The worst that can happen * is a transient failure to find any valid value of the row. This is * judged acceptable in view of the infrequency of use of RenumberEnumType. + * + * XXX. Now that we have MVCC catalog scans, the above reasoning is no longer + * correct. Should we revisit any decisions here? */ static void RenumberEnumType(Relation pg_enum, HeapTuple *existing, int nelems) diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index fbfe7bc7a1..638e535efc 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -81,7 +81,7 @@ find_inheritance_children(Oid parentrelId, LOCKMODE lockmode) ObjectIdGetDatum(parentrelId)); scan = systable_beginscan(relation, InheritsParentIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); while ((inheritsTuple = systable_getnext(scan)) != NULL) { @@ -325,7 +325,7 @@ typeInheritsFrom(Oid subclassTypeId, Oid superclassTypeId) ObjectIdGetDatum(this_relid)); inhscan = systable_beginscan(inhrel, InheritsRelidSeqnoIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while ((inhtup = systable_getnext(inhscan)) != NULL) { diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index d01a5a7a72..22d499d7d1 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -104,7 +104,7 @@ LargeObjectDrop(Oid loid) scan = systable_beginscan(pg_lo_meta, LargeObjectMetadataOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) @@ -126,7 +126,7 @@ LargeObjectDrop(Oid loid) scan = systable_beginscan(pg_largeobject, LargeObjectLOidPNIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { simple_heap_delete(pg_largeobject, &tuple->t_self); @@ -145,11 +145,11 @@ LargeObjectDrop(Oid loid) * We don't use the system cache for large object metadata, for fear of * using too much local memory. * - * This function always scans the system catalog using SnapshotNow, so it - * should not be used when a large object is opened in read-only mode (because - * large objects opened in read only mode are supposed to be viewed relative - * to the caller's snapshot, whereas in read-write mode they are relative to - * SnapshotNow). + * This function always scans the system catalog using an up-to-date snapshot, + * so it should not be used when a large object is opened in read-only mode + * (because large objects opened in read only mode are supposed to be viewed + * relative to the caller's snapshot, whereas in read-write mode they are + * relative to a current snapshot). */ bool LargeObjectExists(Oid loid) @@ -170,7 +170,7 @@ LargeObjectExists(Oid loid) sd = systable_beginscan(pg_lo_meta, LargeObjectMetadataOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tuple = systable_getnext(sd); if (HeapTupleIsValid(tuple)) diff --git a/src/backend/catalog/pg_range.c b/src/backend/catalog/pg_range.c index 639b40c73b..b782f900dc 100644 --- a/src/backend/catalog/pg_range.c +++ b/src/backend/catalog/pg_range.c @@ -126,7 +126,7 @@ RangeDelete(Oid rangeTypeOid) ObjectIdGetDatum(rangeTypeOid)); scan = systable_beginscan(pg_range, RangeTypidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 7de4420fa3..dc21c1002a 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -220,7 +220,7 @@ shdepChangeDep(Relation sdepRel, Int32GetDatum(objsubid)); scan = systable_beginscan(sdepRel, SharedDependDependerIndexId, true, - SnapshotNow, 4, key); + NULL, 4, key); while ((scantup = systable_getnext(scan)) != NULL) { @@ -554,7 +554,7 @@ checkSharedDependencies(Oid classId, Oid objectId, ObjectIdGetDatum(objectId)); scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -729,7 +729,7 @@ copyTemplateDependencies(Oid templateDbId, Oid newDbId) ObjectIdGetDatum(templateDbId)); scan = systable_beginscan(sdepRel, SharedDependDependerIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); /* Set up to copy the tuples except for inserting newDbId */ memset(values, 0, sizeof(values)); @@ -792,7 +792,7 @@ dropDatabaseDependencies(Oid databaseId) /* We leave the other index fields unspecified */ scan = systable_beginscan(sdepRel, SharedDependDependerIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -936,7 +936,7 @@ shdepDropDependency(Relation sdepRel, } scan = systable_beginscan(sdepRel, SharedDependDependerIndexId, true, - SnapshotNow, nkeys, key); + NULL, nkeys, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -1125,7 +1125,7 @@ isSharedObjectPinned(Oid classId, Oid objectId, Relation sdepRel) ObjectIdGetDatum(objectId)); scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); /* * Since we won't generate additional pg_shdepend entries for pinned @@ -1212,7 +1212,7 @@ shdepDropOwned(List *roleids, DropBehavior behavior) ObjectIdGetDatum(roleid)); scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while ((tuple = systable_getnext(scan)) != NULL) { @@ -1319,7 +1319,7 @@ shdepReassignOwned(List *roleids, Oid newrole) ObjectIdGetDatum(roleid)); scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while ((tuple = systable_getnext(scan)) != NULL) { diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 095d5e42d9..f23730c26f 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -480,6 +480,11 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD * against concurrent SnapshotNow scans of pg_index. Therefore this is unsafe * to execute with less than full exclusive lock on the parent table; * otherwise concurrent executions of RelationGetIndexList could miss indexes. + * + * XXX: Now that we have MVCC catalog access, SnapshotNow scans of pg_index + * shouldn't be common enough to worry about. The above comment needs + * to be updated, and it may be possible to simplify the logic here in other + * ways also. */ void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) @@ -1583,7 +1588,7 @@ get_tables_to_cluster(MemoryContext cluster_context) Anum_pg_index_indisclustered, BTEqualStrategyNumber, F_BOOLEQ, BoolGetDatum(true)); - scan = heap_beginscan(indRelation, SnapshotNow, 1, &entry); + scan = heap_beginscan_catalog(indRelation, 1, &entry); while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { index = (Form_pg_index) GETSTRUCT(indexTuple); diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c index 60db27c205..8baf017380 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -187,7 +187,7 @@ CreateComments(Oid oid, Oid classoid, int32 subid, char *comment) description = heap_open(DescriptionRelationId, RowExclusiveLock); sd = systable_beginscan(description, DescriptionObjIndexId, true, - SnapshotNow, 3, skey); + NULL, 3, skey); while ((oldtuple = systable_getnext(sd)) != NULL) { @@ -281,7 +281,7 @@ CreateSharedComments(Oid oid, Oid classoid, char *comment) shdescription = heap_open(SharedDescriptionRelationId, RowExclusiveLock); sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while ((oldtuple = systable_getnext(sd)) != NULL) { @@ -363,7 +363,7 @@ DeleteComments(Oid oid, Oid classoid, int32 subid) description = heap_open(DescriptionRelationId, RowExclusiveLock); sd = systable_beginscan(description, DescriptionObjIndexId, true, - SnapshotNow, nkeys, skey); + NULL, nkeys, skey); while ((oldtuple = systable_getnext(sd)) != NULL) simple_heap_delete(description, &oldtuple->t_self); @@ -399,7 +399,7 @@ DeleteSharedComments(Oid oid, Oid classoid) shdescription = heap_open(SharedDescriptionRelationId, RowExclusiveLock); sd = systable_beginscan(shdescription, SharedDescriptionObjIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while ((oldtuple = systable_getnext(sd)) != NULL) simple_heap_delete(shdescription, &oldtuple->t_self); @@ -442,7 +442,7 @@ GetComment(Oid oid, Oid classoid, int32 subid) tupdesc = RelationGetDescr(description); sd = systable_beginscan(description, DescriptionObjIndexId, true, - SnapshotNow, 3, skey); + NULL, 3, skey); comment = NULL; while ((tuple = systable_getnext(sd)) != NULL) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 0e10a75218..a3a150d700 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -133,7 +133,6 @@ createdb(const CreatedbStmt *stmt) int notherbackends; int npreparedxacts; createdb_failure_params fparms; - Snapshot snapshot; /* Extract options from the statement node tree */ foreach(option, stmt->options) @@ -537,29 +536,6 @@ createdb(const CreatedbStmt *stmt) */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); - /* - * Take an MVCC snapshot to use while scanning through pg_tablespace. For - * safety, register the snapshot (this prevents it from changing if - * something else were to request a snapshot during the loop). - * - * Traversing pg_tablespace with an MVCC snapshot is necessary to provide - * us with a consistent view of the tablespaces that exist. Using - * SnapshotNow here would risk seeing the same tablespace multiple times, - * or worse not seeing a tablespace at all, if its tuple is moved around - * by a concurrent update (eg an ACL change). - * - * Inconsistency of this sort is inherent to all SnapshotNow scans, unless - * some lock is held to prevent concurrent updates of the rows being - * sought. There should be a generic fix for that, but in the meantime - * it's worth fixing this case in particular because we are doing very - * heavyweight operations within the scan, so that the elapsed time for - * the scan is vastly longer than for most other catalog scans. That - * means there's a much wider window for concurrent updates to cause - * trouble here than anywhere else. XXX this code should be changed - * whenever a generic fix is implemented. - */ - snapshot = RegisterSnapshot(GetLatestSnapshot()); - /* * Once we start copying subdirectories, we need to be able to clean 'em * up if we fail. Use an ENSURE block to make sure this happens. (This @@ -577,7 +553,7 @@ createdb(const CreatedbStmt *stmt) * each one to the new database. */ rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid srctablespace = HeapTupleGetOid(tuple); @@ -682,9 +658,6 @@ createdb(const CreatedbStmt *stmt) PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback, PointerGetDatum(&fparms)); - /* Free our snapshot */ - UnregisterSnapshot(snapshot); - return dboid; } @@ -1214,7 +1187,7 @@ movedb(const char *dbname, const char *tblspcname) BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(dbname)); sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true, - SnapshotNow, 1, &scankey); + NULL, 1, &scankey); oldtuple = systable_getnext(sysscan); if (!HeapTupleIsValid(oldtuple)) /* shouldn't happen... */ ereport(ERROR, @@ -1403,7 +1376,7 @@ AlterDatabase(AlterDatabaseStmt *stmt, bool isTopLevel) BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(stmt->dbname)); scan = systable_beginscan(rel, DatabaseNameIndexId, true, - SnapshotNow, 1, &scankey); + NULL, 1, &scankey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) ereport(ERROR, @@ -1498,7 +1471,7 @@ AlterDatabaseOwner(const char *dbname, Oid newOwnerId) BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(dbname)); scan = systable_beginscan(rel, DatabaseNameIndexId, true, - SnapshotNow, 1, &scankey); + NULL, 1, &scankey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) ereport(ERROR, @@ -1637,7 +1610,7 @@ get_db_info(const char *name, LOCKMODE lockmode, NameGetDatum(name)); scan = systable_beginscan(relation, DatabaseNameIndexId, true, - SnapshotNow, 1, &scanKey); + NULL, 1, &scanKey); tuple = systable_getnext(scan); @@ -1751,20 +1724,9 @@ remove_dbtablespaces(Oid db_id) Relation rel; HeapScanDesc scan; HeapTuple tuple; - Snapshot snapshot; - - /* - * As in createdb(), we'd better use an MVCC snapshot here, since this - * scan can run for a long time. Duplicate visits to tablespaces would be - * harmless, but missing a tablespace could result in permanently leaked - * files. - * - * XXX change this when a generic fix for SnapshotNow races is implemented - */ - snapshot = RegisterSnapshot(GetLatestSnapshot()); rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); @@ -1810,7 +1772,6 @@ remove_dbtablespaces(Oid db_id) heap_endscan(scan); heap_close(rel, AccessShareLock); - UnregisterSnapshot(snapshot); } /* @@ -1832,19 +1793,9 @@ check_db_file_conflict(Oid db_id) Relation rel; HeapScanDesc scan; HeapTuple tuple; - Snapshot snapshot; - - /* - * As in createdb(), we'd better use an MVCC snapshot here; missing a - * tablespace could result in falsely reporting the OID is unique, with - * disastrous future consequences per the comment above. - * - * XXX change this when a generic fix for SnapshotNow races is implemented - */ - snapshot = RegisterSnapshot(GetLatestSnapshot()); rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); @@ -1870,7 +1821,6 @@ check_db_file_conflict(Oid db_id) heap_endscan(scan); heap_close(rel, AccessShareLock); - UnregisterSnapshot(snapshot); return result; } @@ -1927,7 +1877,7 @@ get_database_oid(const char *dbname, bool missing_ok) BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(dbname)); scan = systable_beginscan(pg_database, DatabaseNameIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); dbtuple = systable_getnext(scan); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index 08e8cade6b..798c92a026 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -128,7 +128,7 @@ get_extension_oid(const char *extname, bool missing_ok) CStringGetDatum(extname)); scandesc = systable_beginscan(rel, ExtensionNameIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scandesc); @@ -173,7 +173,7 @@ get_extension_name(Oid ext_oid) ObjectIdGetDatum(ext_oid)); scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scandesc); @@ -212,7 +212,7 @@ get_extension_schema(Oid ext_oid) ObjectIdGetDatum(ext_oid)); scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scandesc); @@ -1609,7 +1609,7 @@ RemoveExtensionById(Oid extId) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(extId)); scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - SnapshotNow, 1, entry); + NULL, 1, entry); tuple = systable_getnext(scandesc); @@ -2107,7 +2107,7 @@ pg_extension_config_dump(PG_FUNCTION_ARGS) ObjectIdGetDatum(CurrentExtensionObject)); extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); extTup = systable_getnext(extScan); @@ -2256,7 +2256,7 @@ extension_config_remove(Oid extensionoid, Oid tableoid) ObjectIdGetDatum(extensionoid)); extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); extTup = systable_getnext(extScan); @@ -2464,7 +2464,7 @@ AlterExtensionNamespace(List *names, const char *newschema) ObjectIdGetDatum(extensionOid)); extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); extTup = systable_getnext(extScan); @@ -2512,7 +2512,7 @@ AlterExtensionNamespace(List *names, const char *newschema) ObjectIdGetDatum(extensionOid)); depScan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(depTup = systable_getnext(depScan))) { @@ -2622,7 +2622,7 @@ ExecAlterExtensionStmt(AlterExtensionStmt *stmt) CStringGetDatum(stmt->extname)); extScan = systable_beginscan(extRel, ExtensionNameIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); extTup = systable_getnext(extScan); @@ -2772,7 +2772,7 @@ ApplyExtensionUpdates(Oid extensionOid, ObjectIdGetDatum(extensionOid)); extScan = systable_beginscan(extRel, ExtensionOidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); extTup = systable_getnext(extScan); diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index c776758b51..0a9facfbaa 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -1607,7 +1607,7 @@ DropCastById(Oid castOid) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(castOid)); scan = systable_beginscan(relation, CastOidIndexId, true, - SnapshotNow, 1, &scankey); + NULL, 1, &scankey); tuple = systable_getnext(scan); if (!HeapTupleIsValid(tuple)) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 7ea90d07d3..9d9745e714 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1358,7 +1358,7 @@ GetDefaultOpClass(Oid type_id, Oid am_id) ObjectIdGetDatum(am_id)); scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -1838,7 +1838,7 @@ ReindexDatabase(const char *databaseName, bool do_system, bool do_user) * indirectly by reindex_relation). */ relationRelation = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan(relationRelation, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(relationRelation, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple); diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index f2d78ef663..3140b37234 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -614,7 +614,7 @@ DefineOpClass(CreateOpClassStmt *stmt) ObjectIdGetDatum(amoid)); scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -1622,7 +1622,7 @@ RemoveAmOpEntryById(Oid entryOid) rel = heap_open(AccessMethodOperatorRelationId, RowExclusiveLock); scan = systable_beginscan(rel, AccessMethodOperatorOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); /* we expect exactly one match */ tup = systable_getnext(scan); @@ -1651,7 +1651,7 @@ RemoveAmProcEntryById(Oid entryOid) rel = heap_open(AccessMethodProcedureRelationId, RowExclusiveLock); scan = systable_beginscan(rel, AccessMethodProcedureOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); /* we expect exactly one match */ tup = systable_getnext(scan); diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c index 6e4c682072..b7be1f7208 100644 --- a/src/backend/commands/proclang.c +++ b/src/backend/commands/proclang.c @@ -455,7 +455,7 @@ find_language_template(const char *languageName) BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(languageName)); scan = systable_beginscan(rel, PLTemplateNameIndexId, true, - SnapshotNow, 1, &key); + NULL, 1, &key); tup = systable_getnext(scan); if (HeapTupleIsValid(tup)) diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index 3b27ac26c8..7466e66465 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -167,7 +167,7 @@ GetSharedSecurityLabel(const ObjectAddress *object, const char *provider) pg_shseclabel = heap_open(SharedSecLabelRelationId, AccessShareLock); scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true, - SnapshotNow, 3, keys); + NULL, 3, keys); tuple = systable_getnext(scan); if (HeapTupleIsValid(tuple)) @@ -224,7 +224,7 @@ GetSecurityLabel(const ObjectAddress *object, const char *provider) pg_seclabel = heap_open(SecLabelRelationId, AccessShareLock); scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true, - SnapshotNow, 4, keys); + NULL, 4, keys); tuple = systable_getnext(scan); if (HeapTupleIsValid(tuple)) @@ -284,7 +284,7 @@ SetSharedSecurityLabel(const ObjectAddress *object, pg_shseclabel = heap_open(SharedSecLabelRelationId, RowExclusiveLock); scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true, - SnapshotNow, 3, keys); + NULL, 3, keys); oldtup = systable_getnext(scan); if (HeapTupleIsValid(oldtup)) @@ -375,7 +375,7 @@ SetSecurityLabel(const ObjectAddress *object, pg_seclabel = heap_open(SecLabelRelationId, RowExclusiveLock); scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true, - SnapshotNow, 4, keys); + NULL, 4, keys); oldtup = systable_getnext(scan); if (HeapTupleIsValid(oldtup)) @@ -434,7 +434,7 @@ DeleteSharedSecurityLabel(Oid objectId, Oid classId) pg_shseclabel = heap_open(SharedSecLabelRelationId, RowExclusiveLock); scan = systable_beginscan(pg_shseclabel, SharedSecLabelObjectIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while (HeapTupleIsValid(oldtup = systable_getnext(scan))) simple_heap_delete(pg_shseclabel, &oldtup->t_self); systable_endscan(scan); @@ -485,7 +485,7 @@ DeleteSecurityLabel(const ObjectAddress *object) pg_seclabel = heap_open(SecLabelRelationId, RowExclusiveLock); scan = systable_beginscan(pg_seclabel, SecLabelObjectIndexId, true, - SnapshotNow, nkeys, skey); + NULL, nkeys, skey); while (HeapTupleIsValid(oldtup = systable_getnext(scan))) simple_heap_delete(pg_seclabel, &oldtup->t_self); systable_endscan(scan); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ea1c309add..6a7aa44ccc 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -2741,7 +2741,7 @@ AlterTableGetLockLevel(List *cmds) * multiple DDL operations occur in a stream against frequently accessed * tables. * - * 1. Catalog tables are read using SnapshotNow, which has a race bug that + * 1. Catalog tables were read using SnapshotNow, which has a race bug that * allows a scan to return no valid rows even when one is present in the * case of a commit of a concurrent update of the catalog table. * SnapshotNow also ignores transactions in progress, so takes the latest @@ -3753,6 +3753,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) MemoryContext oldCxt; List *dropped_attrs = NIL; ListCell *lc; + Snapshot snapshot; if (newrel) ereport(DEBUG1, @@ -3805,7 +3806,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) * Scan through the rows, generating a new row if needed and then * checking all the constraints. */ - scan = heap_beginscan(oldrel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(oldrel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -3906,6 +3908,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) MemoryContextSwitchTo(oldCxt); heap_endscan(scan); + UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(oldslot); ExecDropSingleTupleTableSlot(newslot); @@ -4182,7 +4185,7 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation, ObjectIdGetDatum(typeOid)); depScan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(depTup = systable_getnext(depScan))) { @@ -4281,7 +4284,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(typeOid)); - scan = heap_beginscan(classRel, SnapshotNow, 1, key); + scan = heap_beginscan_catalog(classRel, 1, key); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -6220,7 +6223,7 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(conrel, ConstraintRelidIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); while (HeapTupleIsValid(contuple = systable_getnext(scan))) { @@ -6282,7 +6285,7 @@ ATExecAlterConstraint(Relation rel, AlterTableCmd *cmd, ObjectIdGetDatum(HeapTupleGetOid(contuple))); tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true, - SnapshotNow, 1, &tgkey); + NULL, 1, &tgkey); while (HeapTupleIsValid(tgtuple = systable_getnext(tgscan))) { @@ -6343,7 +6346,7 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(conrel, ConstraintRelidIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { @@ -6824,6 +6827,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) TupleTableSlot *slot; Form_pg_constraint constrForm; bool isnull; + Snapshot snapshot; constrForm = (Form_pg_constraint) GETSTRUCT(constrtup); @@ -6849,7 +6853,8 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) slot = MakeSingleTupleTableSlot(tupdesc); econtext->ecxt_scantuple = slot; - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -6873,6 +6878,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) MemoryContextSwitchTo(oldcxt); heap_endscan(scan); + UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); FreeExecutorState(estate); } @@ -6893,6 +6899,7 @@ validateForeignKeyConstraint(char *conname, HeapScanDesc scan; HeapTuple tuple; Trigger trig; + Snapshot snapshot; ereport(DEBUG1, (errmsg("validating foreign key constraint \"%s\"", conname))); @@ -6924,7 +6931,8 @@ validateForeignKeyConstraint(char *conname, * if that tuple had just been inserted. If any of those fail, it should * ereport(ERROR) and that's that. */ - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -6956,6 +6964,7 @@ validateForeignKeyConstraint(char *conname, } heap_endscan(scan); + UnregisterSnapshot(snapshot); } static void @@ -7174,7 +7183,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(conrel, ConstraintRelidIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { @@ -7255,7 +7264,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(childrelid)); scan = systable_beginscan(conrel, ConstraintRelidIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); /* scan for matching tuple - there should only be one */ while (HeapTupleIsValid(tuple = systable_getnext(scan))) @@ -7655,7 +7664,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, Int32GetDatum((int32) attnum)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(depTup = systable_getnext(scan))) { @@ -7840,7 +7849,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, Int32GetDatum((int32) attnum)); scan = systable_beginscan(depRel, DependDependerIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(depTup = systable_getnext(scan))) { @@ -8517,7 +8526,7 @@ change_owner_fix_column_acls(Oid relationOid, Oid oldOwnerId, Oid newOwnerId) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationOid)); scan = systable_beginscan(attRelation, AttributeRelidNumIndexId, - true, SnapshotNow, 1, key); + true, NULL, 1, key); while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) { Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple); @@ -8594,7 +8603,7 @@ change_owner_recurse_to_sequences(Oid relationOid, Oid newOwnerId, LOCKMODE lock /* we leave refobjsubid unspecified */ scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -9188,7 +9197,7 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); /* inhseqno sequences start at 1 */ inhseqno = 0; @@ -9430,7 +9439,7 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent_rel))); parent_scan = systable_beginscan(catalog_relation, ConstraintRelidIndexId, - true, SnapshotNow, 1, &parent_key); + true, NULL, 1, &parent_key); while (HeapTupleIsValid(parent_tuple = systable_getnext(parent_scan))) { @@ -9453,7 +9462,7 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(child_rel))); child_scan = systable_beginscan(catalog_relation, ConstraintRelidIndexId, - true, SnapshotNow, 1, &child_key); + true, NULL, 1, &child_key); while (HeapTupleIsValid(child_tuple = systable_getnext(child_scan))) { @@ -9561,7 +9570,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, - true, SnapshotNow, 1, key); + true, NULL, 1, key); while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) { @@ -9595,7 +9604,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId, - true, SnapshotNow, 1, key); + true, NULL, 1, key); while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) { Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attributeTuple); @@ -9637,7 +9646,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(parent_rel))); scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId, - true, SnapshotNow, 1, key); + true, NULL, 1, key); connames = NIL; @@ -9657,7 +9666,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId, - true, SnapshotNow, 1, key); + true, NULL, 1, key); while (HeapTupleIsValid(constraintTuple = systable_getnext(scan))) { @@ -9749,7 +9758,7 @@ drop_parent_dependency(Oid relid, Oid refclassid, Oid refobjid) Int32GetDatum(0)); scan = systable_beginscan(catalogRelation, DependDependerIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(depTuple = systable_getnext(scan))) { @@ -9804,7 +9813,7 @@ ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockmode) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); scan = systable_beginscan(inheritsRelation, InheritsRelidSeqnoIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); if (HeapTupleIsValid(systable_getnext(scan))) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -10260,7 +10269,7 @@ AlterSeqNamespaces(Relation classRel, Relation rel, /* we leave refobjsubid unspecified */ scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 8589512998..ba9cb1f8f1 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -400,7 +400,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); + scandesc = heap_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) @@ -831,7 +831,7 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(oldname)); - scan = heap_beginscan(rel, SnapshotNow, 1, entry); + scan = heap_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, @@ -861,7 +861,7 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(newname)); - scan = heap_beginscan(rel, SnapshotNow, 1, entry); + scan = heap_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scan, ForwardScanDirection); if (HeapTupleIsValid(tup)) ereport(ERROR, @@ -910,7 +910,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(stmt->tablespacename)); - scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); + scandesc = heap_beginscan_catalog(rel, 1, entry); tup = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, @@ -1311,7 +1311,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); + scandesc = heap_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ @@ -1357,7 +1357,7 @@ get_tablespace_name(Oid spc_oid) ObjectIdAttributeNumber, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(spc_oid)); - scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); + scandesc = heap_beginscan_catalog(rel, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index ed65bab361..d86e9ad2c7 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -492,7 +492,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(rel))); tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, 1, &key); + NULL, 1, &key); while (HeapTupleIsValid(tuple = systable_getnext(tgscan))) { Form_pg_trigger pg_trigger = (Form_pg_trigger) GETSTRUCT(tuple); @@ -1048,7 +1048,7 @@ RemoveTriggerById(Oid trigOid) ObjectIdGetDatum(trigOid)); tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tup = systable_getnext(tgscan); if (!HeapTupleIsValid(tup)) @@ -1127,7 +1127,7 @@ get_trigger_oid(Oid relid, const char *trigname, bool missing_ok) CStringGetDatum(trigname)); tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); tup = systable_getnext(tgscan); @@ -1242,7 +1242,7 @@ renametrig(RenameStmt *stmt) BTEqualStrategyNumber, F_NAMEEQ, PointerGetDatum(stmt->newname)); tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), @@ -1262,7 +1262,7 @@ renametrig(RenameStmt *stmt) BTEqualStrategyNumber, F_NAMEEQ, PointerGetDatum(stmt->subname)); tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); if (HeapTupleIsValid(tuple = systable_getnext(tgscan))) { tgoid = HeapTupleGetOid(tuple); @@ -1359,7 +1359,7 @@ EnableDisableTrigger(Relation rel, const char *tgname, nkeys = 1; tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, nkeys, keys); + NULL, nkeys, keys); found = changed = false; @@ -1468,7 +1468,7 @@ RelationBuildTriggers(Relation relation) tgrel = heap_open(TriggerRelationId, AccessShareLock); tgscan = systable_beginscan(tgrel, TriggerRelidNameIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid(htup = systable_getnext(tgscan))) { @@ -4270,7 +4270,7 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) ObjectIdGetDatum(namespaceId)); conscan = systable_beginscan(conrel, ConstraintNameNspIndexId, - true, SnapshotNow, 2, skey); + true, NULL, 2, skey); while (HeapTupleIsValid(tup = systable_getnext(conscan))) { @@ -4333,7 +4333,7 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt) ObjectIdGetDatum(conoid)); tgscan = systable_beginscan(tgrel, TriggerConstraintIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid(htup = systable_getnext(tgscan))) { diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 57b69f88ab..61ebc2eca7 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -921,7 +921,7 @@ makeConfigurationDependencies(HeapTuple tuple, bool removeOld, ObjectIdGetDatum(myself.objectId)); scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { @@ -1059,7 +1059,7 @@ DefineTSConfiguration(List *names, List *parameters) ObjectIdGetDatum(sourceOid)); scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { @@ -1138,7 +1138,7 @@ RemoveTSConfigurationById(Oid cfgId) ObjectIdGetDatum(cfgId)); scan = systable_beginscan(relMap, TSConfigMapIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid((tup = systable_getnext(scan)))) { @@ -1294,7 +1294,7 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, Int32GetDatum(tokens[i])); scan = systable_beginscan(relMap, TSConfigMapIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { @@ -1333,7 +1333,7 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, ObjectIdGetDatum(cfgId)); scan = systable_beginscan(relMap, TSConfigMapIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { @@ -1450,7 +1450,7 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt, Int32GetDatum(tokens[i])); scan = systable_beginscan(relMap, TSConfigMapIndexId, true, - SnapshotNow, 2, skey); + NULL, 2, skey); while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 6bc16f198e..031433d4bb 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -71,6 +71,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -2256,9 +2257,11 @@ AlterDomainNotNull(List *names, bool notNull) TupleDesc tupdesc = RelationGetDescr(testrel); HeapScanDesc scan; HeapTuple tuple; + Snapshot snapshot; /* Scan all tuples in this relation */ - scan = heap_beginscan(testrel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(testrel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2288,6 +2291,7 @@ AlterDomainNotNull(List *names, bool notNull) } } heap_endscan(scan); + UnregisterSnapshot(snapshot); /* Close each rel after processing, but keep lock */ heap_close(testrel, NoLock); @@ -2356,7 +2360,7 @@ AlterDomainDropConstraint(List *names, const char *constrName, ObjectIdGetDatum(HeapTupleGetOid(tup))); conscan = systable_beginscan(conrel, ConstraintTypidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); /* * Scan over the result set, removing any matching entries. @@ -2551,7 +2555,7 @@ AlterDomainValidateConstraint(List *names, char *constrName) BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(domainoid)); scan = systable_beginscan(conrel, ConstraintTypidIndexId, - true, SnapshotNow, 1, &key); + true, NULL, 1, &key); while (HeapTupleIsValid(tuple = systable_getnext(scan))) { @@ -2638,9 +2642,11 @@ validateDomainConstraint(Oid domainoid, char *ccbin) TupleDesc tupdesc = RelationGetDescr(testrel); HeapScanDesc scan; HeapTuple tuple; + Snapshot snapshot; /* Scan all tuples in this relation */ - scan = heap_beginscan(testrel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(testrel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2684,6 +2690,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin) ResetExprContext(econtext); } heap_endscan(scan); + UnregisterSnapshot(snapshot); /* Hold relation lock till commit (XXX bad for concurrency) */ heap_close(testrel, NoLock); @@ -2751,7 +2758,7 @@ get_rels_with_domain(Oid domainOid, LOCKMODE lockmode) ObjectIdGetDatum(domainOid)); depScan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 2, key); + NULL, 2, key); while (HeapTupleIsValid(depTup = systable_getnext(depScan))) { @@ -3066,7 +3073,7 @@ GetDomainConstraints(Oid typeOid) ObjectIdGetDatum(typeOid)); scan = systable_beginscan(conRel, ConstraintTypidIndexId, true, - SnapshotNow, 1, key); + NULL, 1, key); while (HeapTupleIsValid(conTup = systable_getnext(scan))) { diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 844f25cfa6..e101a8669e 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -1006,7 +1006,7 @@ DropRole(DropRoleStmt *stmt) ObjectIdGetDatum(roleid)); sscan = systable_beginscan(pg_auth_members_rel, AuthMemRoleMemIndexId, - true, SnapshotNow, 1, &scankey); + true, NULL, 1, &scankey); while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) { @@ -1021,7 +1021,7 @@ DropRole(DropRoleStmt *stmt) ObjectIdGetDatum(roleid)); sscan = systable_beginscan(pg_auth_members_rel, AuthMemMemRoleIndexId, - true, SnapshotNow, 1, &scankey); + true, NULL, 1, &scankey); while (HeapTupleIsValid(tmp_tuple = systable_getnext(sscan))) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 641c740268..68fc9c6bae 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -351,7 +351,7 @@ get_rel_oids(Oid relid, const RangeVar *vacrel) pgclass = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan(pgclass, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(pgclass, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -735,7 +735,7 @@ vac_update_datfrozenxid(void) relation = heap_open(RelationRelationId, AccessShareLock); scan = systable_beginscan(relation, InvalidOid, false, - SnapshotNow, 0, NULL); + NULL, 0, NULL); while ((classTup = systable_getnext(scan)) != NULL) { @@ -852,7 +852,7 @@ vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti) */ relation = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan(relation, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(relation, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index d2b27213ff..bbb89e6996 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -4,16 +4,16 @@ * Routines to support bitmapped scans of relations * * NOTE: it is critical that this plan type only be used with MVCC-compliant - * snapshots (ie, regular snapshots, not SnapshotNow or one of the other + * snapshots (ie, regular snapshots, not SnapshotAny or one of the other * special snapshots). The reason is that since index and heap scans are * decoupled, there can be no assurance that the index tuple prompting a * visit to a particular heap TID still exists when the visit is made. * Therefore the tuple might not exist anymore either (which is OK because * heap_fetch will cope) --- but worse, the tuple slot could have been * re-used for a newer tuple. With an MVCC snapshot the newer tuple is - * certain to fail the time qual and so it will not be mistakenly returned. - * With SnapshotNow we might return a tuple that doesn't meet the required - * index qual conditions. + * certain to fail the time qual and so it will not be mistakenly returned, + * but with anything else we might return a tuple that doesn't meet the + * required index qual conditions. * * * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index cd8806165c..5b9f3480a5 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -1855,7 +1855,7 @@ get_database_list(void) (void) GetTransactionSnapshot(); rel = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(rel, 0, NULL); while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) { @@ -2002,7 +2002,7 @@ do_autovacuum(void) * wide tables there might be proportionally much more activity in the * TOAST table than in its parent. */ - relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL); + relScan = heap_beginscan_catalog(classRel, 0, NULL); /* * On the first pass, we collect main tables to vacuum, and also the main @@ -2120,7 +2120,7 @@ do_autovacuum(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_TOASTVALUE)); - relScan = heap_beginscan(classRel, SnapshotNow, 1, &key); + relScan = heap_beginscan_catalog(classRel, 1, &key); while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index ac20dffd98..e539bac0c1 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -59,6 +59,7 @@ #include "utils/memutils.h" #include "utils/ps_status.h" #include "utils/rel.h" +#include "utils/snapmgr.h" #include "utils/timestamp.h" #include "utils/tqual.h" @@ -1097,6 +1098,7 @@ pgstat_collect_oids(Oid catalogid) Relation rel; HeapScanDesc scan; HeapTuple tup; + Snapshot snapshot; memset(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(Oid); @@ -1109,7 +1111,8 @@ pgstat_collect_oids(Oid catalogid) HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); rel = heap_open(catalogid, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scan = heap_beginscan(rel, snapshot, 0, NULL); while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid thisoid = HeapTupleGetOid(tup); @@ -1119,6 +1122,7 @@ pgstat_collect_oids(Oid catalogid) (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL); } heap_endscan(scan); + UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); return htab; diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index fb57621962..3157aba330 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -38,6 +38,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -418,14 +419,17 @@ DefineQueryRewrite(char *rulename, event_relation->rd_rel->relkind != RELKIND_MATVIEW) { HeapScanDesc scanDesc; + Snapshot snapshot; - scanDesc = heap_beginscan(event_relation, SnapshotNow, 0, NULL); + snapshot = RegisterSnapshot(GetLatestSnapshot()); + scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL); if (heap_getnext(scanDesc, ForwardScanDirection) != NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not convert table \"%s\" to a view because it is not empty", RelationGetRelationName(event_relation)))); heap_endscan(scanDesc); + UnregisterSnapshot(snapshot); if (event_relation->rd_rel->relhastriggers) ereport(ERROR, diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index a467588e50..d4b9708195 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -2092,8 +2092,8 @@ relation_is_updatable(Oid reloid, bool include_triggers) /* * If the relation doesn't exist, return zero rather than throwing an * error. This is helpful since scanning an information_schema view under - * MVCC rules can result in referencing rels that were just deleted - * according to a SnapshotNow probe. + * MVCC rules can result in referencing rels that have actually been + * deleted already. */ if (rel == NULL) return 0; diff --git a/src/backend/rewrite/rewriteRemove.c b/src/backend/rewrite/rewriteRemove.c index 75fc7765ae..51e27cf4e2 100644 --- a/src/backend/rewrite/rewriteRemove.c +++ b/src/backend/rewrite/rewriteRemove.c @@ -58,7 +58,7 @@ RemoveRewriteRuleById(Oid ruleOid) ObjectIdGetDatum(ruleOid)); rcscan = systable_beginscan(RewriteRelation, RewriteOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); tuple = systable_getnext(rcscan); diff --git a/src/backend/rewrite/rewriteSupport.c b/src/backend/rewrite/rewriteSupport.c index f481c531ac..a68734257c 100644 --- a/src/backend/rewrite/rewriteSupport.c +++ b/src/backend/rewrite/rewriteSupport.c @@ -143,7 +143,7 @@ get_rewrite_oid_without_relid(const char *rulename, CStringGetDatum(rulename)); RewriteRelation = heap_open(RewriteRelationId, AccessShareLock); - scanDesc = heap_beginscan(RewriteRelation, SnapshotNow, 1, &scanKeyData); + scanDesc = heap_beginscan_catalog(RewriteRelation, 1, &scanKeyData); htup = heap_getnext(scanDesc, ForwardScanDirection); if (!HeapTupleIsValid(htup)) diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index b98110cd99..fb9157131f 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -250,7 +250,7 @@ inv_open(Oid lobjId, int flags, MemoryContext mcxt) if (flags & INV_WRITE) { - retval->snapshot = SnapshotNow; + retval->snapshot = NULL; /* instantaneous MVCC snapshot */ retval->flags = IFS_WRLOCK | IFS_RDLOCK; } else if (flags & INV_READ) @@ -270,7 +270,7 @@ inv_open(Oid lobjId, int flags, MemoryContext mcxt) errmsg("invalid flags for opening a large object: %d", flags))); - /* Can't use LargeObjectExists here because it always uses SnapshotNow */ + /* Can't use LargeObjectExists here because we need to specify snapshot */ if (!myLargeObjectExists(lobjId, retval->snapshot)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -288,9 +288,8 @@ inv_close(LargeObjectDesc *obj_desc) { Assert(PointerIsValid(obj_desc)); - if (obj_desc->snapshot != SnapshotNow) - UnregisterSnapshotFromOwner(obj_desc->snapshot, - TopTransactionResourceOwner); + UnregisterSnapshotFromOwner(obj_desc->snapshot, + TopTransactionResourceOwner); pfree(obj_desc); } diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 4c4e1ed820..5ddeffe482 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -697,7 +697,7 @@ pg_size_pretty_numeric(PG_FUNCTION_ARGS) * That leads to a couple of choices. We work from the pg_class row alone * rather than actually opening each relation, for efficiency. We don't * fail if we can't find the relation --- some rows might be visible in - * the query's MVCC snapshot but already dead according to SnapshotNow. + * the query's MVCC snapshot even though the relations have been dropped. * (Note: we could avoid using the catcache, but there's little point * because the relation mapper also works "in the now".) We also don't * fail if the relation doesn't have storage. In all these cases it diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 0d1ff61bf9..fa61f5a512 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -104,7 +104,7 @@ regprocin(PG_FUNCTION_ARGS) hdesc = heap_open(ProcedureRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ProcedureNameArgsNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { @@ -472,7 +472,7 @@ regoperin(PG_FUNCTION_ARGS) hdesc = heap_open(OperatorRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, OperatorNameNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { @@ -843,7 +843,7 @@ regclassin(PG_FUNCTION_ARGS) hdesc = heap_open(RelationRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ClassNameNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); @@ -1007,7 +1007,7 @@ regtypein(PG_FUNCTION_ARGS) hdesc = heap_open(TypeRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, TypeNameNspIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index a1ed7813f2..cf9ce3f3a1 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -704,7 +704,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) ObjectIdGetDatum(trigid)); tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); ht_trig = systable_getnext(tgscan); @@ -1796,7 +1796,7 @@ pg_get_serial_sequence(PG_FUNCTION_ARGS) Int32GetDatum(attnum)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, - SnapshotNow, 3, key); + NULL, 3, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index cc91406582..d12da7615a 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -1182,7 +1182,7 @@ SearchCatCache(CatCache *cache, scandesc = systable_beginscan(relation, cache->cc_indexoid, IndexScanOK(cache, cur_skey), - SnapshotNow, + NULL, cache->cc_nkeys, cur_skey); @@ -1461,7 +1461,7 @@ SearchCatCacheList(CatCache *cache, scandesc = systable_beginscan(relation, cache->cc_indexoid, IndexScanOK(cache, cur_skey), - SnapshotNow, + NULL, nkeys, cur_skey); diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index 2180f2abcc..c2242c47e0 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -129,13 +129,11 @@ BuildEventTriggerCache(void) HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); /* - * Prepare to scan pg_event_trigger in name order. We use an MVCC - * snapshot to avoid getting inconsistent results if the table is being - * concurrently updated. + * Prepare to scan pg_event_trigger in name order. */ rel = relation_open(EventTriggerRelationId, AccessShareLock); irel = index_open(EventTriggerNameIndexId, AccessShareLock); - scan = systable_beginscan_ordered(rel, irel, GetLatestSnapshot(), 0, NULL); + scan = systable_beginscan_ordered(rel, irel, NULL, 0, NULL); /* * Build a cache item for each pg_event_trigger tuple, and append each one diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index e0dc126707..3356d0fe1e 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -9,8 +9,8 @@ * consider that it is *still valid* so long as we are in the same command, * ie, until the next CommandCounterIncrement() or transaction commit. * (See utils/time/tqual.c, and note that system catalogs are generally - * scanned under SnapshotNow rules by the system, or plain user snapshots - * for user queries.) At the command boundary, the old tuple stops + * scanned under the most current snapshot available, rather than the + * transaction snapshot.) At the command boundary, the old tuple stops * being valid and the new version, if any, becomes valid. Therefore, * we cannot simply flush a tuple from the system caches during heap_update() * or heap_delete(). The tuple is still good at that point; what's more, @@ -106,6 +106,7 @@ #include "utils/memutils.h" #include "utils/rel.h" #include "utils/relmapper.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" @@ -372,6 +373,29 @@ AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, AddInvalidationMessage(&hdr->rclist, &msg); } +/* + * Add a snapshot inval entry + */ +static void +AddSnapshotInvalidationMessage(InvalidationListHeader *hdr, + Oid dbId, Oid relId) +{ + SharedInvalidationMessage msg; + + /* Don't add a duplicate item */ + /* We assume dbId need not be checked because it will never change */ + ProcessMessageList(hdr->rclist, + if (msg->sn.id == SHAREDINVALSNAPSHOT_ID && + msg->sn.relId == relId) + return); + + /* OK, add the item */ + msg.sn.id = SHAREDINVALSNAPSHOT_ID; + msg.sn.dbId = dbId; + msg.sn.relId = relId; + AddInvalidationMessage(&hdr->rclist, &msg); +} + /* * Append one list of invalidation messages to another, resetting * the source list to empty. @@ -468,6 +492,19 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) transInvalInfo->RelcacheInitFileInval = true; } +/* + * RegisterSnapshotInvalidation + * + * Register a invalidation event for MVCC scans against a given catalog. + * Only needed for catalogs that don't have catcaches. + */ +static void +RegisterSnapshotInvalidation(Oid dbId, Oid relId) +{ + AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs, + dbId, relId); +} + /* * LocalExecuteInvalidationMessage * @@ -482,6 +519,8 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) { if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == InvalidOid) { + InvalidateCatalogSnapshot(); + CatalogCacheIdInvalidate(msg->cc.id, msg->cc.hashValue); CallSyscacheCallbacks(msg->cc.id, msg->cc.hashValue); @@ -491,6 +530,8 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) { if (msg->cat.dbId == MyDatabaseId || msg->cat.dbId == InvalidOid) { + InvalidateCatalogSnapshot(); + CatalogCacheFlushCatalog(msg->cat.catId); /* CatalogCacheFlushCatalog calls CallSyscacheCallbacks as needed */ @@ -532,6 +573,14 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) else if (msg->rm.dbId == MyDatabaseId) RelationMapInvalidate(false); } + else if (msg->id == SHAREDINVALSNAPSHOT_ID) + { + /* We only care about our own database and shared catalogs */ + if (msg->rm.dbId == InvalidOid) + InvalidateCatalogSnapshot(); + else if (msg->rm.dbId == MyDatabaseId) + InvalidateCatalogSnapshot(); + } else elog(FATAL, "unrecognized SI message ID: %d", msg->id); } @@ -552,6 +601,7 @@ InvalidateSystemCaches(void) { int i; + InvalidateCatalogSnapshot(); ResetCatalogCaches(); RelationCacheInvalidate(); /* gets smgr and relmap too */ @@ -1006,8 +1056,15 @@ CacheInvalidateHeapTuple(Relation relation, /* * First let the catcache do its thing */ - PrepareToInvalidateCacheTuple(relation, tuple, newtuple, - RegisterCatcacheInvalidation); + tupleRelId = RelationGetRelid(relation); + if (RelationInvalidatesSnapshotsOnly(tupleRelId)) + { + databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId; + RegisterSnapshotInvalidation(databaseId, tupleRelId); + } + else + PrepareToInvalidateCacheTuple(relation, tuple, newtuple, + RegisterCatcacheInvalidation); /* * Now, is this tuple one of the primary definers of a relcache entry? @@ -1015,8 +1072,6 @@ CacheInvalidateHeapTuple(Relation relation, * Note we ignore newtuple here; we assume an update cannot move a tuple * from being part of one relcache entry to being part of another. */ - tupleRelId = RelationGetRelid(relation); - if (tupleRelId == RelationRelationId) { Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index f114038588..66fb63b9e4 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -265,8 +265,10 @@ static void unlink_initfile(const char *initfilename); * This is used by RelationBuildDesc to find a pg_class * tuple matching targetRelId. The caller must hold at least * AccessShareLock on the target relid to prevent concurrent-update - * scenarios --- else our SnapshotNow scan might fail to find any - * version that it thinks is live. + * scenarios; it isn't guaranteed that all scans used to build the + * relcache entry will use the same snapshot. If, for example, + * an attribute were to be added after scanning pg_class and before + * scanning pg_attribute, relnatts wouldn't match. * * NB: the returned tuple has been copied into palloc'd storage * and must eventually be freed with heap_freetuple. @@ -305,7 +307,7 @@ ScanPgRelation(Oid targetRelId, bool indexOK) pg_class_desc = heap_open(RelationRelationId, AccessShareLock); pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId, indexOK && criticalRelcachesBuilt, - SnapshotNow, + NULL, 1, key); pg_class_tuple = systable_getnext(pg_class_scan); @@ -480,7 +482,7 @@ RelationBuildTupleDesc(Relation relation) pg_attribute_scan = systable_beginscan(pg_attribute_desc, AttributeRelidNumIndexId, criticalRelcachesBuilt, - SnapshotNow, + NULL, 2, skey); /* @@ -663,7 +665,7 @@ RelationBuildRuleLock(Relation relation) rewrite_tupdesc = RelationGetDescr(rewrite_desc); rewrite_scan = systable_beginscan(rewrite_desc, RewriteRelRulenameIndexId, - true, SnapshotNow, + true, NULL, 1, &key); while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan))) @@ -1313,7 +1315,7 @@ LookupOpclassInfo(Oid operatorClassOid, ObjectIdGetDatum(operatorClassOid)); rel = heap_open(OperatorClassRelationId, AccessShareLock); scan = systable_beginscan(rel, OpclassOidIndexId, indexOK, - SnapshotNow, 1, skey); + NULL, 1, skey); if (HeapTupleIsValid(htup = systable_getnext(scan))) { @@ -1348,7 +1350,7 @@ LookupOpclassInfo(Oid operatorClassOid, ObjectIdGetDatum(opcentry->opcintype)); rel = heap_open(AccessMethodProcedureRelationId, AccessShareLock); scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK, - SnapshotNow, 3, skey); + NULL, 3, skey); while (HeapTupleIsValid(htup = systable_getnext(scan))) { @@ -3317,7 +3319,7 @@ AttrDefaultFetch(Relation relation) adrel = heap_open(AttrDefaultRelationId, AccessShareLock); adscan = systable_beginscan(adrel, AttrDefaultIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); found = 0; while (HeapTupleIsValid(htup = systable_getnext(adscan))) @@ -3384,7 +3386,7 @@ CheckConstraintFetch(Relation relation) conrel = heap_open(ConstraintRelationId, AccessShareLock); conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); while (HeapTupleIsValid(htup = systable_getnext(conscan))) { @@ -3487,7 +3489,7 @@ RelationGetIndexList(Relation relation) indrel = heap_open(IndexRelationId, AccessShareLock); indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true, - SnapshotNow, 1, &skey); + NULL, 1, &skey); while (HeapTupleIsValid(htup = systable_getnext(indscan))) { @@ -3938,7 +3940,7 @@ RelationGetExclusionInfo(Relation indexRelation, conrel = heap_open(ConstraintRelationId, AccessShareLock); conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true, - SnapshotNow, 1, skey); + NULL, 1, skey); found = false; while (HeapTupleIsValid(htup = systable_getnext(conscan))) diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index ecb0f96d46..1ff2f2b5af 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -33,7 +33,10 @@ #include "catalog/pg_constraint.h" #include "catalog/pg_conversion.h" #include "catalog/pg_database.h" +#include "catalog/pg_db_role_setting.h" #include "catalog/pg_default_acl.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_description.h" #include "catalog/pg_enum.h" #include "catalog/pg_event_trigger.h" #include "catalog/pg_foreign_data_wrapper.h" @@ -47,6 +50,10 @@ #include "catalog/pg_proc.h" #include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_seclabel.h" +#include "catalog/pg_shdepend.h" +#include "catalog/pg_shdescription.h" +#include "catalog/pg_shseclabel.h" #include "catalog/pg_statistic.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" @@ -796,6 +803,10 @@ static CatCache *SysCache[ static int SysCacheSize = lengthof(cacheinfo); static bool CacheInitialized = false; +static Oid SysCacheRelationOid[lengthof(cacheinfo)]; +static int SysCacheRelationOidSize; + +static int oid_compare(const void *a, const void *b); /* * InitCatalogCache - initialize the caches @@ -809,6 +820,8 @@ void InitCatalogCache(void) { int cacheId; + int i, + j = 0; Assert(!CacheInitialized); @@ -825,11 +838,23 @@ InitCatalogCache(void) if (!PointerIsValid(SysCache[cacheId])) elog(ERROR, "could not initialize cache %u (%d)", cacheinfo[cacheId].reloid, cacheId); + SysCacheRelationOid[SysCacheRelationOidSize++] = + cacheinfo[cacheId].reloid; + /* see comments for RelationInvalidatesSnapshotsOnly */ + Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid)); } + + /* Sort and dedup OIDs. */ + pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize, + sizeof(Oid), oid_compare); + for (i = 1; i < SysCacheRelationOidSize; ++i) + if (SysCacheRelationOid[i] != SysCacheRelationOid[j]) + SysCacheRelationOid[++j] = SysCacheRelationOid[i]; + SysCacheRelationOidSize = j + 1; + CacheInitialized = true; } - /* * InitCatalogCachePhase2 - finish initializing the caches * @@ -1113,3 +1138,73 @@ SearchSysCacheList(int cacheId, int nkeys, return SearchCatCacheList(SysCache[cacheId], nkeys, key1, key2, key3, key4); } + +/* + * Certain relations that do not have system caches send snapshot invalidation + * messages in lieu of catcache messages. This is for the benefit of + * GetCatalogSnapshot(), which can then reuse its existing MVCC snapshot + * for scanning one of those catalogs, rather than taking a new one, if no + * invalidation has been received. + * + * Relations that have syscaches need not (and must not) be listed here. The + * catcache invalidation messages will also flush the snapshot. If you add a + * syscache for one of these relations, remove it from this list. + */ +bool +RelationInvalidatesSnapshotsOnly(Oid relid) +{ + switch (relid) + { + case DbRoleSettingRelationId: + case DependRelationId: + case SharedDependRelationId: + case DescriptionRelationId: + case SharedDescriptionRelationId: + case SecLabelRelationId: + case SharedSecLabelRelationId: + return true; + default: + break; + } + + return false; +} + +/* + * Test whether a relation has a system cache. + */ +bool +RelationHasSysCache(Oid relid) +{ + int low = 0, + high = SysCacheRelationOidSize - 1; + + while (low <= high) + { + int middle = low + (high - low) / 2; + + if (SysCacheRelationOid[middle] == relid) + return true; + if (SysCacheRelationOid[middle] < relid) + low = middle + 1; + else + high = middle - 1; + } + + return false; +} + + +/* + * OID comparator for pg_qsort + */ +static int +oid_compare(const void *a, const void *b) +{ + Oid oa = *((Oid *) a); + Oid ob = *((Oid *) b); + + if (oa == ob) + return 0; + return (oa > ob) ? 1 : -1; +} diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 65a8ad7be1..4e79247218 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -484,7 +484,7 @@ lookup_ts_config_cache(Oid cfgId) maprel = heap_open(TSConfigMapRelationId, AccessShareLock); mapidx = index_open(TSConfigMapIndexId, AccessShareLock); mapscan = systable_beginscan_ordered(maprel, mapidx, - SnapshotNow, 1, &mapskey); + NULL, 1, &mapskey); while ((maptup = systable_getnext_ordered(mapscan, ForwardScanDirection)) != NULL) { diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 2fa6d33535..04cb74c485 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -1082,12 +1082,7 @@ load_enum_cache_data(TypeCacheEntry *tcache) items = (EnumItem *) palloc(sizeof(EnumItem) * maxitems); numitems = 0; - /* - * Scan pg_enum for the members of the target enum type. We use a current - * MVCC snapshot, *not* SnapshotNow, so that we see a consistent set of - * rows even if someone commits a renumbering of the enum meanwhile. See - * comments for RenumberEnumType in catalog/pg_enum.c for more info. - */ + /* Scan pg_enum for the members of the target enum type. */ ScanKeyInit(&skey, Anum_pg_enum_enumtypid, BTEqualStrategyNumber, F_OIDEQ, @@ -1096,7 +1091,7 @@ load_enum_cache_data(TypeCacheEntry *tcache) enum_rel = heap_open(EnumRelationId, AccessShareLock); enum_scan = systable_beginscan(enum_rel, EnumTypIdLabelIndexId, - true, GetLatestSnapshot(), + true, NULL, 1, &skey); while (HeapTupleIsValid(enum_tuple = systable_getnext(enum_scan))) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index e0ea2e9ecf..127f9273e8 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -111,7 +111,7 @@ GetDatabaseTuple(const char *dbname) relation = heap_open(DatabaseRelationId, AccessShareLock); scan = systable_beginscan(relation, DatabaseNameIndexId, criticalSharedRelcachesBuilt, - SnapshotNow, + NULL, 1, key); tuple = systable_getnext(scan); @@ -154,7 +154,7 @@ GetDatabaseTupleByOid(Oid dboid) relation = heap_open(DatabaseRelationId, AccessShareLock); scan = systable_beginscan(relation, DatabaseOidIndexId, criticalSharedRelcachesBuilt, - SnapshotNow, + NULL, 1, key); tuple = systable_getnext(scan); @@ -997,18 +997,23 @@ static void process_settings(Oid databaseid, Oid roleid) { Relation relsetting; + Snapshot snapshot; if (!IsUnderPostmaster) return; relsetting = heap_open(DbRoleSettingRelationId, AccessShareLock); + /* read all the settings under the same snapsot for efficiency */ + snapshot = RegisterSnapshot(GetCatalogSnapshot(DbRoleSettingRelationId)); + /* Later settings are ignored if set earlier. */ - ApplySetting(databaseid, roleid, relsetting, PGC_S_DATABASE_USER); - ApplySetting(InvalidOid, roleid, relsetting, PGC_S_USER); - ApplySetting(databaseid, InvalidOid, relsetting, PGC_S_DATABASE); - ApplySetting(InvalidOid, InvalidOid, relsetting, PGC_S_GLOBAL); + ApplySetting(snapshot, databaseid, roleid, relsetting, PGC_S_DATABASE_USER); + ApplySetting(snapshot, InvalidOid, roleid, relsetting, PGC_S_USER); + ApplySetting(snapshot, databaseid, InvalidOid, relsetting, PGC_S_DATABASE); + ApplySetting(snapshot, InvalidOid, InvalidOid, relsetting, PGC_S_GLOBAL); + UnregisterSnapshot(snapshot); heap_close(relsetting, AccessShareLock); } @@ -1078,7 +1083,7 @@ ThereIsAtLeastOneRole(void) pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock); - scan = heap_beginscan(pg_authid_rel, SnapshotNow, 0, NULL); + scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL); result = (heap_getnext(scan, ForwardScanDirection) != NULL); heap_endscan(scan); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index e739d2d192..584d70c805 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -46,10 +46,12 @@ #include "storage/predicate.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/sinval.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/resowner_private.h" #include "utils/snapmgr.h" +#include "utils/syscache.h" #include "utils/tqual.h" @@ -58,17 +60,26 @@ * mode, and to the latest one taken in a read-committed transaction. * SecondarySnapshot is a snapshot that's always up-to-date as of the current * instant, even in transaction-snapshot mode. It should only be used for - * special-purpose code (say, RI checking.) + * special-purpose code (say, RI checking.) CatalogSnapshot points to an + * MVCC snapshot intended to be used for catalog scans; we must refresh it + * whenever a system catalog change occurs. * * These SnapshotData structs are static to simplify memory allocation * (see the hack in GetSnapshotData to avoid repeated malloc/free). */ static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC}; static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC}; +static SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC}; /* Pointers to valid snapshots */ static Snapshot CurrentSnapshot = NULL; static Snapshot SecondarySnapshot = NULL; +static Snapshot CatalogSnapshot = NULL; + +/* + * Staleness detection for CatalogSnapshot. + */ +static bool CatalogSnapshotStale = true; /* * These are updated by GetSnapshotData. We initialize them this way @@ -177,6 +188,9 @@ GetTransactionSnapshot(void) else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); + /* Don't allow catalog snapshot to be older than xact snapshot. */ + CatalogSnapshotStale = true; + FirstSnapshotSet = true; return CurrentSnapshot; } @@ -184,6 +198,9 @@ GetTransactionSnapshot(void) if (IsolationUsesXactSnapshot()) return CurrentSnapshot; + /* Don't allow catalog snapshot to be older than xact snapshot. */ + CatalogSnapshotStale = true; + CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; @@ -206,6 +223,54 @@ GetLatestSnapshot(void) return SecondarySnapshot; } +/* + * GetCatalogSnapshot + * Get a snapshot that is sufficiently up-to-date for scan of the + * system catalog with the specified OID. + */ +Snapshot +GetCatalogSnapshot(Oid relid) +{ + /* + * If the caller is trying to scan a relation that has no syscache, + * no catcache invalidations will be sent when it is updated. For a + * a few key relations, snapshot invalidations are sent instead. If + * we're trying to scan a relation for which neither catcache nor + * snapshot invalidations are sent, we must refresh the snapshot every + * time. + */ + if (!CatalogSnapshotStale && !RelationInvalidatesSnapshotsOnly(relid) && + !RelationHasSysCache(relid)) + CatalogSnapshotStale = true; + + if (CatalogSnapshotStale) + { + /* Get new snapshot. */ + CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData); + + /* + * Mark new snapshost as valid. We must do this last, in case an + * ERROR occurs inside GetSnapshotData(). + */ + CatalogSnapshotStale = false; + } + + return CatalogSnapshot; +} + +/* + * Mark the current catalog snapshot as invalid. We could change this API + * to allow the caller to provide more fine-grained invalidation details, so + * that a change to relation A wouldn't prevent us from using our cached + * snapshot to scan relation B, but so far there's no evidence that the CPU + * cycles we spent tracking such fine details would be well-spent. + */ +void +InvalidateCatalogSnapshot() +{ + CatalogSnapshotStale = true; +} + /* * SnapshotSetCommandId * Propagate CommandCounterIncrement into the static snapshots, if set diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index becc82be91..9ee9ea2baf 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -14,13 +14,13 @@ * Note that pg_dump runs in a transaction-snapshot mode transaction, * so it sees a consistent snapshot of the database including system * catalogs. However, it relies in part on various specialized backend - * functions like pg_get_indexdef(), and those things tend to run on - * SnapshotNow time, ie they look at the currently committed state. So - * it is possible to get 'cache lookup failed' error if someone - * performs DDL changes while a dump is happening. The window for this - * sort of thing is from the acquisition of the transaction snapshot to - * getSchemaData() (when pg_dump acquires AccessShareLock on every - * table it intends to dump). It isn't very large, but it can happen. + * functions like pg_get_indexdef(), and those things tend to look at + * the currently committed state. So it is possible to get 'cache + * lookup failed' error if someone performs DDL changes while a dump is + * happening. The window for this sort of thing is from the acquisition + * of the transaction snapshot to getSchemaData() (when pg_dump acquires + * AccessShareLock on every table it intends to dump). It isn't very large, + * but it can happen. * * http://archives.postgresql.org/pgsql-bugs/2010-02/msg00187.php * diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index baa8c50add..0d403985e2 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -105,6 +105,8 @@ typedef struct HeapScanDescData *HeapScanDesc; extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key); +extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys, + ScanKey key); extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 5b5802820d..3a86ca4230 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -32,6 +32,7 @@ typedef struct HeapScanDescData bool rs_pageatatime; /* verify visibility page-at-a-time? */ bool rs_allow_strat; /* allow or disallow use of access strategy */ bool rs_allow_sync; /* allow or disallow use of syncscan */ + bool rs_temp_snap; /* unregister snapshot at scan end? */ /* state set up at initscan time */ BlockNumber rs_nblocks; /* number of blocks to scan */ @@ -101,6 +102,7 @@ typedef struct SysScanDescData Relation irel; /* NULL if doing heap scan */ HeapScanDesc scan; /* only valid in heap-scan case */ IndexScanDesc iscan; /* only valid in index-scan case */ + Snapshot snapshot; /* snapshot to unregister at end of scan */ } SysScanDescData; #endif /* RELSCAN_H */ diff --git a/src/include/catalog/objectaccess.h b/src/include/catalog/objectaccess.h index 8394401fc5..c8a95a6dbb 100644 --- a/src/include/catalog/objectaccess.h +++ b/src/include/catalog/objectaccess.h @@ -24,8 +24,8 @@ * * OAT_POST_ALTER should be invoked just after the object is altered, * but before the command counter is incremented. An extension using the - * hook can use SnapshotNow and SnapshotSelf to get the old and new - * versions of the tuple. + * hook can use a current MVCC snapshot to get the old version of the tuple, + * and can use SnapshotSelf to get the new version of the tuple. * * OAT_NAMESPACE_SEARCH should be invoked prior to object name lookup under * a particular namespace. This event is equivalent to usage permission diff --git a/src/include/catalog/pg_db_role_setting.h b/src/include/catalog/pg_db_role_setting.h index 070cbc81ac..649f5c4281 100644 --- a/src/include/catalog/pg_db_role_setting.h +++ b/src/include/catalog/pg_db_role_setting.h @@ -62,7 +62,7 @@ typedef FormData_pg_db_role_setting *Form_pg_db_role_setting; */ extern void AlterSetting(Oid databaseid, Oid roleid, VariableSetStmt *setstmt); extern void DropSetting(Oid databaseid, Oid roleid); -extern void ApplySetting(Oid databaseid, Oid roleid, Relation relsetting, - GucSource source); +extern void ApplySetting(Snapshot snapshot, Oid databaseid, Oid roleid, + Relation relsetting, GucSource source); #endif /* PG_DB_ROLE_SETTING_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 9e833ca0f8..7e70e57a7e 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -24,6 +24,7 @@ * * invalidate a relcache entry for a specific logical relation * * invalidate an smgr cache entry for a specific physical relation * * invalidate the mapped-relation mapping for a given database + * * invalidate any saved snapshot that might be used to scan a given relation * More types could be added if needed. The message type is identified by * the first "int8" field of the message struct. Zero or positive means a * specific-catcache inval message (and also serves as the catcache ID field). @@ -43,11 +44,11 @@ * catcache inval messages must be generated for each of its caches, since * the hash keys will generally be different. * - * Catcache and relcache invalidations are transactional, and so are sent - * to other backends upon commit. Internally to the generating backend, - * they are also processed at CommandCounterIncrement so that later commands - * in the same transaction see the new state. The generating backend also - * has to process them at abort, to flush out any cache state it's loaded + * Catcache, relcache, and snapshot invalidations are transactional, and so + * are sent to other backends upon commit. Internally to the generating + * backend, they are also processed at CommandCounterIncrement so that later + * commands in the same transaction see the new state. The generating backend + * also has to process them at abort, to flush out any cache state it's loaded * from no-longer-valid entries. * * smgr and relation mapping invalidations are non-transactional: they are @@ -98,6 +99,15 @@ typedef struct Oid dbId; /* database ID, or 0 for shared catalogs */ } SharedInvalRelmapMsg; +#define SHAREDINVALSNAPSHOT_ID (-5) + +typedef struct +{ + int8 id; /* type field --- must be first */ + Oid dbId; /* database ID, or 0 if a shared relation */ + Oid relId; /* relation ID */ +} SharedInvalSnapshotMsg; + typedef union { int8 id; /* type field --- must be first */ @@ -106,6 +116,7 @@ typedef union SharedInvalRelcacheMsg rc; SharedInvalSmgrMsg sm; SharedInvalRelmapMsg rm; + SharedInvalSnapshotMsg sn; } SharedInvalidationMessage; diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index bfbd8dd8ff..81a286c398 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -28,6 +28,9 @@ extern Snapshot GetTransactionSnapshot(void); extern Snapshot GetLatestSnapshot(void); extern void SnapshotSetCommandId(CommandId curcid); +extern Snapshot GetCatalogSnapshot(Oid relid); +extern void InvalidateCatalogSnapshot(void); + extern void PushActiveSnapshot(Snapshot snapshot); extern void PushCopiedSnapshot(Snapshot snapshot); extern void UpdateActiveSnapshotCommandId(void); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index d1d8abe0a9..e41b3d2e38 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -125,6 +125,9 @@ struct catclist; extern struct catclist *SearchSysCacheList(int cacheId, int nkeys, Datum key1, Datum key2, Datum key3, Datum key4); +extern bool RelationInvalidatesSnapshotsOnly(Oid); +extern bool RelationHasSysCache(Oid); + /* * The use of the macros below rather than direct calls to the corresponding * functions is encouraged, as it insulates the caller from changes in the