*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.298 2010/01/12 02:42:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.299 2010/01/12 18:12:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* non-export function prototypes */
+static void RelationDestroyRelation(Relation relation);
static void RelationClearRelation(Relation relation, bool rebuild);
static void RelationReloadIndexInfo(Relation relation);
int natts, const FormData_pg_attribute *attrs);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
-static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
+static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
-static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
+static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
static void RelationInitPhysicalAddr(Relation relation);
static void load_critical_index(Oid indexoid);
static TupleDesc GetPgClassDescriptor(void);
* AllocateRelationDesc
*
* This is used to allocate memory for a new relation descriptor
- * and initialize the rd_rel field.
- *
- * If 'relation' is NULL, allocate a new RelationData object.
- * If not, reuse the given object (that path is taken only when
- * we have to rebuild a relcache entry during RelationClearRelation).
+ * and initialize the rd_rel field from the given pg_class tuple.
*/
static Relation
-AllocateRelationDesc(Relation relation, Form_pg_class relp)
+AllocateRelationDesc(Form_pg_class relp)
{
+ Relation relation;
MemoryContext oldcxt;
Form_pg_class relationForm;
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
- * allocate space for new relation descriptor, if needed
+ * allocate and zero space for new relation descriptor
*/
- if (relation == NULL)
- relation = (Relation) palloc(sizeof(RelationData));
+ relation = (Relation) palloc0(sizeof(RelationData));
/*
- * clear all fields of reldesc
+ * clear fields of reldesc that should initialize to something non-zero
*/
- MemSet(relation, 0, sizeof(RelationData));
relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks = InvalidBlockNumber;
relation->rd_vm_nblocks = InvalidBlockNumber;
{
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
- case RELKIND_UNCATALOGED:
case RELKIND_INDEX:
break;
default:
relation->rd_options = MemoryContextAlloc(CacheMemoryContext,
VARSIZE(options));
memcpy(relation->rd_options, options, VARSIZE(options));
+ pfree(options);
}
}
/*
* RelationBuildDesc
*
- * Build a relation descriptor --- either a new one, or by
- * recycling the given old relation object. The latter case
- * supports rebuilding a relcache entry without invalidating
- * pointers to it. The caller must hold at least
+ * Build a relation descriptor. The caller must hold at least
* AccessShareLock on the target relid.
*
+ * The new descriptor is inserted into the hash table if insertIt is true.
+ *
* Returns NULL if no pg_class row could be found for the given relid
* (suggesting we are trying to access a just-deleted relation).
* Any other error is reported via elog.
*/
static Relation
-RelationBuildDesc(Oid targetRelId, Relation oldrelation)
+RelationBuildDesc(Oid targetRelId, bool insertIt)
{
Relation relation;
Oid relid;
HeapTuple pg_class_tuple;
Form_pg_class relp;
- MemoryContext oldcxt;
/*
* find the tuple in pg_class corresponding to the given relation id
* allocate storage for the relation descriptor, and copy pg_class_tuple
* to relation->rd_rel.
*/
- relation = AllocateRelationDesc(oldrelation, relp);
+ relation = AllocateRelationDesc(relp);
/*
* initialize the relation's relation id (relation->rd_id)
heap_freetuple(pg_class_tuple);
/*
- * Insert newly created relation into relcache hash tables.
+ * Insert newly created relation into relcache hash table, if requested.
*/
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
- RelationCacheInsert(relation);
- MemoryContextSwitchTo(oldcxt);
+ if (insertIt)
+ RelationCacheInsert(relation);
/* It's fully valid */
relation->rd_isvalid = true;
if (RelationIsValid(rd))
{
RelationIncrementReferenceCount(rd);
- /* revalidate nailed index if necessary */
+ /* revalidate cache entry if necessary */
if (!rd->rd_isvalid)
- RelationReloadIndexInfo(rd);
+ {
+ /*
+ * Indexes only have a limited number of possible schema changes,
+ * and we don't want to use the full-blown procedure because it's
+ * a headache for indexes that reload itself depends on.
+ */
+ if (rd->rd_rel->relkind == RELKIND_INDEX)
+ RelationReloadIndexInfo(rd);
+ else
+ RelationClearRelation(rd, true);
+ }
return rd;
}
* no reldesc in the cache, so have RelationBuildDesc() build one and add
* it.
*/
- rd = RelationBuildDesc(relationId, NULL);
+ rd = RelationBuildDesc(relationId, true);
if (RelationIsValid(rd))
RelationIncrementReferenceCount(rd);
return rd;
relation->rd_isvalid = true;
}
+/*
+ * RelationDestroyRelation
+ *
+ * Physically delete a relation cache entry and all subsidiary data.
+ * Caller must already have unhooked the entry from the hash table.
+ */
+static void
+RelationDestroyRelation(Relation relation)
+{
+ Assert(RelationHasReferenceCountZero(relation));
+
+ /*
+ * Make sure smgr and lower levels close the relation's files, if they
+ * weren't closed already. (This was probably done by caller, but let's
+ * just be real sure.)
+ */
+ RelationCloseSmgr(relation);
+
+ /*
+ * Free all the subsidiary data structures of the relcache entry,
+ * then the entry itself.
+ */
+ if (relation->rd_rel)
+ pfree(relation->rd_rel);
+ /* can't use DecrTupleDescRefCount here */
+ Assert(relation->rd_att->tdrefcount > 0);
+ if (--relation->rd_att->tdrefcount == 0)
+ FreeTupleDesc(relation->rd_att);
+ list_free(relation->rd_indexlist);
+ bms_free(relation->rd_indexattr);
+ FreeTriggerDesc(relation->trigdesc);
+ if (relation->rd_options)
+ pfree(relation->rd_options);
+ if (relation->rd_indextuple)
+ pfree(relation->rd_indextuple);
+ if (relation->rd_am)
+ pfree(relation->rd_am);
+ if (relation->rd_indexcxt)
+ MemoryContextDelete(relation->rd_indexcxt);
+ if (relation->rd_rulescxt)
+ MemoryContextDelete(relation->rd_rulescxt);
+ pfree(relation);
+}
+
/*
* RelationClearRelation
*
RelationClearRelation(Relation relation, bool rebuild)
{
Oid old_reltype = relation->rd_rel->reltype;
- MemoryContext oldcxt;
/*
* Make sure smgr and lower levels close the relation's files, if they
* Never, never ever blow away a nailed-in system relation, because we'd
* be unable to recover. However, we must reset rd_targblock, in case we
* got called because of a relation cache flush that was triggered by
- * VACUUM.
+ * VACUUM. Likewise reset the fsm and vm size info.
*
* If it's a nailed index, then we need to re-read the pg_class row to see
* if its relfilenode changed. We can't necessarily do that here, because
return;
}
- /*
- * Remove relation from hash tables
- *
- * Note: we might be reinserting it momentarily, but we must not have it
- * visible in the hash tables until it's valid again, so don't try to
- * optimize this away...
- */
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
- RelationCacheDelete(relation);
- MemoryContextSwitchTo(oldcxt);
-
- /* Clear out catcache's entries for this relation */
- CatalogCacheFlushRelation(RelationGetRelid(relation));
+ /* Mark it invalid until we've finished rebuild */
+ relation->rd_isvalid = false;
/*
- * Free all the subsidiary data structures of the relcache entry. We
- * cannot free rd_att if we are trying to rebuild the entry, however,
- * because pointers to it may be cached in various places. The rule
- * manager might also have pointers into the rewrite rules. So to begin
- * with, we can only get rid of these fields:
+ * Clear out catcache's entries for this relation. This is a bit of
+ * a hack, but it's a convenient place to do it.
*/
- FreeTriggerDesc(relation->trigdesc);
- if (relation->rd_indextuple)
- pfree(relation->rd_indextuple);
- if (relation->rd_am)
- pfree(relation->rd_am);
- if (relation->rd_rel)
- pfree(relation->rd_rel);
- if (relation->rd_options)
- pfree(relation->rd_options);
- list_free(relation->rd_indexlist);
- bms_free(relation->rd_indexattr);
- if (relation->rd_indexcxt)
- MemoryContextDelete(relation->rd_indexcxt);
+ CatalogCacheFlushRelation(RelationGetRelid(relation));
/*
* If we're really done with the relcache entry, blow it away. But if
*/
if (!rebuild)
{
- /* ok to zap remaining substructure */
+ /* Flush any rowtype cache entry */
flush_rowtype_cache(old_reltype);
- /* can't use DecrTupleDescRefCount here */
- Assert(relation->rd_att->tdrefcount > 0);
- if (--relation->rd_att->tdrefcount == 0)
- FreeTupleDesc(relation->rd_att);
- if (relation->rd_rulescxt)
- MemoryContextDelete(relation->rd_rulescxt);
- pfree(relation);
+
+ /* Remove it from the hash table */
+ RelationCacheDelete(relation);
+
+ /* And release storage */
+ RelationDestroyRelation(relation);
}
else
{
/*
- * When rebuilding an open relcache entry, must preserve ref count and
- * rd_createSubid/rd_newRelfilenodeSubid state. Also attempt to
- * preserve the tupledesc and rewrite-rule substructures in place.
- * (Note: the refcount mechanism for tupledescs may eventually ensure
- * that we don't really need to preserve the tupledesc in-place, but
- * for now there are still a lot of places that assume an open rel's
- * tupledesc won't move.)
+ * Our strategy for rebuilding an open relcache entry is to build
+ * a new entry from scratch, swap its contents with the old entry,
+ * and finally delete the new entry (along with any infrastructure
+ * swapped over from the old entry). This is to avoid trouble in case
+ * an error causes us to lose control partway through. The old entry
+ * will still be marked !rd_isvalid, so we'll try to rebuild it again
+ * on next access. Meanwhile it's not any less valid than it was
+ * before, so any code that might expect to continue accessing it
+ * isn't hurt by the rebuild failure. (Consider for example a
+ * subtransaction that ALTERs a table and then gets cancelled partway
+ * through the cache entry rebuild. The outer transaction should
+ * still see the not-modified cache entry as valid.) The worst
+ * consequence of an error is leaking the necessarily-unreferenced
+ * new entry, and this shouldn't happen often enough for that to be
+ * a big problem.
+ *
+ * When rebuilding an open relcache entry, we must preserve ref count
+ * and rd_createSubid/rd_newRelfilenodeSubid state. Also attempt to
+ * preserve the pg_class entry (rd_rel), tupledesc, and rewrite-rule
+ * substructures in place, because various places assume that these
+ * structures won't move while they are working with an open relcache
+ * entry. (Note: the refcount mechanism for tupledescs might someday
+ * allow us to remove this hack for the tupledesc.)
*
* Note that this process does not touch CurrentResourceOwner; which
* is good because whatever ref counts the entry may have do not
* necessarily belong to that resource owner.
*/
+ Relation newrel;
Oid save_relid = RelationGetRelid(relation);
- int old_refcnt = relation->rd_refcnt;
- SubTransactionId old_createSubid = relation->rd_createSubid;
- SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
- struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info;
- TupleDesc old_att = relation->rd_att;
- RuleLock *old_rules = relation->rd_rules;
- MemoryContext old_rulescxt = relation->rd_rulescxt;
-
- if (RelationBuildDesc(save_relid, relation) != relation)
+ bool keep_tupdesc;
+ bool keep_rules;
+
+ /* Build temporary entry, but don't link it into hashtable */
+ newrel = RelationBuildDesc(save_relid, false);
+ if (newrel == NULL)
{
/* Should only get here if relation was deleted */
flush_rowtype_cache(old_reltype);
- Assert(old_att->tdrefcount > 0);
- if (--old_att->tdrefcount == 0)
- FreeTupleDesc(old_att);
- if (old_rulescxt)
- MemoryContextDelete(old_rulescxt);
- pfree(relation);
+ RelationCacheDelete(relation);
+ RelationDestroyRelation(relation);
elog(ERROR, "relation %u deleted while still in use", save_relid);
}
- relation->rd_refcnt = old_refcnt;
- relation->rd_createSubid = old_createSubid;
- relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid;
- relation->pgstat_info = old_pgstat_info;
- if (equalTupleDescs(old_att, relation->rd_att))
- {
- /* needn't flush typcache here */
- Assert(relation->rd_att->tdrefcount == 1);
- if (--relation->rd_att->tdrefcount == 0)
- FreeTupleDesc(relation->rd_att);
- relation->rd_att = old_att;
- }
- else
- {
+ keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
+ keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
+ if (!keep_tupdesc)
flush_rowtype_cache(old_reltype);
- Assert(old_att->tdrefcount > 0);
- if (--old_att->tdrefcount == 0)
- FreeTupleDesc(old_att);
- }
- if (equalRuleLocks(old_rules, relation->rd_rules))
+
+ /*
+ * Perform swapping of the relcache entry contents. Within this
+ * process the old entry is momentarily invalid, so there *must*
+ * be no possibility of CHECK_FOR_INTERRUPTS within this sequence.
+ * Do it in all-in-line code for safety.
+ *
+ * Since the vast majority of fields should be swapped, our method
+ * is to swap the whole structures and then re-swap those few fields
+ * we didn't want swapped.
+ */
+#define SWAPFIELD(fldtype, fldname) \
+ do { \
+ fldtype _tmp = newrel->fldname; \
+ newrel->fldname = relation->fldname; \
+ relation->fldname = _tmp; \
+ } while (0)
+
+ /* swap all Relation struct fields */
{
- if (relation->rd_rulescxt)
- MemoryContextDelete(relation->rd_rulescxt);
- relation->rd_rules = old_rules;
- relation->rd_rulescxt = old_rulescxt;
+ RelationData tmpstruct;
+
+ memcpy(&tmpstruct, newrel, sizeof(RelationData));
+ memcpy(newrel, relation, sizeof(RelationData));
+ memcpy(relation, &tmpstruct, sizeof(RelationData));
}
- else
+
+ /* rd_smgr must not be swapped, due to back-links from smgr level */
+ SWAPFIELD(SMgrRelation, rd_smgr);
+ /* rd_refcnt must be preserved */
+ SWAPFIELD(int, rd_refcnt);
+ /* isnailed shouldn't change */
+ Assert(newrel->rd_isnailed == relation->rd_isnailed);
+ /* creation sub-XIDs must be preserved */
+ SWAPFIELD(SubTransactionId, rd_createSubid);
+ SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
+ /* un-swap rd_rel pointers, swap contents instead */
+ SWAPFIELD(Form_pg_class, rd_rel);
+ /* ... but actually, we don't have to update newrel->rd_rel */
+ memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
+ /* preserve old tupledesc and rules if no logical change */
+ if (keep_tupdesc)
+ SWAPFIELD(TupleDesc, rd_att);
+ if (keep_rules)
{
- if (old_rulescxt)
- MemoryContextDelete(old_rulescxt);
+ SWAPFIELD(RuleLock *, rd_rules);
+ SWAPFIELD(MemoryContext, rd_rulescxt);
}
+ /* pgstat_info must be preserved */
+ SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
+
+#undef SWAPFIELD
+
+ /* And now we can throw away the temporary entry */
+ RelationDestroyRelation(newrel);
}
}
Relation ird;
LockRelationOid(indexoid, AccessShareLock);
- ird = RelationBuildDesc(indexoid, NULL);
+ ird = RelationBuildDesc(indexoid, true);
if (ird == NULL)
elog(PANIC, "could not open critical system index %u", indexoid);
ird->rd_isnailed = true;
fix_opfuncids((Node *) result);
/* Now save a copy of the completed tree in the relcache entry. */
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
relation->rd_indexprs = (List *) copyObject(result);
MemoryContextSwitchTo(oldcxt);
fix_opfuncids((Node *) result);
/* Now save a copy of the completed tree in the relcache entry. */
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
relation->rd_indpred = (List *) copyObject(result);
MemoryContextSwitchTo(oldcxt);