From: Tom Lane Date: Wed, 8 Nov 2000 22:10:03 +0000 (+0000) Subject: Make DROP TABLE rollback-able: postpone physical file delete until commit. X-Git-Tag: REL7_1_BETA~224 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3908473c80;p=postgresql Make DROP TABLE rollback-able: postpone physical file delete until commit. (WAL logging for this is not done yet, however.) Clean up a number of really crufty things that are no longer needed now that DROP behaves nicely. Make temp table mapper do the right things when drop or rename affecting a temp table is rolled back. Also, remove "relation modified while in use" error check, in favor of locking tables at first reference and holding that lock throughout the statement. --- diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 9b59947a88..1ed2366efd 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.67 2000/10/05 19:48:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.68 2000/11/08 22:09:53 tgl Exp $ * * NOTES * some of the executor utility code such as "ExecTypeFromTL" should be @@ -228,7 +228,9 @@ FreeTupleDesc(TupleDesc tupdesc) bool equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) { - int i; + int i, + j, + n; if (tupdesc1->natts != tupdesc2->natts) return false; @@ -240,7 +242,9 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) /* * We do not need to check every single field here, and in fact * some fields such as attdispersion probably shouldn't be - * compared. + * compared. We can also disregard attnum (it was used to + * place the row in the attrs array) and everything derived + * from the column datatype. 
*/ if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0) return false; @@ -260,32 +264,53 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) if (constr2 == NULL) return false; - if (constr1->num_defval != constr2->num_defval) + if (constr1->has_not_null != constr2->has_not_null) + return false; + n = constr1->num_defval; + if (n != (int) constr2->num_defval) return false; - for (i = 0; i < (int) constr1->num_defval; i++) + for (i = 0; i < n; i++) { AttrDefault *defval1 = constr1->defval + i; - AttrDefault *defval2 = constr2->defval + i; + AttrDefault *defval2 = constr2->defval; - if (defval1->adnum != defval2->adnum) + /* + * We can't assume that the items are always read from the + * system catalogs in the same order; so use the adnum field to + * identify the matching item to compare. + */ + for (j = 0; j < n; defval2++, j++) + { + if (defval1->adnum == defval2->adnum) + break; + } + if (j >= n) return false; if (strcmp(defval1->adbin, defval2->adbin) != 0) return false; } - if (constr1->num_check != constr2->num_check) + n = constr1->num_check; + if (n != (int) constr2->num_check) return false; - for (i = 0; i < (int) constr1->num_check; i++) + for (i = 0; i < n; i++) { ConstrCheck *check1 = constr1->check + i; - ConstrCheck *check2 = constr2->check + i; + ConstrCheck *check2 = constr2->check; - if (strcmp(check1->ccname, check2->ccname) != 0) - return false; - if (strcmp(check1->ccbin, check2->ccbin) != 0) + /* + * Similarly, don't assume that the checks are always read + * in the same order; match them up by name and contents. + * (The name *should* be unique, but...) 
+ */ + for (j = 0; j < n; check2++, j++) + { + if (strcmp(check1->ccname, check2->ccname) == 0 && + strcmp(check1->ccbin, check2->ccbin) == 0) + break; + } + if (j >= n) return false; } - if (constr1->has_not_null != constr2->has_not_null) - return false; } else if (tupdesc2->constr != NULL) return false; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 560f28743f..d7bfeb1287 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -6,7 +6,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.63 2000/10/21 15:43:09 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.64 2000/11/08 22:09:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -266,13 +266,12 @@ gistbuild(PG_FUNCTION_ARGS) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); - bool inplace = IsReindexProcessing(); heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups, inplace); - UpdateStats(irelid, nitups, inplace); - if (oldPred != NULL && !inplace) + UpdateStats(hrelid, nhtups); + UpdateStats(irelid, nitups); + if (oldPred != NULL) { if (nitups == nhtups) pred = NULL; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 8db80d5154..333199a898 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.43 2000/10/21 15:43:11 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.44 2000/11/08 22:09:54 tgl Exp $ * * NOTES * This file contains only the public interface routines. 
@@ -217,13 +217,12 @@ hashbuild(PG_FUNCTION_ARGS) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); - bool inplace = IsReindexProcessing(); heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups, inplace); - UpdateStats(irelid, nitups, inplace); - if (oldPred != NULL && !inplace) + UpdateStats(hrelid, nhtups); + UpdateStats(irelid, nitups); + if (oldPred != NULL) { if (nitups == nhtups) pred = NULL; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e796762ef1..5f450f9152 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.92 2000/10/29 18:33:39 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.93 2000/11/08 22:09:54 tgl Exp $ * * * INTERFACE ROUTINES @@ -780,19 +780,13 @@ heap_beginscan(Relation relation, /* ---------------- * increment relation ref count while scanning relation - * ---------------- - */ - RelationIncrementReferenceCount(relation); - - /* ---------------- - * Acquire AccessShareLock for the duration of the scan * - * Note: we could get an SI inval message here and consequently have - * to rebuild the relcache entry. The refcount increment above - * ensures that we will rebuild it and not just flush it... + * This is just to make really sure the relcache entry won't go away + * while the scan has a pointer to it. Caller should be holding the + * rel open anyway, so this is redundant in all normal scenarios... 
* ---------------- */ - LockRelation(relation, AccessShareLock); + RelationIncrementReferenceCount(relation); /* XXX someday assert SelfTimeQual if relkind == RELKIND_UNCATALOGED */ if (relation->rd_rel->relkind == RELKIND_UNCATALOGED) @@ -809,13 +803,11 @@ heap_beginscan(Relation relation, scan->rs_snapshot = snapshot; scan->rs_nkeys = (short) nkeys; + /* + * we do this here instead of in initscan() because heap_rescan + * also calls initscan() and we don't want to allocate memory again + */ if (nkeys) - - /* - * we do this here instead of in initscan() because heap_rescan - * also calls initscan() and we don't want to allocate memory - * again - */ scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->rs_key = NULL; @@ -841,8 +833,6 @@ heap_rescan(HeapScanDesc scan, IncrHeapAccessStat(local_rescan); IncrHeapAccessStat(global_rescan); - /* Note: set relation level read lock is still set */ - /* ---------------- * unpin scan buffers * ---------------- @@ -853,7 +843,7 @@ heap_rescan(HeapScanDesc scan, * reinitialize scan descriptor * ---------------- */ - scan->rs_atend = (bool) scanFromEnd; + scan->rs_atend = scanFromEnd; initscan(scan, scan->rs_rd, scanFromEnd, scan->rs_nkeys, key); } @@ -882,12 +872,6 @@ heap_endscan(HeapScanDesc scan) */ unpinscan(scan); - /* ---------------- - * Release AccessShareLock acquired by heap_beginscan() - * ---------------- - */ - UnlockRelation(scan->rs_rd, AccessShareLock); - /* ---------------- * decrement relation reference count and free scan descriptor storage * ---------------- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index fb437ac99f..ab942844af 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.69 2000/11/01 20:39:58 vadim Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.70 2000/11/08 22:09:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -340,19 +340,16 @@ btbuild(PG_FUNCTION_ARGS) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); - bool inplace = IsReindexProcessing(); heap_close(heap, NoLock); index_close(index); - - UpdateStats(hrelid, nhtups, inplace); - UpdateStats(irelid, nitups, inplace); + UpdateStats(hrelid, nhtups); + UpdateStats(irelid, nitups); if (oldPred != NULL) { if (nitups == nhtups) pred = NULL; - if (!inplace) - UpdateIndexPredicate(irelid, oldPred, pred); + UpdateIndexPredicate(irelid, oldPred, pred); } } diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 60f6a2f6ca..ee5f621c0c 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.54 2000/10/21 15:43:20 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.55 2000/11/08 22:09:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -237,13 +237,12 @@ rtbuild(PG_FUNCTION_ARGS) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); - bool inplace = IsReindexProcessing(); heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups, inplace); - UpdateStats(irelid, nitups, inplace); - if (oldPred != NULL && !inplace) + UpdateStats(hrelid, nhtups); + UpdateStats(irelid, nitups); + if (oldPred != NULL) { if (nitups == nhtups) pred = NULL; diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index f29b41d749..8c7d98ca70 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.31 2000/11/03 11:39:35 
vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.32 2000/11/08 22:09:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -130,7 +130,7 @@ VariableRelationPutNextXid(TransactionId xid) TransactionIdStore(xid, &(var->nextXidData)); - FlushBuffer(buf, TRUE); + FlushBuffer(buf, true, true); } /* -------------------------------- diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index cc4209fa51..97ff91fc44 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.80 2000/11/05 22:50:19 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.81 2000/11/08 22:09:55 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -167,6 +167,7 @@ #include "miscadmin.h" #include "storage/proc.h" #include "storage/sinval.h" +#include "storage/smgr.h" #include "utils/inval.h" #include "utils/memutils.h" #include "utils/portal.h" @@ -1105,6 +1106,9 @@ CommitTransaction(void) } RelationPurgeLocalRelation(true); + AtEOXact_temp_relations(true); + smgrDoPendingDeletes(true); + AtEOXact_SPI(); AtEOXact_nbtree(); AtCommit_Cache(); @@ -1181,8 +1185,11 @@ AbortTransaction(void) CloseSequences(); AtEOXact_portals(); RecordTransactionAbort(); + RelationPurgeLocalRelation(false); - remove_temp_rel_in_myxid(); + AtEOXact_temp_relations(false); + smgrDoPendingDeletes(false); + AtEOXact_SPI(); AtEOXact_nbtree(); AtAbort_Cache(); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 1fdc7e83fb..b644278733 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -171,9 +171,8 @@ XLogOpenLogRelation(void) sprintf(RelationGetPhysicalRelationName(logRelation), "pg_log"); logRelation->rd_node.tblNode = InvalidOid; logRelation->rd_node.relNode = 
RelOid_pg_log; - logRelation->rd_unlinked = false; /* must exists */ logRelation->rd_fd = -1; - logRelation->rd_fd = smgropen(DEFAULT_SMGR, logRelation); + logRelation->rd_fd = smgropen(DEFAULT_SMGR, logRelation, false); if (logRelation->rd_fd < 0) elog(STOP, "XLogOpenLogRelation: failed to open pg_log"); LogRelation = logRelation; @@ -384,9 +383,9 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) hentry->rdesc = res; - res->reldata.rd_unlinked = true; /* look smgropen */ res->reldata.rd_fd = -1; - res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata)); + res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata), + true /* allow failure */); } res->moreRecently = &(_xlrelarr[0]); diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index de4cc3dd99..e4e26d0c3c 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.96 2000/11/04 12:43:23 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.97 2000/11/08 22:09:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1151,7 +1151,7 @@ build_indices() * -mer */ if (!BootstrapAlreadySeen(RelationGetRelid(heap))) - UpdateStats(RelationGetRelid(heap), 0, true); + UpdateStats(RelationGetRelid(heap), 0); /* XXX Probably we ought to close the heap and index here? 
*/ } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index f2df9c3c07..2c4a9e515b 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.150 2000/10/22 23:32:38 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.151 2000/11/08 22:09:56 tgl Exp $ * * * INTERFACE ROUTINES @@ -289,8 +289,7 @@ heap_create(char *relname, */ rel = (Relation) palloc(sizeof(RelationData)); MemSet((char *) rel, 0, sizeof(RelationData)); - rel->rd_fd = -1; /* table is not open */ - rel->rd_unlinked = true; /* table is not created yet */ + rel->rd_fd = -1; /* physical file is not open */ RelationSetReferenceCount(rel, 1); @@ -345,8 +344,6 @@ heap_create(char *relname, * have the storage manager create the relation. * ---------------- */ - - /* smgrcreate() is moved to heap_storage_create() */ if (storage_create) heap_storage_create(rel); @@ -355,18 +352,12 @@ heap_create(char *relname, return rel; } -bool +void heap_storage_create(Relation rel) { - bool smgrcall = false; - - if (rel->rd_unlinked) - { - rel->rd_fd = (File) smgrcreate(DEFAULT_SMGR, rel); - rel->rd_unlinked = false; - smgrcall = true; - } - return smgrcall; + Assert(rel->rd_fd < 0); + rel->rd_fd = smgrcreate(DEFAULT_SMGR, rel); + Assert(rel->rd_fd >= 0); } /* ---------------------------------------------------------------- @@ -1062,7 +1053,11 @@ RelationRemoveIndexes(Relation relation) &entry); while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) + { index_drop(((Form_pg_index) GETSTRUCT(tuple))->indexrelid); + /* advance cmd counter to make catalog changes visible */ + CommandCounterIncrement(); + } heap_endscan(scan); heap_close(indexRelation, RowExclusiveLock); @@ -1165,10 +1160,10 @@ RelationTruncateIndexes(Oid heapId) LockRelation(currentIndex, AccessExclusiveLock); /* - * Release any buffers associated with this index. 
If they're + * Drop any buffers associated with this index. If they're * dirty, they're just dropped without bothering to flush to disk. */ - ReleaseRelationBuffers(currentIndex); + DropRelationBuffers(currentIndex); /* Now truncate the actual data and set blocks to zero */ smgrtruncate(DEFAULT_SMGR, currentIndex, 0); @@ -1212,24 +1207,19 @@ heap_truncate(char *relname) /* ---------------- * TRUNCATE TABLE within a transaction block is dangerous, because * if the transaction is later rolled back we have no way to - * undo truncation of the relation's physical file. For now, allow it - * but emit a warning message. - * Someday we might want to consider postponing the physical truncate - * until transaction commit, but that's a lot of work... - * The only case that actually works right is for relations created - * in the current transaction, since the post-abort state would be that - * they don't exist anyway. So, no warning in that case. + * undo truncation of the relation's physical file. Disallow it + * except for a rel created in the current xact (which would be deleted + * on abort, anyway). * ---------------- */ if (IsTransactionBlock() && !rel->rd_myxactonly) - elog(NOTICE, "Caution: TRUNCATE TABLE cannot be rolled back, so don't abort now"); + elog(ERROR, "TRUNCATE TABLE cannot run inside a BEGIN/END block"); /* * Release any buffers associated with this relation. If they're * dirty, they're just dropped without bothering to flush to disk. */ - - ReleaseRelationBuffers(rel); + DropRelationBuffers(rel); /* Now truncate the actual data and set blocks to zero */ @@ -1416,8 +1406,9 @@ heap_drop_with_catalog(const char *relname, { Relation rel; Oid rid; - bool istemp = (get_temp_rel_by_username(relname) != NULL); bool has_toasttable; + bool istemp = (get_temp_rel_by_username(relname) != NULL); + int i; /* ---------------- * Open and lock the relation. 
@@ -1425,6 +1416,7 @@ heap_drop_with_catalog(const char *relname, */ rel = heap_openr(relname, AccessExclusiveLock); rid = RelationGetRelid(rel); + has_toasttable = rel->rd_rel->reltoastrelid != InvalidOid; /* ---------------- * prevent deletion of system relations @@ -1433,46 +1425,40 @@ heap_drop_with_catalog(const char *relname, /* allow temp of pg_class? Guess so. */ if (!istemp && !allow_system_table_mods && IsSystemRelationName(RelationGetRelationName(rel))) - elog(ERROR, "System relation '%s' cannot be destroyed", + elog(ERROR, "System relation \"%s\" may not be dropped", RelationGetRelationName(rel)); /* ---------------- - * DROP TABLE within a transaction block is dangerous, because - * if the transaction is later rolled back there will be no way to - * undo the unlink of the relation's physical file. For now, allow it - * but emit a warning message. - * Someday we might want to consider postponing the physical unlink - * until transaction commit, but that's a lot of work... - * The only case that actually works right is for relations created - * in the current transaction, since the post-abort state would be that - * they don't exist anyway. So, no warning in that case. 
+ * Release all buffers that belong to this relation, after writing + * any that are dirty * ---------------- */ - if (IsTransactionBlock() && !rel->rd_myxactonly) - elog(NOTICE, "Caution: DROP TABLE cannot be rolled back, so don't abort now"); + i = FlushRelationBuffers(rel, (BlockNumber) 0); + if (i < 0) + elog(ERROR, "heap_drop_with_catalog: FlushRelationBuffers returned %d", + i); /* ---------------- - * remove inheritance information + * remove rules if necessary * ---------------- */ - RelationRemoveInheritance(rel); + if (rel->rd_rules != NULL) + RelationRemoveRules(rid); + + /* triggers */ + RelationRemoveTriggers(rel); /* ---------------- - * remove indexes if necessary + * remove inheritance information * ---------------- */ - /* should ignore relhasindex */ - RelationRemoveIndexes(rel); + RelationRemoveInheritance(rel); /* ---------------- - * remove rules if necessary + * remove indexes if necessary * ---------------- */ - if (rel->rd_rules != NULL) - RelationRemoveRules(rid); - - /* triggers */ - RelationRemoveTriggers(rel); + RelationRemoveIndexes(rel); /* ---------------- * delete attribute tuples @@ -1502,23 +1488,12 @@ heap_drop_with_catalog(const char *relname, */ DeleteRelationTuple(rel); - /* - * release dirty buffers of this relation; don't bother to write them - */ - ReleaseRelationBuffers(rel); - /* ---------------- * unlink the relation's physical file and finish up. * ---------------- */ - if (rel->rd_rel->relkind != RELKIND_VIEW && ! 
rel->rd_unlinked) + if (rel->rd_rel->relkind != RELKIND_VIEW) smgrunlink(DEFAULT_SMGR, rel); - rel->rd_unlinked = true; - - /* - * Remember if there is a toast relation for below - */ - has_toasttable = rel->rd_rel->reltoastrelid != InvalidOid; /* * Close relcache entry, but *keep* AccessExclusiveLock on the @@ -1533,6 +1508,7 @@ heap_drop_with_catalog(const char *relname, */ RelationForgetRelation(rid); + /* and from the temp-table map */ if (istemp) remove_temp_rel_by_relid(rid); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 33aa67fe45..3833c961f4 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.128 2000/10/11 21:28:18 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.129 2000/11/08 22:09:56 tgl Exp $ * * * INTERFACE ROUTINES @@ -26,6 +26,7 @@ #include "access/heapam.h" #include "access/istrat.h" #include "bootstrap/bootstrap.h" +#include "catalog/catalog.h" #include "catalog/catname.h" #include "catalog/heap.h" #include "catalog/index.h" @@ -43,10 +44,10 @@ #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/relcache.h" #include "utils/syscache.h" #include "utils/temprel.h" -#include "utils/inval.h" /* * macros used in guessing how many tuples are on a page. @@ -927,6 +928,13 @@ index_create(char *heapRelationName, indexRelation = heap_create(indexRelationName, indexTupDesc, istemp, false, allow_system_table_mods); + /* + * Obtain exclusive lock on it. Although no other backends can see it + * until we commit, this prevents deadlock-risk complaints from lock + * manager in cases such as CLUSTER. 
+ */ + LockRelation(indexRelation, AccessExclusiveLock); + /* ---------------- * construct the index relation descriptor * @@ -990,7 +998,8 @@ index_create(char *heapRelationName, * * In normal processing mode, the heap and index relations are closed by * index_build() --- but we continue to hold the ShareLock on the heap - * that we acquired above, until end of transaction. + * and the exclusive lock on the index that we acquired above, until + * end of transaction. */ if (IsBootstrapProcessingMode()) { @@ -1020,6 +1029,7 @@ index_drop(Oid indexId) Relation attributeRelation; HeapTuple tuple; int16 attnum; + int i; Assert(OidIsValid(indexId)); @@ -1040,19 +1050,11 @@ index_drop(Oid indexId) LockRelation(userIndexRelation, AccessExclusiveLock); /* ---------------- - * DROP INDEX within a transaction block is dangerous, because - * if the transaction is later rolled back there will be no way to - * undo the unlink of the relation's physical file. For now, allow it - * but emit a warning message. - * Someday we might want to consider postponing the physical unlink - * until transaction commit, but that's a lot of work... - * The only case that actually works right is for relations created - * in the current transaction, since the post-abort state would be that - * they don't exist anyway. So, no warning in that case. + * Note: unlike heap_drop_with_catalog, we do not need to prevent + * deletion of system indexes here; that's checked for upstream. + * If we did check it here, deletion of TOAST tables would fail... * ---------------- */ - if (IsTransactionBlock() && !userIndexRelation->rd_myxactonly) - elog(NOTICE, "Caution: DROP INDEX cannot be rolled back, so don't abort now"); /* ---------------- * fix DESCRIPTION relation @@ -1077,20 +1079,14 @@ index_drop(Oid indexId) heap_freetuple(tuple); /* - * Find the pg_class tuple for the owning relation. 
We do not attempt - * to clear relhasindex, since we are too lazy to test whether any other - * indexes remain (the next VACUUM will fix it if necessary). But we - * must send out a shared-cache-inval notice on the owning relation - * to ensure other backends update their relcache lists of indexes. + * Update the pg_class tuple for the owning relation. We are presently + * too lazy to attempt to compute the new correct value of relhasindex + * (the next VACUUM will fix it if necessary). But we must send out a + * shared-cache-inval notice on the owning relation to ensure other + * backends update their relcache lists of indexes. So, unconditionally + * do setRelhasindex(true). */ - tuple = SearchSysCacheTupleCopy(RELOID, - ObjectIdGetDatum(heapId), - 0, 0, 0); - - Assert(HeapTupleIsValid(tuple)); - - ImmediateInvalidateSharedHeapTuple(relationRelation, tuple); - heap_freetuple(tuple); + setRelhasindex(heapId, true); heap_close(relationRelation, RowExclusiveLock); @@ -1131,10 +1127,11 @@ index_drop(Oid indexId) /* * flush buffer cache and physically remove the file */ - ReleaseRelationBuffers(userIndexRelation); + i = FlushRelationBuffers(userIndexRelation, (BlockNumber) 0); + if (i < 0) + elog(ERROR, "index_drop: FlushRelationBuffers returned %d", i); - if (smgrunlink(DEFAULT_SMGR, userIndexRelation) != SM_SUCCESS) - elog(ERROR, "index_drop: unlink: %m"); + smgrunlink(DEFAULT_SMGR, userIndexRelation); /* * Close rels, but keep locks @@ -1144,7 +1141,7 @@ index_drop(Oid indexId) RelationForgetRelation(indexId); - /* does something only if it is a temp index */ + /* if it's a temp index, clear the temp mapping table entry */ remove_temp_rel_by_relid(indexId); } @@ -1331,7 +1328,11 @@ LockClassinfoForUpdate(Oid relid, HeapTuple rtup, return false; rtup->t_self = classTuple->t_self; pgcform = (Form_pg_class) GETSTRUCT(classTuple); - relationRelation = heap_openr(RelationRelationName, RowShareLock); + /* + * NOTE: get and hold RowExclusiveLock on pg_class, because 
caller will + * probably modify the rel's pg_class tuple later on. + */ + relationRelation = heap_openr(RelationRelationName, RowExclusiveLock); test = heap_mark4update(relationRelation, rtup, buffer); switch (test) { @@ -1388,57 +1389,38 @@ IndexesAreActive(Oid relid, bool confirmCommitted) if (!heap_getnext(scan, 0)) isactive = true; heap_endscan(scan); - heap_close(indexRelation, NoLock); + heap_close(indexRelation, AccessShareLock); return isactive; } /* ---------------- - * set relhasindex of pg_class in place + * set relhasindex of relation's pg_class entry + * + * NOTE: an important side-effect of this operation is that an SI invalidation + * message is sent out to all backends --- including me --- causing relcache + * entries to be flushed or updated with the new hasindex data. + * Therefore, we execute the update even if relhasindex has the right value + * already. Possible future improvement: skip the disk update and just send + * an SI message in that case. * ---------------- */ void -setRelhasindexInplace(Oid relid, bool hasindex, bool immediate) +setRelhasindex(Oid relid, bool hasindex) { - Relation whichRel; Relation pg_class; HeapTuple tuple; - Form_pg_class rd_rel; HeapScanDesc pg_class_scan = NULL; - /* ---------------- - * This routine handles updates for only the heap relation - * hasindex. In order to guarantee that we're able to *see* the index - * relation tuple, we bump the command counter id here. - * ---------------- - */ - CommandCounterIncrement(); - - /* ---------------- - * CommandCounterIncrement() flushes invalid cache entries, including - * those for the heap and index relations for which we're updating - * statistics. Now that the cache is flushed, it's safe to open the - * relation again. We need the relation open in order to figure out - * how many blocks it contains. 
- * ---------------- - */ - - whichRel = heap_open(relid, ShareLock); - - if (!RelationIsValid(whichRel)) - elog(ERROR, "setRelhasindexInplace: cannot open relation id %u", relid); - - /* ---------------- - * Find the RELATION relation tuple for the given relation. - * ---------------- + /* + * Find the tuple to update in pg_class. */ pg_class = heap_openr(RelationRelationName, RowExclusiveLock); - if (!RelationIsValid(pg_class)) - elog(ERROR, "setRelhasindexInplace: could not open RELATION relation"); if (!IsIgnoringSystemIndexes()) { tuple = SearchSysCacheTupleCopy(RELOID, - ObjectIdGetDatum(relid), 0, 0, 0); + ObjectIdGetDatum(relid), + 0, 0, 0); } else { @@ -1458,72 +1440,46 @@ setRelhasindexInplace(Oid relid, bool hasindex, bool immediate) if (pg_class_scan) heap_endscan(pg_class_scan); heap_close(pg_class, RowExclusiveLock); - elog(ERROR, "setRelhasindexInplace: cannot scan RELATION relation"); - } - - /* - * Confirm that target tuple is locked by this transaction in case of - * immediate updation. - */ - if (immediate) - { - HeapTupleHeader th = tuple->t_data; - - if (!(th->t_infomask & HEAP_XMIN_COMMITTED)) - elog(ERROR, "Immediate hasindex updation can be done only for committed tuples %x", th->t_infomask); - if (th->t_infomask & HEAP_XMAX_INVALID) - elog(ERROR, "Immediate hasindex updation can be done only for locked tuples %x", th->t_infomask); - if (th->t_infomask & HEAP_XMAX_COMMITTED) - elog(ERROR, "Immediate hasindex updation can be done only for locked tuples %x", th->t_infomask); - if (!(th->t_infomask & HEAP_MARKED_FOR_UPDATE)) - elog(ERROR, "Immediate hasindex updation can be done only for locked tuples %x", th->t_infomask); - if (!(TransactionIdIsCurrentTransactionId(th->t_xmax))) - elog(ERROR, "The updating tuple is already locked by another backend"); + elog(ERROR, "setRelhasindex: cannot find relation %u in pg_class", + relid); } - /* - * We shouldn't have to do this, but we do... 
Modify the reldesc in - * place with the new values so that the cache contains the latest - * copy. - */ - whichRel->rd_rel->relhasindex = hasindex; - /* ---------------- * Update hasindex in pg_class. * ---------------- */ + ((Form_pg_class) GETSTRUCT(tuple))->relhasindex = hasindex; + if (pg_class_scan) { - rd_rel = (Form_pg_class) GETSTRUCT(tuple); - rd_rel->relhasindex = hasindex; + /* Write the modified tuple in-place */ WriteNoReleaseBuffer(pg_class_scan->rs_cbuf); + /* Send out shared cache inval if necessary */ + if (!IsBootstrapProcessingMode()) + RelationInvalidateHeapTuple(pg_class, tuple); } else { - HeapTupleData htup; - Buffer buffer; - - htup.t_self = tuple->t_self; - heap_fetch(pg_class, SnapshotNow, &htup, &buffer); - rd_rel = (Form_pg_class) GETSTRUCT(&htup); - rd_rel->relhasindex = hasindex; - WriteBuffer(buffer); - } + heap_update(pg_class, &tuple->t_self, tuple, NULL); - /* - * Send out a shared-cache-inval message so other backends notice the - * update and fix their syscaches/relcaches. 
- */ - if (!IsBootstrapProcessingMode()) - ImmediateInvalidateSharedHeapTuple(pg_class, tuple); + /* Keep the catalog indices up to date */ + if (!IsIgnoringSystemIndexes()) + { + Relation idescs[Num_pg_class_indices]; + + CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices, + idescs); + CatalogIndexInsert(idescs, Num_pg_class_indices, pg_class, tuple); + CatalogCloseIndices(Num_pg_class_indices, idescs); + } + } if (!pg_class_scan) heap_freetuple(tuple); else heap_endscan(pg_class_scan); - heap_close(pg_class, NoLock); - heap_close(whichRel, NoLock); + heap_close(pg_class, RowExclusiveLock); } /* ---------------- @@ -1531,7 +1487,7 @@ setRelhasindexInplace(Oid relid, bool hasindex, bool immediate) * ---------------- */ void -UpdateStats(Oid relid, long reltuples, bool inplace) +UpdateStats(Oid relid, long reltuples) { Relation whichRel; Relation pg_class; @@ -1573,6 +1529,7 @@ UpdateStats(Oid relid, long reltuples, bool inplace) if (!RelationIsValid(whichRel)) elog(ERROR, "UpdateStats: cannot open relation id %u", relid); + /* Grab lock to be held till end of xact (probably redundant...) 
*/ LockRelation(whichRel, ShareLock); /* ---------------- @@ -1580,10 +1537,9 @@ UpdateStats(Oid relid, long reltuples, bool inplace) * ---------------- */ pg_class = heap_openr(RelationRelationName, RowExclusiveLock); - if (!RelationIsValid(pg_class)) - elog(ERROR, "UpdateStats: could not open RELATION relation"); - in_place_upd = (inplace || IsBootstrapProcessingMode()); + in_place_upd = (IsReindexProcessing() || IsBootstrapProcessingMode()); + if (!in_place_upd) { tuple = SearchSysCacheTupleCopy(RELOID, @@ -1608,7 +1564,8 @@ UpdateStats(Oid relid, long reltuples, bool inplace) if (pg_class_scan) heap_endscan(pg_class_scan); heap_close(pg_class, RowExclusiveLock); - elog(ERROR, "UpdateStats: cannot scan RELATION relation"); + elog(ERROR, "UpdateStats: cannot find relation %u in pg_class", + relid); } /* ---------------- @@ -1655,17 +1612,16 @@ UpdateStats(Oid relid, long reltuples, bool inplace) */ if (in_place_upd) { - /* * At bootstrap time, we don't need to worry about concurrency or - * visibility of changes, so we cheat. + * visibility of changes, so we cheat. Also cheat if REINDEX. */ - if (!IsBootstrapProcessingMode()) - ImmediateInvalidateSharedHeapTuple(pg_class, tuple); rd_rel = (Form_pg_class) GETSTRUCT(tuple); rd_rel->relpages = relpages; rd_rel->reltuples = reltuples; WriteNoReleaseBuffer(pg_class_scan->rs_cbuf); + if (!IsBootstrapProcessingMode()) + RelationInvalidateHeapTuple(pg_class, tuple); } else { @@ -1700,7 +1656,7 @@ UpdateStats(Oid relid, long reltuples, bool inplace) heap_close(pg_class, RowExclusiveLock); /* Cheating a little bit since we didn't open it with heap_open... 
*/ - heap_close(whichRel, ShareLock); + heap_close(whichRel, NoLock); } @@ -1868,18 +1824,16 @@ DefaultBuild(Relation heapRelation, { Oid hrelid = RelationGetRelid(heapRelation); Oid irelid = RelationGetRelid(indexRelation); - bool inplace = IsReindexProcessing(); heap_close(heapRelation, NoLock); index_close(indexRelation); - UpdateStats(hrelid, reltuples, inplace); - UpdateStats(irelid, indtuples, inplace); + UpdateStats(hrelid, reltuples); + UpdateStats(irelid, indtuples); if (oldPred != NULL) { if (indtuples == reltuples) predicate = NULL; - if (!inplace) - UpdateIndexPredicate(irelid, oldPred, predicate); + UpdateIndexPredicate(irelid, oldPred, predicate); } } } @@ -1981,6 +1935,15 @@ reindex_index(Oid indexId, bool force) accessMethodId; bool old; + /* ---------------- + * REINDEX within a transaction block is dangerous, because + * if the transaction is later rolled back we have no way to + * undo truncation of the index's physical file. Disallow it. + * ---------------- + */ + if (IsTransactionBlock()) + elog(ERROR, "REINDEX cannot run inside a BEGIN/END block"); + old = SetReindexProcessing(true); /* Scan pg_index to find the index's pg_index entry */ @@ -2024,7 +1987,7 @@ reindex_index(Oid indexId, bool force) * Release any buffers associated with this index. If they're dirty, * they're just dropped without bothering to flush to disk. 
*/ - ReleaseRelationBuffers(iRel); + DropRelationBuffers(iRel); /* Now truncate the actual data and set blocks to zero */ smgrtruncate(DEFAULT_SMGR, iRel, 0); @@ -2056,7 +2019,7 @@ activate_indexes_of_a_table(Oid relid, bool activate) if (IndexesAreActive(relid, true)) { if (!activate) - setRelhasindexInplace(relid, false, true); + setRelhasindex(relid, false); else return false; } @@ -2117,7 +2080,7 @@ reindex_relation(Oid relid, bool force) heap_endscan(scan); heap_close(indexRelation, AccessShareLock); if (reindexed) - setRelhasindexInplace(relid, true, false); + setRelhasindex(relid, true); SetReindexProcessing(old); return reindexed; } diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 5c176254d6..c02bafc322 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.58 2000/07/14 22:17:42 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.59 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,20 +34,14 @@ #include "utils/builtins.h" #include "utils/syscache.h" -static Relation copy_heap(Oid OIDOldHeap); -static void copy_index(Oid OIDOldIndex, Oid OIDNewHeap); +static Oid copy_heap(Oid OIDOldHeap, char *NewName); +static void copy_index(Oid OIDOldIndex, Oid OIDNewHeap, char *NewIndexName); static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex); /* * cluster * - * Check that the relation is a relation in the appropriate user - * ACL. I will use the same security that limits users on the - * renamerel() function. - * - * Check that the index specified is appropriate for the task - * ( ie it's an index over this relation ). This is trickier. - * + * STILL TO DO: * Create a list of all the other indicies on this relation. Because * the cluster will wreck all the tids, I'll need to destroy bogus * indicies. 
The user will have to re-create them. Not nice, but @@ -55,14 +49,6 @@ static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex); * destroy re-build. This may be possible. I'll check out what the * index create functiond want in the way of paramaters. On the other * hand, re-creating n indicies may blow out the space. - * - * Create new (temporary) relations for the base heap and the new - * index. - * - * Exclusively lock the relations. - * - * Create new clustered index and base heap relation. - * */ void cluster(char *oldrelname, char *oldindexname) @@ -70,101 +56,93 @@ cluster(char *oldrelname, char *oldindexname) Oid OIDOldHeap, OIDOldIndex, OIDNewHeap; - Relation OldHeap, OldIndex; - Relation NewHeap; - - char NewIndexName[NAMEDATALEN]; + HeapTuple tuple; char NewHeapName[NAMEDATALEN]; + char NewIndexName[NAMEDATALEN]; char saveoldrelname[NAMEDATALEN]; char saveoldindexname[NAMEDATALEN]; /* - * Copy the arguments into local storage, because they are probably - * in palloc'd storage that will go away when we commit a transaction. + * Copy the arguments into local storage, just to be safe. */ - strcpy(saveoldrelname, oldrelname); - strcpy(saveoldindexname, oldindexname); + StrNCpy(saveoldrelname, oldrelname, NAMEDATALEN); + StrNCpy(saveoldindexname, oldindexname, NAMEDATALEN); /* - * Like vacuum, cluster spans transactions, so I'm going to handle it - * in the same way: commit and restart transactions where needed. - * * We grab exclusive access to the target rel and index for the duration - * of the initial transaction. + * of the transaction. */ - OldHeap = heap_openr(saveoldrelname, AccessExclusiveLock); OIDOldHeap = RelationGetRelid(OldHeap); - OldIndex = index_openr(saveoldindexname); /* Open old index relation */ + OldIndex = index_openr(saveoldindexname); LockRelation(OldIndex, AccessExclusiveLock); OIDOldIndex = RelationGetRelid(OldIndex); /* - * XXX Should check that index is in fact an index on this relation? 
+ * Check that index is in fact an index on the given relation */ - - heap_close(OldHeap, NoLock);/* do NOT give up the locks */ + tuple = SearchSysCacheTuple(INDEXRELID, + ObjectIdGetDatum(OIDOldIndex), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "CLUSTER: no pg_index entry for index %u", + OIDOldIndex); + if (((Form_pg_index) GETSTRUCT(tuple))->indrelid != OIDOldHeap) + elog(ERROR, "CLUSTER: \"%s\" is not an index for table \"%s\"", + saveoldindexname, saveoldrelname); + + /* Drop relcache refcnts, but do NOT give up the locks */ + heap_close(OldHeap, NoLock); index_close(OldIndex); /* - * I need to build the copies of the heap and the index. The Commit() - * between here is *very* bogus. If someone is appending stuff, they - * will get the lock after being blocked and add rows which won't be - * present in the new table. Bleagh! I'd be best to try and ensure - * that no-one's in the tables for the entire duration of this process - * with a pg_vlock. XXX Isn't the above comment now invalid? + * Create the new heap with a temporary name. */ - NewHeap = copy_heap(OIDOldHeap); - OIDNewHeap = RelationGetRelid(NewHeap); - strcpy(NewHeapName, RelationGetRelationName(NewHeap)); + snprintf(NewHeapName, NAMEDATALEN, "temp_%u", OIDOldHeap); + + OIDNewHeap = copy_heap(OIDOldHeap, NewHeapName); /* To make the new heap visible (which is until now empty). */ CommandCounterIncrement(); + /* + * Copy the heap data into the new table in the desired order. + */ rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex); - /* To flush the filled new heap (and the statistics about it). */ + /* To make the new heap's data visible. */ CommandCounterIncrement(); /* Create new index over the tuples of the new heap. */ - copy_index(OIDOldIndex, OIDNewHeap); - snprintf(NewIndexName, NAMEDATALEN, "temp_%x", OIDOldIndex); + snprintf(NewIndexName, NAMEDATALEN, "temp_%u", OIDOldIndex); - /* - * make this really happen. Flush all the buffers. (Believe me, it is - * necessary ... 
ended up in a mess without it.) - */ - CommitTransactionCommand(); - StartTransactionCommand(); + copy_index(OIDOldIndex, OIDNewHeap, NewIndexName); + + CommandCounterIncrement(); /* Destroy old heap (along with its index) and rename new. */ heap_drop_with_catalog(saveoldrelname, allowSystemTableMods); - CommitTransactionCommand(); - StartTransactionCommand(); + CommandCounterIncrement(); renamerel(NewHeapName, saveoldrelname); + + /* This one might be unnecessary, but let's be safe. */ + CommandCounterIncrement(); + renamerel(NewIndexName, saveoldindexname); } -static Relation -copy_heap(Oid OIDOldHeap) +static Oid +copy_heap(Oid OIDOldHeap, char *NewName) { - char NewName[NAMEDATALEN]; TupleDesc OldHeapDesc, tupdesc; Oid OIDNewHeap; - Relation NewHeap, - OldHeap; - - /* - * Create a new heap relation with a temporary name, which has the - * same tuple description as the old one. - */ - snprintf(NewName, NAMEDATALEN, "temp_%x", OIDOldHeap); + Relation OldHeap; OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock); OldHeapDesc = RelationGetDescr(OldHeap); @@ -173,7 +151,6 @@ copy_heap(Oid OIDOldHeap) * Need to make a copy of the tuple descriptor, * heap_create_with_catalog modifies it. 
*/ - tupdesc = CreateTupleDescCopy(OldHeapDesc); OIDNewHeap = heap_create_with_catalog(NewName, tupdesc, @@ -181,19 +158,15 @@ copy_heap(Oid OIDOldHeap) allowSystemTableMods); if (!OidIsValid(OIDNewHeap)) - elog(ERROR, "clusterheap: cannot create temporary heap relation\n"); + elog(ERROR, "copy_heap: cannot create temporary heap relation"); - /* XXX why are we bothering to do this: */ - NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock); - - heap_close(NewHeap, AccessExclusiveLock); - heap_close(OldHeap, AccessExclusiveLock); + heap_close(OldHeap, NoLock); - return NewHeap; + return OIDNewHeap; } static void -copy_index(Oid OIDOldIndex, Oid OIDNewHeap) +copy_index(Oid OIDOldIndex, Oid OIDNewHeap, char *NewIndexName) { Relation OldIndex, NewHeap; @@ -202,18 +175,17 @@ copy_index(Oid OIDOldIndex, Oid OIDNewHeap) Form_pg_index Old_pg_index_Form; Form_pg_class Old_pg_index_relation_Form; IndexInfo *indexInfo; - char *NewIndexName; NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock); OldIndex = index_open(OIDOldIndex); /* - * OK. Create a new (temporary) index for the one that's already here. + * Create a new (temporary) index like the one that's already here. * To do this I get the info from pg_index, and add a new index with * a temporary name. */ Old_pg_index_Tuple = SearchSysCacheTupleCopy(INDEXRELID, - ObjectIdGetDatum(RelationGetRelid(OldIndex)), + ObjectIdGetDatum(OIDOldIndex), 0, 0, 0); Assert(Old_pg_index_Tuple); Old_pg_index_Form = (Form_pg_index) GETSTRUCT(Old_pg_index_Tuple); @@ -221,15 +193,11 @@ copy_index(Oid OIDOldIndex, Oid OIDNewHeap) indexInfo = BuildIndexInfo(Old_pg_index_Tuple); Old_pg_index_relation_Tuple = SearchSysCacheTupleCopy(RELOID, - ObjectIdGetDatum(RelationGetRelid(OldIndex)), + ObjectIdGetDatum(OIDOldIndex), 0, 0, 0); Assert(Old_pg_index_relation_Tuple); Old_pg_index_relation_Form = (Form_pg_class) GETSTRUCT(Old_pg_index_relation_Tuple); - /* Set the name. 
*/ - NewIndexName = palloc(NAMEDATALEN); /* XXX */ - snprintf(NewIndexName, NAMEDATALEN, "temp_%x", OIDOldIndex); - index_create(RelationGetRelationName(NewHeap), NewIndexName, indexInfo, @@ -239,10 +207,10 @@ copy_index(Oid OIDOldIndex, Oid OIDNewHeap) Old_pg_index_Form->indisprimary, allowSystemTableMods); - setRelhasindexInplace(OIDNewHeap, true, false); + setRelhasindex(OIDNewHeap, true); index_close(OldIndex); - heap_close(NewHeap, AccessExclusiveLock); + heap_close(NewHeap, NoLock); } @@ -294,6 +262,6 @@ rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex) index_endscan(ScanDesc); index_close(LocalOldIndex); - heap_close(LocalOldHeap, AccessExclusiveLock); - heap_close(LocalNewHeap, AccessExclusiveLock); + heap_close(LocalOldHeap, NoLock); + heap_close(LocalNewHeap, NoLock); } diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c index 4446c9f5cb..54b913dcac 100644 --- a/src/backend/commands/command.c +++ b/src/backend/commands/command.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.108 2000/10/26 21:34:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.109 2000/11/08 22:09:57 tgl Exp $ * * NOTES * The PerformAddAttribute() code, like most of the relation @@ -1661,9 +1661,13 @@ AlterTableCreateToastTable(const char *relationName, bool silent) /* * Update toast rel's pg_class entry to show that it has an index. - * NOTE this also does CommandCounterIncrement() to make index visible. 
*/ - setRelhasindexInplace(toast_relid, true, false); + setRelhasindex(toast_relid, true); + + /* + * Make index visible + */ + CommandCounterIncrement(); /* * Get the OID of the newly created index diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c index 6dd3c4dfab..bff2b897c6 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -356,10 +356,8 @@ CommentAttribute(char *relname, char *attrname, char *comment) attrtuple = SearchSysCacheTuple(ATTNAME, ObjectIdGetDatum(relation->rd_id), PointerGetDatum(attrname), 0, 0); if (!HeapTupleIsValid(attrtuple)) - { elog(ERROR, "'%s' is not an attribute of class '%s'", attrname, relname); - } oid = attrtuple->t_data->t_oid; /*** Call CreateComments() to create/drop the comments ***/ @@ -368,8 +366,7 @@ CommentAttribute(char *relname, char *attrname, char *comment) /*** Now, close the heap relation and return ***/ - heap_close(relation, AccessShareLock); - + heap_close(relation, NoLock); } /*------------------------------------------------------------------ @@ -840,6 +837,5 @@ CommentTrigger(char *trigger, char *relname, char *comment) heap_endscan(scan); heap_close(pg_trigger, AccessShareLock); - heap_close(relation, AccessShareLock); - + heap_close(relation, NoLock); } diff --git a/src/backend/commands/creatinh.c b/src/backend/commands/creatinh.c index b6485850eb..75fd047392 100644 --- a/src/backend/commands/creatinh.c +++ b/src/backend/commands/creatinh.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/creatinh.c,v 1.64 2000/09/12 21:06:47 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/creatinh.c,v 1.65 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -149,6 +149,20 @@ DefineRelation(CreateStmt *stmt, char relkind) StoreCatalogInheritance(relationId, inheritList); + /* + * We must bump the command counter to make the newly-created relation + * 
tuple visible for opening. + */ + CommandCounterIncrement(); + + /* + * Open the new relation and acquire exclusive lock on it. This isn't + * really necessary for locking out other backends (since they can't + * see the new rel anyway until we commit), but it keeps the lock manager + * from complaining about deadlock risks. + */ + rel = heap_openr(relname, AccessExclusiveLock); + /* * Now add any newly specified column default values and CHECK * constraints to the new relation. These are passed to us in the @@ -181,25 +195,11 @@ DefineRelation(CreateStmt *stmt, char relkind) rawDefaults = lappend(rawDefaults, rawEnt); } - /* If no raw defaults and no constraints, nothing to do. */ - if (rawDefaults == NIL && stmt->constraints == NIL) - return; - - /* - * We must bump the command counter to make the newly-created relation - * tuple visible for opening. - */ - CommandCounterIncrement(); - - /* - * Open the new relation. - */ - rel = heap_openr(relname, AccessExclusiveLock); - /* - * Parse and add the defaults/constraints. + * Parse and add the defaults/constraints, if any. */ - AddRelationRawConstraints(rel, rawDefaults, stmt->constraints); + if (rawDefaults || stmt->constraints) + AddRelationRawConstraints(rel, rawDefaults, stmt->constraints); /* * Clean up. 
We keep lock on new relation (although it shouldn't be diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index f9d1a92c75..fff6d56975 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.39 2000/10/22 23:32:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.40 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -214,7 +214,7 @@ DefineIndex(char *heapRelationName, * backends to flush their relcache entries and in particular their * cached lists of the indexes for this relation. */ - setRelhasindexInplace(relationId, true, false); + setRelhasindex(relationId, true); } @@ -635,6 +635,15 @@ ReindexIndex(const char *name, bool force /* currently unused */ ) { HeapTuple tuple; + /* ---------------- + * REINDEX within a transaction block is dangerous, because + * if the transaction is later rolled back we have no way to + * undo truncation of the index's physical file. Disallow it. + * ---------------- + */ + if (IsTransactionBlock()) + elog(ERROR, "REINDEX cannot run inside a BEGIN/END block"); + tuple = SearchSysCacheTuple(RELNAME, PointerGetDatum(name), 0, 0, 0); @@ -666,6 +675,15 @@ ReindexTable(const char *name, bool force) { HeapTuple tuple; + /* ---------------- + * REINDEX within a transaction block is dangerous, because + * if the transaction is later rolled back we have no way to + * undo truncation of the index's physical file. Disallow it. 
+ * ---------------- + */ + if (IsTransactionBlock()) + elog(ERROR, "REINDEX cannot run inside a BEGIN/END block"); + tuple = SearchSysCacheTuple(RELNAME, PointerGetDatum(name), 0, 0, 0); diff --git a/src/backend/commands/rename.c b/src/backend/commands/rename.c index 0f41cac1dc..6a9de4abf0 100644 --- a/src/backend/commands/rename.c +++ b/src/backend/commands/rename.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.51 2000/10/22 23:32:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.52 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -183,16 +183,9 @@ renamerel(const char *oldrelname, const char *newrelname) Oid reloid; char relkind; Relation irelations[Num_pg_class_indices]; -#ifdef OLD_FILE_NAMING - int i; - char oldpath[MAXPGPATH], - newpath[MAXPGPATH], - toldpath[MAXPGPATH + 10], - tnewpath[MAXPGPATH + 10]; -#endif if (!allowSystemTableMods && IsSystemRelationName(oldrelname)) - elog(ERROR, "renamerel: system relation \"%s\" not renamed", + elog(ERROR, "renamerel: system relation \"%s\" may not be renamed", oldrelname); if (!allowSystemTableMods && IsSystemRelationName(newrelname)) @@ -201,7 +194,7 @@ renamerel(const char *oldrelname, const char *newrelname) /* * Check for renaming a temp table, which only requires altering - * the temp-table mapping, not the physical table. + * the temp-table mapping, not the underlying table. */ if (rename_temp_relation(oldrelname, newrelname)) return; /* all done... 
*/ @@ -213,7 +206,7 @@ renamerel(const char *oldrelname, const char *newrelname) targetrelation = RelationNameGetRelation(oldrelname); if (!RelationIsValid(targetrelation)) - elog(ERROR, "Relation '%s' does not exist", oldrelname); + elog(ERROR, "Relation \"%s\" does not exist", oldrelname); /* * Grab an exclusive lock on the target table, which we will NOT @@ -221,46 +214,9 @@ renamerel(const char *oldrelname, const char *newrelname) */ LockRelation(targetrelation, AccessExclusiveLock); - /* ---------------- - * RENAME TABLE within a transaction block is dangerous, because - * if the transaction is later rolled back we have no way to - * undo the rename of the relation's physical file. For now, allow it - * but emit a warning message. - * Someday we might want to consider postponing the physical rename - * until transaction commit, but that's a lot of work... - * The only case that actually works right is for relations created - * in the current transaction, since the post-abort state would be that - * they don't exist anyway. So, no warning in that case. - * ---------------- - */ - if (IsTransactionBlock() && !targetrelation->rd_myxactonly) - elog(NOTICE, "Caution: RENAME TABLE cannot be rolled back, so don't abort now"); - reloid = RelationGetRelid(targetrelation); relkind = targetrelation->rd_rel->relkind; - /* - * Flush all blocks of the relation out of the buffer pool. We need - * this because the blocks are marked with the relation's name as well - * as OID. If some backend tries to write a dirty buffer with - * mdblindwrt after we've renamed the physical file, we'll be in big - * trouble. - * - * Since we hold the exclusive lock on the relation, we don't have to - * worry about more blocks being read in while we finish the rename. - */ - if (FlushRelationBuffers(targetrelation, (BlockNumber) 0) < 0) - elog(ERROR, "renamerel: unable to flush relation from buffer pool"); - - /* - * Make sure smgr and lower levels close the relation's files. 
(Next - * access to rel will reopen them.) - * - * Note: we rely on shared cache invalidation message to make other - * backends close and re-open the files. - */ - smgrclose(DEFAULT_SMGR, targetrelation); - /* * Close rel, but keep exclusive lock! */ @@ -271,8 +227,9 @@ renamerel(const char *oldrelname, const char *newrelname) * the right instant). It'll get rebuilt on next access to relation. * * XXX What if relation is myxactonly? + * + * XXX this is probably not necessary anymore? */ - targetrelation = NULL; /* make sure I don't touch it again */ RelationIdInvalidateRelationCacheByRelationId(reloid); /* @@ -291,7 +248,8 @@ renamerel(const char *oldrelname, const char *newrelname) elog(ERROR, "renamerel: relation \"%s\" exists", newrelname); /* - * Update pg_class tuple with new relname. + * Update pg_class tuple with new relname. (Scribbling on oldreltup + * is OK because it's a copy...) */ StrNCpy(NameStr(((Form_pg_class) GETSTRUCT(oldreltup))->relname), newrelname, NAMEDATALEN); @@ -310,36 +268,4 @@ renamerel(const char *oldrelname, const char *newrelname) */ if (relkind != RELKIND_INDEX) TypeRename(oldrelname, newrelname); - -#ifdef OLD_FILE_NAMING - /* - * Perform physical rename of files. If this fails, we haven't yet - * done anything irreversible. NOTE that this MUST be the last step; - * an error occurring afterwards would leave the relation hosed! - * - * XXX smgr.c ought to provide an interface for this; doing it directly - * is bletcherous. 
- */ - strcpy(oldpath, relpath(oldrelname)); - strcpy(newpath, relpath(newrelname)); - if (rename(oldpath, newpath) < 0) - elog(ERROR, "renamerel: unable to rename %s to %s: %m", - oldpath, newpath); - - /* rename additional segments of relation, too */ - for (i = 1;; i++) - { - sprintf(toldpath, "%s.%d", oldpath, i); - sprintf(tnewpath, "%s.%d", newpath, i); - if (rename(toldpath, tnewpath) < 0) - { - /* expected case is that there's not another segment file */ - if (errno == ENOENT) - break; - /* otherwise we're up the creek... */ - elog(ERROR, "renamerel: unable to rename %s to %s: %m", - toldpath, tnewpath); - } - } -#endif } diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 059bc42987..33340291e1 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.78 2000/10/16 17:08:05 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/trigger.c,v 1.79 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -388,6 +388,7 @@ RelationRemoveTriggers(Relation rel) HeapScanDesc tgscan; ScanKeyData key; HeapTuple tup; + bool found = false; tgrel = heap_openr(TriggerRelationName, RowExclusiveLock); ScanKeyEntryInitialize(&key, 0, Anum_pg_trigger_tgrelid, @@ -403,17 +404,44 @@ RelationRemoveTriggers(Relation rel) DeleteComments(tup->t_data->t_oid); heap_delete(tgrel, &tup->t_self, NULL); + + found = true; } heap_endscan(tgscan); /* ---------- - * Need to bump it here so the following doesn't see - * the already deleted triggers again for a self-referencing - * table. + * If we deleted any triggers, must update pg_class entry and + * advance command counter to make the updated entry visible. 
+ * This is fairly annoying, since we're just going to drop the + * durn thing later, but it's necessary to have a consistent + * state in case we do CommandCounterIncrement() below --- + * if RelationBuildTriggers() runs, it will complain otherwise. + * Perhaps RelationBuildTriggers() shouldn't be so picky... * ---------- */ - CommandCounterIncrement(); + if (found) + { + Relation pgrel; + Relation ridescs[Num_pg_class_indices]; + + pgrel = heap_openr(RelationRelationName, RowExclusiveLock); + tup = SearchSysCacheTupleCopy(RELOID, + RelationGetRelid(rel), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "RelationRemoveTriggers: relation %u not found in pg_class", + RelationGetRelid(rel)); + + ((Form_pg_class) GETSTRUCT(tup))->reltriggers = 0; + heap_update(pgrel, &tup->t_self, tup, NULL); + CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices, ridescs); + CatalogIndexInsert(ridescs, Num_pg_class_indices, pgrel, tup); + CatalogCloseIndices(Num_pg_class_indices, ridescs); + heap_freetuple(tup); + heap_close(pgrel, RowExclusiveLock); + CommandCounterIncrement(); + } /* ---------- * Also drop all constraint triggers referencing this relation @@ -431,22 +459,21 @@ RelationRemoveTriggers(Relation rel) pg_trigger = (Form_pg_trigger) GETSTRUCT(tup); - refrel = heap_open(pg_trigger->tgrelid, NoLock); + stmt.trigname = pstrdup(NameStr(pg_trigger->tgname)); + + /* May as well grab AccessExclusiveLock, since DropTrigger will. 
*/ + refrel = heap_open(pg_trigger->tgrelid, AccessExclusiveLock); stmt.relname = pstrdup(RelationGetRelationName(refrel)); heap_close(refrel, NoLock); - stmt.trigname = DatumGetCString(DirectFunctionCall1(nameout, - NameGetDatum(&pg_trigger->tgname))); - - elog(NOTICE, "DROP TABLE implicitly drops referential integrity trigger from table \"%s\"", stmt.relname); DropTrigger(&stmt); /* ---------- * Need to do a command counter increment here to show up - * new pg_class.reltriggers in the next loop invocation already - * (there are multiple referential integrity action + * new pg_class.reltriggers in the next loop iteration + * (in case there are multiple referential integrity action * triggers for the same FK table defined on the PK table). * ---------- */ @@ -747,9 +774,6 @@ equalTriggerDescs(TriggerDesc *trigdesc1, TriggerDesc *trigdesc2) * We need not examine the "index" data, just the trigger array * itself; if we have the same triggers with the same types, the * derived index data should match. - * - * XXX It seems possible that the same triggers could appear in different - * orders in the two trigger arrays; do we need to handle that? */ if (trigdesc1 != NULL) { diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index cb47eda5c6..a396ec7871 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: execAmi.c,v 1.54 2000/10/26 21:35:15 tgl Exp $ + * $Id: execAmi.c,v 1.55 2000/11/08 22:09:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -61,11 +61,8 @@ static Pointer ExecBeginScan(Relation relation, int nkeys, ScanKey skeys, * nkeys -- number of keys * skeys -- keys to restrict scanning * isindex -- if this is true, the relation is the relid of - * an index relation, else it is an index into the - * range table. 
+ * an index relation, else it is a heap relation. * Returns the relation as(relDesc scanDesc) - * If this structure is changed, need to modify the access macros - * defined in execInt.h. * ---------------------------------------------------------------- */ void @@ -90,16 +87,19 @@ ExecOpenScanR(Oid relOid, */ /* ---------------- - * open the relation with the correct call depending + * Open the relation with the correct call depending * on whether this is a heap relation or an index relation. * - * Do not lock the rel here; beginscan will acquire AccessShareLock. + * For a table, acquire AccessShareLock for the duration of the query + * execution. For indexes, acquire no lock here; the index machinery + * does its own locks and unlocks. (We rely on having some kind of + * lock on the parent table to ensure the index won't go away!) * ---------------- */ if (isindex) relation = index_open(relOid); else - relation = heap_open(relOid, NoLock); + relation = heap_open(relOid, AccessShareLock); scanDesc = ExecBeginScan(relation, nkeys, @@ -136,8 +136,6 @@ ExecBeginScan(Relation relation, { Pointer scanDesc; - scanDesc = NULL; - /* ---------------- * open the appropriate type of scan. * @@ -183,12 +181,11 @@ ExecCloseR(Plan *node) HeapScanDesc scanDesc; /* ---------------- - * shut down the heap scan and close the heap relation + * get state for node and shut down the heap scan, if any * ---------------- */ switch (nodeTag(node)) { - case T_SeqScan: state = ((SeqScan *) node)->scanstate; break; @@ -212,18 +209,9 @@ ExecCloseR(Plan *node) if (scanDesc != NULL) heap_endscan(scanDesc); - /* - * endscan released AccessShareLock acquired by beginscan. If we are - * holding any stronger locks on the rel, they should be held till end - * of xact. Therefore, we need only close the rel and not release - * locks. 
- */ - if (relation != NULL) - heap_close(relation, NoLock); - /* ---------------- * if this is an index scan then we have to take care - * of the index relations as well.. + * of the index relations as well. * ---------------- */ if (IsA(node, IndexScan)) @@ -242,7 +230,7 @@ ExecCloseR(Plan *node) for (i = 0; i < numIndices; i++) { /* ---------------- - * shut down each of the scans and + * shut down each of the index scans and * close each of the index relations * ---------------- */ @@ -253,6 +241,16 @@ ExecCloseR(Plan *node) index_close(indexRelationDescs[i]); } } + + /* + * Finally, close the heap relation. + * + * Currently, we do not release the AccessShareLock acquired by + * ExecOpenScanR. This lock should be held till end of transaction. + * (There is a faction that considers this too much locking, however.) + */ + if (relation != NULL) + heap_close(relation, NoLock); } /* ---------------------------------------------------------------- diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 93edc9f878..63f39f2e4c 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: analyze.c,v 1.164 2000/11/05 01:42:07 tgl Exp $ + * $Id: analyze.c,v 1.165 2000/11/08 22:09:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -115,7 +115,7 @@ static void release_pstate_resources(ParseState *pstate) { if (pstate->p_target_relation != NULL) - heap_close(pstate->p_target_relation, AccessShareLock); + heap_close(pstate->p_target_relation, NoLock); pstate->p_target_relation = NULL; pstate->p_target_rangetblentry = NULL; } @@ -292,6 +292,7 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt) qry->commandType = CMD_DELETE; /* set up a range table */ + lockTargetTable(pstate, stmt->relname); makeRangeTable(pstate, NIL); 
setTargetTable(pstate, stmt->relname, stmt->inh, true); @@ -331,6 +332,13 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) qry->commandType = CMD_INSERT; pstate->p_is_insert = true; + /* + * Must get write lock on target table before scanning SELECT, + * else we will grab the wrong kind of initial lock if the target + * table is also mentioned in the SELECT part. + */ + lockTargetTable(pstate, stmt->relname); + /* * Is it INSERT ... SELECT or INSERT ... VALUES? */ @@ -1521,6 +1529,16 @@ transformRuleStmt(ParseState *pstate, RuleStmt *stmt) qry->commandType = CMD_UTILITY; qry->utilityStmt = (Node *) stmt; + /* + * To avoid deadlock, make sure the first thing we do is grab + * AccessExclusiveLock on the target relation. This will be + * needed by DefineQueryRewrite(), and we don't want to grab a lesser + * lock beforehand. We don't need to hold a refcount on the relcache + * entry, however. + */ + heap_close(heap_openr(stmt->object->relname, AccessExclusiveLock), + NoLock); + /* * NOTE: 'OLD' must always have a varno equal to 1 and 'NEW' * equal to 2. 
Set up their RTEs in the main pstate for use @@ -1727,6 +1745,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->isBinary = FALSE; } + /* make FOR UPDATE clause available to addRangeTableEntry */ + pstate->p_forUpdate = stmt->forUpdate; + /* set up a range table */ makeRangeTable(pstate, stmt->fromClause); @@ -1765,7 +1786,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->rtable = pstate->p_rtable; qry->jointree = makeFromExpr(pstate->p_joinlist, qual); - if (stmt->forUpdate != NULL) + if (stmt->forUpdate != NIL) transformForUpdate(qry, stmt->forUpdate); return qry; @@ -1951,7 +1972,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) qry->rtable = pstate->p_rtable; qry->jointree = makeFromExpr(pstate->p_joinlist, NULL); - if (forUpdate != NULL) + if (forUpdate != NIL) transformForUpdate(qry, forUpdate); return qry; @@ -2159,6 +2180,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) * used in FROM, we'd fail to notice that it should be marked * checkForRead as well as checkForWrite. See setTargetTable(). */ + lockTargetTable(pstate, stmt->relname); makeRangeTable(pstate, stmt->fromClause); setTargetTable(pstate, stmt->relname, stmt->inh, true); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 4fbc628c58..5572828d25 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.207 2000/11/08 21:28:06 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.208 2000/11/08 22:09:58 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -334,7 +334,7 @@ static void doNegateFloat(Value *v); * when some sort of pg_privileges relation is introduced. * - Todd A. Brandys 1998-01-01? 
*/ -%token ABORT_TRANS, ACCESS, AFTER, AGGREGATE, ANALYZE, ANALYSE /* British */ +%token ABORT_TRANS, ACCESS, AFTER, AGGREGATE, ANALYZE, ANALYSE, BACKWARD, BEFORE, BINARY, BIT, CACHE, CHECKPOINT, CLUSTER, COMMENT, COPY, CREATEDB, CREATEUSER, CYCLE, DATABASE, DELIMITERS, DO, @@ -2466,11 +2466,7 @@ ExtendStmt: EXTEND INDEX index_name where_clause /* NOT USED RecipeStmt: EXECUTE RECIPE recipe_name { - RecipeStmt *n; - if (!IsTransactionBlock()) - elog(ERROR,"EXECUTE RECIPE may only be used in begin/end transaction blocks"); - - n = makeNode(RecipeStmt); + RecipeStmt *n = makeNode(RecipeStmt); n->recipeName = $3; $$ = (Node *)n; } @@ -2633,8 +2629,6 @@ oper_argtypes: Typename ReindexStmt: REINDEX reindex_type name opt_force { ReindexStmt *n = makeNode(ReindexStmt); - if (IsTransactionBlock()) - elog(ERROR,"REINDEX command could only be used outside begin/end transaction blocks"); n->reindexType = $2; n->name = $3; n->force = $4; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 38dc3ea097..60521d1347 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.70 2000/10/07 00:58:17 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.71 2000/11/08 22:09:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,6 +87,34 @@ makeRangeTable(ParseState *pstate, List *frmList) } } +/* + * lockTargetTable + * Find the target relation of INSERT/UPDATE/DELETE and acquire write + * lock on it. This must be done before building the range table, + * in case the target is also mentioned as a source relation --- we + * want to be sure to grab the write lock before any read lock. + * + * The ParseState's link to the target relcache entry is also set here. 
+ */ +void +lockTargetTable(ParseState *pstate, char *relname) +{ + /* Close old target; this could only happen for multi-action rules */ + if (pstate->p_target_relation != NULL) + heap_close(pstate->p_target_relation, NoLock); + pstate->p_target_relation = NULL; + pstate->p_target_rangetblentry = NULL; /* setTargetTable will set this */ + + /* + * Open target rel and grab suitable lock (which we will hold till + * end of transaction). + * + * analyze.c will eventually do the corresponding heap_close(), + * but *not* release the lock. + */ + pstate->p_target_relation = heap_openr(relname, RowExclusiveLock); +} + /* * setTargetTable * Add the target relation of INSERT/UPDATE/DELETE to the range table, @@ -133,13 +161,10 @@ setTargetTable(ParseState *pstate, char *relname, bool inh, bool inJoinSet) if (inJoinSet) addRTEtoJoinList(pstate, rte); - /* This could only happen for multi-action rules */ - if (pstate->p_target_relation != NULL) - heap_close(pstate->p_target_relation, AccessShareLock); + /* lockTargetTable should have been called earlier */ + Assert(pstate->p_target_relation != NULL); pstate->p_target_rangetblentry = rte; - pstate->p_target_relation = heap_open(rte->relid, AccessShareLock); - /* will close relation later, see analyze.c */ } diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 3fccd95cb1..984485f9b4 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.49 2000/09/29 18:21:36 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.50 2000/11/08 22:09:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,6 +34,7 @@ static Node *scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname); static Node *scanJoinForColumn(JoinExpr *join, char *colname, int sublevels_up); +static bool 
isForUpdate(ParseState *pstate, char *relname); static List *expandNamesVars(ParseState *pstate, List *names, List *vars); static void warnAutoRange(ParseState *pstate, char *refname); @@ -477,6 +478,7 @@ addRangeTableEntry(ParseState *pstate, bool inFromCl) { char *refname = alias ? alias->relname : relname; + LOCKMODE lockmode; Relation rel; RangeTblEntry *rte; Attr *eref; @@ -502,17 +504,22 @@ addRangeTableEntry(ParseState *pstate, /* * Get the rel's OID. This access also ensures that we have an - * up-to-date relcache entry for the rel. We don't need to keep it - * open, however. Since this is open anyway, let's check that the - * number of column aliases is reasonable. - Thomas 2000-02-04 + * up-to-date relcache entry for the rel. Since this is typically + * the first access to a rel in a statement, be careful to get the + * right access level depending on whether we're doing SELECT FOR UPDATE. */ - rel = heap_openr(relname, AccessShareLock); + lockmode = isForUpdate(pstate, relname) ? RowShareLock : AccessShareLock; + rel = heap_openr(relname, lockmode); rte->relid = RelationGetRelid(rel); - maxattrs = RelationGetNumberOfAttributes(rel); eref = alias ? (Attr *) copyObject(alias) : makeAttr(refname, NULL); numaliases = length(eref->attrs); + /* + * Since the rel is open anyway, let's check that the + * number of column aliases is reasonable. - Thomas 2000-02-04 + */ + maxattrs = RelationGetNumberOfAttributes(rel); if (maxattrs < numaliases) elog(ERROR, "Table \"%s\" has %d columns available but %d columns specified", refname, maxattrs, numaliases); @@ -527,7 +534,12 @@ addRangeTableEntry(ParseState *pstate, } rte->eref = eref; - heap_close(rel, AccessShareLock); + /* + * Drop the rel refcount, but keep the access lock till end of transaction + * so that the table can't be deleted or have its schema modified + * underneath us. 
+ */ + heap_close(rel, NoLock); /*---------- * Flags: @@ -643,6 +655,41 @@ addRangeTableEntryForSubquery(ParseState *pstate, return rte; } +/* + * Has the specified relname been selected FOR UPDATE? + */ +static bool +isForUpdate(ParseState *pstate, char *relname) +{ + /* Outer loop to check parent query levels as well as this one */ + while (pstate != NULL) + { + if (pstate->p_forUpdate != NIL) + { + if (lfirst(pstate->p_forUpdate) == NULL) + { + /* all tables used in query */ + return true; + } + else + { + /* just the named tables */ + List *l; + + foreach(l, pstate->p_forUpdate) + { + char *rname = lfirst(l); + + if (strcmp(relname, rname) == 0) + return true; + } + } + } + pstate = pstate->parentParseState; + } + return false; +} + /* * Add the given RTE as a top-level entry in the pstate's join list, * unless there already is an entry for it. diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index d0fe6a5ee1..a0a9d5671e 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteHandler.c,v 1.82 2000/10/05 19:11:34 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/rewrite/rewriteHandler.c,v 1.83 2000/11/08 22:09:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -363,14 +363,6 @@ static Query * fireRIRrules(Query *parsetree) { int rt_index; - RangeTblEntry *rte; - Relation rel; - List *locks; - RuleLock *rules; - RewriteRule *rule; - bool relIsUsed; - int i; - List *l; /* * don't try to convert this into a foreach loop, because rtable list @@ -379,6 +371,16 @@ fireRIRrules(Query *parsetree) rt_index = 0; while (rt_index < length(parsetree->rtable)) { + RangeTblEntry *rte; + Relation rel; + List *locks; + RuleLock *rules; + RewriteRule *rule; + LOCKMODE lockmode; + bool relIsUsed; + int i; + List *l; + ++rt_index; rte = rt_fetch(rt_index, 
parsetree->rtable); @@ -406,11 +408,32 @@ fireRIRrules(Query *parsetree) if (!relIsUsed && rt_index != parsetree->resultRelation) continue; - rel = heap_openr(rte->relname, AccessShareLock); + /* + * This may well be the first access to the relation during + * the current statement (it will be, if this Query was extracted + * from a rule or somehow got here other than via the parser). + * Therefore, grab the appropriate lock type for the relation, + * and do not release it until end of transaction. This protects + * the rewriter and planner against schema changes mid-query. + * + * If the relation is the query's result relation, then RewriteQuery() + * already got the right lock on it, so we need no additional lock. + * Otherwise, check to see if the relation is accessed FOR UPDATE + * or not. + */ + if (rt_index == parsetree->resultRelation) + lockmode = NoLock; + else if (intMember(rt_index, parsetree->rowMarks)) + lockmode = RowShareLock; + else + lockmode = AccessShareLock; + + rel = heap_openr(rte->relname, lockmode); + rules = rel->rd_rules; if (rules == NULL) { - heap_close(rel, AccessShareLock); + heap_close(rel, NoLock); continue; } @@ -450,7 +473,7 @@ fireRIRrules(Query *parsetree) relIsUsed); } - heap_close(rel, AccessShareLock); + heap_close(rel, NoLock); } /* @@ -761,8 +784,19 @@ RewriteQuery(Query *parsetree, bool *instead_flag, List **qual_products) * the statement is an update, insert or delete - fire rules on it. */ result_relation = parsetree->resultRelation; + Assert(result_relation != 0); rt_entry = rt_fetch(result_relation, parsetree->rtable); - rt_entry_relation = heap_openr(rt_entry->relname, AccessShareLock); + + /* + * This may well be the first access to the result relation during + * the current statement (it will be, if this Query was extracted + * from a rule or somehow got here other than via the parser). + * Therefore, grab the appropriate lock type for a result relation, + * and do not release it until end of transaction. 
This protects the + * rewriter and planner against schema changes mid-query. + */ + rt_entry_relation = heap_openr(rt_entry->relname, RowExclusiveLock); + rt_entry_locks = rt_entry_relation->rd_rules; if (rt_entry_locks != NULL) @@ -778,7 +812,7 @@ RewriteQuery(Query *parsetree, bool *instead_flag, List **qual_products) qual_products); } - heap_close(rt_entry_relation, AccessShareLock); + heap_close(rt_entry_relation, NoLock); /* keep lock! */ return product_queries; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 9c9bda5035..8d40e8d952 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.92 2000/10/28 16:20:55 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.93 2000/11/08 22:09:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -709,23 +709,28 @@ refcount = %ld, file: %s, line: %d\n", #endif /* - * FlushBuffer -- like WriteBuffer, but force the page to disk. + * FlushBuffer -- like WriteBuffer, but write the page immediately, + * rather than just marking it dirty. On success return, the buffer will + * no longer be dirty. * * 'buffer' is known to be dirty/pinned, so there should not be a * problem reading the BufferDesc members without the BufMgrLock * (nobody should be able to change tags out from under us). * - * Unpin if 'release' is TRUE. + * If 'sync' is true, a synchronous write is wanted (wait for buffer to hit + * the disk). Otherwise it's sufficient to issue the kernel write call. + * + * Unpin buffer if 'release' is true. */ int -FlushBuffer(Buffer buffer, bool release) +FlushBuffer(Buffer buffer, bool sync, bool release) { BufferDesc *bufHdr; Relation bufrel; int status; if (BufferIsLocal(buffer)) - return FlushLocalBuffer(buffer, release) ? 
STATUS_OK : STATUS_ERROR; + return FlushLocalBuffer(buffer, sync, release) ? STATUS_OK : STATUS_ERROR; if (BAD_BUFFER_ID(buffer)) return STATUS_ERROR; @@ -755,12 +760,16 @@ FlushBuffer(Buffer buffer, bool release) */ LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE); - status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); + if (sync) + status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + else + status = smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK); - /* drop relcache refcnt incremented by RelationIdCacheGetRelation */ + /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ RelationDecrementReferenceCount(bufrel); if (status == SM_FAIL) @@ -926,7 +935,7 @@ SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr) /* * drop relcache refcnt incremented by - * RelationIdCacheGetRelation + * RelationNodeCacheGetRelation */ RelationDecrementReferenceCount(reln); } @@ -1123,7 +1132,7 @@ BufferSync() bufHdr->flags &= ~BM_DIRTY; } - /* drop refcnt obtained by RelationIdCacheGetRelation */ + /* drop refcnt obtained by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); } @@ -1154,7 +1163,7 @@ BufferSync() /* * drop relcache refcnt incremented by - * RelationIdCacheGetRelation + * RelationNodeCacheGetRelation */ RelationDecrementReferenceCount(reln); @@ -1458,7 +1467,7 @@ BufferReplace(BufferDesc *bufHdr) SpinAcquire(BufMgrLock); - /* drop relcache refcnt incremented by RelationIdCacheGetRelation */ + /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); @@ -1495,21 +1504,23 @@ RelationGetNumberOfBlocks(Relation relation) } /* --------------------------------------------------------------------- - * 
ReleaseRelationBuffers + * DropRelationBuffers * * This function removes all the buffered pages for a relation * from the buffer pool. Dirty pages are simply dropped, without - * bothering to write them out first. This is used when the - * relation is about to be deleted. We assume that the caller - * holds an exclusive lock on the relation, which should assure - * that no new buffers will be acquired for the rel meanwhile. + * bothering to write them out first. This is NOT rollback-able, + * and so should be used only with extreme caution! + * + * We assume that the caller holds an exclusive lock on the relation, + * which should assure that no new buffers will be acquired for the rel + * meanwhile. * * XXX currently it sequentially searches the buffer pool, should be * changed to more clever ways of searching. * -------------------------------------------------------------------- */ void -ReleaseRelationBuffers(Relation rel) +DropRelationBuffers(Relation rel) { int i; BufferDesc *bufHdr; @@ -1589,7 +1600,104 @@ recheck: * this rel, since we hold exclusive lock on this rel. */ if (RelFileNodeEquals(rel->rd_node, - BufferTagLastDirtied[i - 1].rnode)) + BufferTagLastDirtied[i - 1].rnode)) + BufferDirtiedByMe[i - 1] = false; + } + + SpinRelease(BufMgrLock); +} + +/* --------------------------------------------------------------------- + * DropRelFileNodeBuffers + * + * This is the same as DropRelationBuffers, except that the target + * relation is specified by RelFileNode. + * + * This is NOT rollback-able. One legitimate use is to clear the + * buffer cache of buffers for a relation that is being deleted + * during transaction abort. + * -------------------------------------------------------------------- + */ +void +DropRelFileNodeBuffers(RelFileNode rnode) +{ + int i; + BufferDesc *bufHdr; + + /* We have to search both local and shared buffers... 
*/ + + for (i = 0; i < NLocBuffer; i++) + { + bufHdr = &LocalBufferDescriptors[i]; + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) + { + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + LocalRefCount[i] = 0; + bufHdr->tag.rnode.relNode = InvalidOid; + } + } + + SpinAcquire(BufMgrLock); + for (i = 1; i <= NBuffers; i++) + { + bufHdr = &BufferDescriptors[i - 1]; +recheck: + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) + { + + /* + * If there is I/O in progress, better wait till it's done; + * don't want to delete the relation out from under someone + * who's just trying to flush the buffer! + */ + if (bufHdr->flags & BM_IO_IN_PROGRESS) + { + WaitIO(bufHdr, BufMgrLock); + + /* + * By now, the buffer very possibly belongs to some other + * rel, so check again before proceeding. + */ + goto recheck; + } + /* Now we can do what we came for */ + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + + /* + * Release any refcount we may have. + * + * This is very probably dead code, and if it isn't then it's + * probably wrong. I added the Assert to find out --- tgl + * 11/99. + */ + if (!(bufHdr->flags & BM_FREE)) + { + /* Assert checks that buffer will actually get freed! */ + Assert(PrivateRefCount[i - 1] == 1 && + bufHdr->refcount == 1); + /* ReleaseBuffer expects we do not hold the lock at entry */ + SpinRelease(BufMgrLock); + ReleaseBuffer(i); + SpinAcquire(BufMgrLock); + } + /* + * And mark the buffer as no longer occupied by this rel. + */ + BufTableDelete(bufHdr); + } + + /* + * Also check to see if BufferDirtiedByMe info for this buffer + * refers to the target relation, and clear it if so. This is + * independent of whether the current contents of the buffer + * belong to the target relation! + * + * NOTE: we have no way to clear BufferDirtiedByMe info in other + * backends, but hopefully there are none with that bit set for + * this rel, since we hold exclusive lock on this rel. 
+ */ + if (RelFileNodeEquals(rnode, + BufferTagLastDirtied[i - 1].rnode)) BufferDirtiedByMe[i - 1] = false; } @@ -1604,7 +1712,7 @@ recheck: * bothering to write them out first. This is used when we destroy a * database, to avoid trying to flush data to disk when the directory * tree no longer exists. Implementation is pretty similar to - * ReleaseRelationBuffers() which is for destroying just one relation. + * DropRelationBuffers() which is for destroying just one relation. * -------------------------------------------------------------------- */ void @@ -1757,33 +1865,32 @@ BufferPoolBlowaway() /* --------------------------------------------------------------------- * FlushRelationBuffers * - * This function flushes all dirty pages of a relation out to disk. + * This function writes all dirty pages of a relation out to disk. * Furthermore, pages that have blocknumber >= firstDelBlock are * actually removed from the buffer pool. An error code is returned * if we fail to dump a dirty buffer or if we find one of * the target pages is pinned into the cache. * - * This is used by VACUUM before truncating the relation to the given - * number of blocks. (TRUNCATE TABLE also uses it in the same way.) - * It might seem unnecessary to flush dirty pages before firstDelBlock, - * since VACUUM should already have committed its changes. However, - * it is possible for there still to be dirty pages: if some page - * had unwritten on-row tuple status updates from a prior transaction, - * and VACUUM had no additional changes to make to that page, then - * VACUUM won't have written it. This is harmless in most cases but - * will break pg_upgrade, which relies on VACUUM to ensure that *all* - * tuples have correct on-row status. So, we check and flush all - * dirty pages of the rel regardless of block number. - * - * This is also used by RENAME TABLE (with firstDelBlock = 0) - * to clear out the buffer cache before renaming the physical files of - * a relation. 
Without that, some other backend might try to do a - * blind write of a buffer page (relying on the BlindId of the buffer) - * and fail because it's not got the right filename anymore. + * This is called by DROP TABLE to clear buffers for the relation + * from the buffer pool. Note that we must write dirty buffers, + * rather than just dropping the changes, because our transaction + * might abort later on; we want to roll back safely in that case. + * + * This is also called by VACUUM before truncating the relation to the + * given number of blocks. It might seem unnecessary for VACUUM to + * write dirty pages before firstDelBlock, since VACUUM should already + * have committed its changes. However, it is possible for there still + * to be dirty pages: if some page had unwritten on-row tuple status + * updates from a prior transaction, and VACUUM had no additional + * changes to make to that page, then VACUUM won't have written it. + * This is harmless in most cases but will break pg_upgrade, which + * relies on VACUUM to ensure that *all* tuples have correct on-row + * status. So, we check and flush all dirty pages of the rel + * regardless of block number. * * In all cases, the caller should be holding AccessExclusiveLock on * the target relation to ensure that no other backend is busy reading - * more blocks of the relation. + * more blocks of the relation (or might do so before we commit). * * Formerly, we considered it an error condition if we found dirty * buffers here. 
However, since BufferSync no longer forces out all @@ -1812,7 +1919,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) { if (bufHdr->flags & BM_DIRTY) { - if (FlushBuffer(-i - 1, false) != STATUS_OK) + if (FlushBuffer(-i - 1, false, false) != STATUS_OK) { elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it", RelationGetRelationName(rel), firstDelBlock, @@ -1840,15 +1947,17 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) for (i = 0; i < NBuffers; i++) { bufHdr = &BufferDescriptors[i]; -recheck: if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { if (bufHdr->flags & BM_DIRTY) { PinBuffer(bufHdr); SpinRelease(BufMgrLock); - if (FlushBuffer(i + 1, true) != STATUS_OK) + if (FlushBuffer(i + 1, false, false) != STATUS_OK) { + SpinAcquire(BufMgrLock); + UnpinBuffer(bufHdr); + SpinRelease(BufMgrLock); elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum, @@ -1856,12 +1965,7 @@ recheck: return -1; } SpinAcquire(BufMgrLock); - - /* - * Buffer could already be reassigned, so must recheck - * whether it still belongs to rel before freeing it! 
- */ - goto recheck; + UnpinBuffer(bufHdr); } if (!(bufHdr->flags & BM_FREE)) { diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index faa3304b4f..352f519bdc 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -16,7 +16,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.33 2000/10/28 16:20:56 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.34 2000/11/08 22:09:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -183,7 +183,7 @@ WriteLocalBuffer(Buffer buffer, bool release) * flushes a local buffer */ int -FlushLocalBuffer(Buffer buffer, bool release) +FlushLocalBuffer(Buffer buffer, bool sync, bool release) { int bufid; Relation bufrel; @@ -199,13 +199,18 @@ FlushLocalBuffer(Buffer buffer, bool release) bufHdr = &LocalBufferDescriptors[bufid]; bufHdr->flags &= ~BM_DIRTY; bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - Assert(bufrel != NULL); - smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); + + if (sync) + smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + else + smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + LocalBufferFlushCount++; - /* drop relcache refcount incremented by RelationIdCacheGetRelation */ + /* drop relcache refcount incremented by RelationNodeCacheGetRelation */ RelationDecrementReferenceCount(bufrel); if (release) diff --git a/src/backend/storage/buffer/xlog_bufmgr.c b/src/backend/storage/buffer/xlog_bufmgr.c index dcd377b7eb..15c4321405 100644 --- a/src/backend/storage/buffer/xlog_bufmgr.c +++ b/src/backend/storage/buffer/xlog_bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.1 2000/10/28 16:20:56 vadim Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.2 2000/11/08 22:09:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -838,7 +838,7 @@ BufferSync() SpinRelease(BufMgrLock); - /* drop refcnt obtained by RelationIdCacheGetRelation */ + /* drop refcnt obtained by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) { RelationDecrementReferenceCount(reln); @@ -1128,7 +1128,7 @@ BufferReplace(BufferDesc *bufHdr) false); /* no fsync */ } - /* drop relcache refcnt incremented by RelationIdCacheGetRelation */ + /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); @@ -1159,21 +1159,23 @@ RelationGetNumberOfBlocks(Relation relation) } /* --------------------------------------------------------------------- - * ReleaseRelationBuffers + * DropRelationBuffers * * This function removes all the buffered pages for a relation * from the buffer pool. Dirty pages are simply dropped, without - * bothering to write them out first. This is used when the - * relation is about to be deleted. We assume that the caller - * holds an exclusive lock on the relation, which should assure - * that no new buffers will be acquired for the rel meanwhile. + * bothering to write them out first. This is NOT rollback-able, + * and so should be used only with extreme caution! + * + * We assume that the caller holds an exclusive lock on the relation, + * which should assure that no new buffers will be acquired for the rel + * meanwhile. * * XXX currently it sequentially searches the buffer pool, should be * changed to more clever ways of searching. 
* -------------------------------------------------------------------- */ void -ReleaseRelationBuffers(Relation rel) +DropRelationBuffers(Relation rel) { int i; BufferDesc *bufHdr; @@ -1248,6 +1250,91 @@ recheck: SpinRelease(BufMgrLock); } +/* --------------------------------------------------------------------- + * DropRelFileNodeBuffers + * + * This is the same as DropRelationBuffers, except that the target + * relation is specified by RelFileNode. + * + * This is NOT rollback-able. One legitimate use is to clear the + * buffer cache of buffers for a relation that is being deleted + * during transaction abort. + * -------------------------------------------------------------------- + */ +void +DropRelFileNodeBuffers(RelFileNode rnode) +{ + int i; + BufferDesc *bufHdr; + + /* We have to search both local and shared buffers... */ + + for (i = 0; i < NLocBuffer; i++) + { + bufHdr = &LocalBufferDescriptors[i]; + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) + { + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; + LocalRefCount[i] = 0; + bufHdr->tag.rnode.relNode = InvalidOid; + } + } + + SpinAcquire(BufMgrLock); + for (i = 1; i <= NBuffers; i++) + { + bufHdr = &BufferDescriptors[i - 1]; +recheck: + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) + { + + /* + * If there is I/O in progress, better wait till it's done; + * don't want to delete the relation out from under someone + * who's just trying to flush the buffer! + */ + if (bufHdr->flags & BM_IO_IN_PROGRESS) + { + WaitIO(bufHdr, BufMgrLock); + + /* + * By now, the buffer very possibly belongs to some other + * rel, so check again before proceeding. + */ + goto recheck; + } + /* Now we can do what we came for */ + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; + + /* + * Release any refcount we may have. + * + * This is very probably dead code, and if it isn't then it's + * probably wrong. I added the Assert to find out --- tgl + * 11/99. 
+ */ + if (!(bufHdr->flags & BM_FREE)) + { + /* Assert checks that buffer will actually get freed! */ + Assert(PrivateRefCount[i - 1] == 1 && + bufHdr->refcount == 1); + /* ReleaseBuffer expects we do not hold the lock at entry */ + SpinRelease(BufMgrLock); + ReleaseBuffer(i); + SpinAcquire(BufMgrLock); + } + /* + * And mark the buffer as no longer occupied by this rel. + */ + BufTableDelete(bufHdr); + } + } + + SpinRelease(BufMgrLock); +} + /* --------------------------------------------------------------------- * DropBuffers * @@ -1256,7 +1343,7 @@ recheck: * bothering to write them out first. This is used when we destroy a * database, to avoid trying to flush data to disk when the directory * tree no longer exists. Implementation is pretty similar to - * ReleaseRelationBuffers() which is for destroying just one relation. + * DropRelationBuffers() which is for destroying just one relation. * -------------------------------------------------------------------- */ void @@ -1399,33 +1486,32 @@ BufferPoolBlowaway() /* --------------------------------------------------------------------- * FlushRelationBuffers * - * This function flushes all dirty pages of a relation out to disk. + * This function writes all dirty pages of a relation out to disk. * Furthermore, pages that have blocknumber >= firstDelBlock are * actually removed from the buffer pool. An error code is returned * if we fail to dump a dirty buffer or if we find one of * the target pages is pinned into the cache. * - * This is used by VACUUM before truncating the relation to the given - * number of blocks. (TRUNCATE TABLE also uses it in the same way.) - * It might seem unnecessary to flush dirty pages before firstDelBlock, - * since VACUUM should already have committed its changes. 
However, - * it is possible for there still to be dirty pages: if some page - * had unwritten on-row tuple status updates from a prior transaction, - * and VACUUM had no additional changes to make to that page, then - * VACUUM won't have written it. This is harmless in most cases but - * will break pg_upgrade, which relies on VACUUM to ensure that *all* - * tuples have correct on-row status. So, we check and flush all - * dirty pages of the rel regardless of block number. + * This is called by DROP TABLE to clear buffers for the relation + * from the buffer pool. Note that we must write dirty buffers, + * rather than just dropping the changes, because our transaction + * might abort later on; we want to roll back safely in that case. * - * This is also used by RENAME TABLE (with firstDelBlock = 0) - * to clear out the buffer cache before renaming the physical files of - * a relation. Without that, some other backend might try to do a - * blind write of a buffer page (relying on the BlindId of the buffer) - * and fail because it's not got the right filename anymore. + * This is also called by VACUUM before truncating the relation to the + * given number of blocks. It might seem unnecessary for VACUUM to + * write dirty pages before firstDelBlock, since VACUUM should already + * have committed its changes. However, it is possible for there still + * to be dirty pages: if some page had unwritten on-row tuple status + * updates from a prior transaction, and VACUUM had no additional + * changes to make to that page, then VACUUM won't have written it. + * This is harmless in most cases but will break pg_upgrade, which + * relies on VACUUM to ensure that *all* tuples have correct on-row + * status. So, we check and flush all dirty pages of the rel + * regardless of block number. * * In all cases, the caller should be holding AccessExclusiveLock on * the target relation to ensure that no other backend is busy reading - * more blocks of the relation. 
+ * more blocks of the relation (or might do so before we commit). * * Formerly, we considered it an error condition if we found dirty * buffers here. However, since BufferSync no longer forces out all diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 23a2dcf1e2..14325e5318 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.71 2000/07/17 03:05:08 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.72 2000/11/08 22:10:00 tgl Exp $ * * NOTES * Outside modules can create a lock table and acquire/release @@ -453,7 +453,7 @@ LockMethodTableRename(LOCKMETHOD lockmethod) bool LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) { - XIDLookupEnt *result, + XIDLookupEnt *xident, item; HTAB *xidTable; bool found; @@ -559,9 +559,9 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) /* * Find or create an xid entry with this tag */ - result = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, + xident = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, HASH_ENTER, &found); - if (!result) + if (!xident) { SpinRelease(masterLock); elog(NOTICE, "LockAcquire: xid table corrupted"); @@ -573,16 +573,41 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) */ if (!found) { - result->nHolding = 0; - MemSet((char *) result->holders, 0, sizeof(int) * MAX_LOCKMODES); - ProcAddLock(&result->queue); - XID_PRINT("LockAcquire: new", result); + xident->nHolding = 0; + MemSet((char *) xident->holders, 0, sizeof(int) * MAX_LOCKMODES); + ProcAddLock(&xident->queue); + XID_PRINT("LockAcquire: new", xident); } else { - XID_PRINT("LockAcquire: found", result); - Assert((result->nHolding > 0) && (result->holders[lockmode] >= 0)); - Assert(result->nHolding <= lock->nActive); + int i; + + XID_PRINT("LockAcquire: found", xident); + 
Assert((xident->nHolding > 0) && (xident->holders[lockmode] >= 0)); + Assert(xident->nHolding <= lock->nActive); + /* + * Issue warning if we already hold a lower-level lock on this + * object and do not hold a lock of the requested level or higher. + * This indicates a deadlock-prone coding practice (eg, we'd have + * a deadlock if another backend were following the same code path + * at about the same time). + * + * XXX Doing numeric comparison on the lockmodes is a hack; + * it'd be better to use a table. For now, though, this works. + */ + for (i = lockMethodTable->ctl->numLockModes; i > 0; i--) + { + if (xident->holders[i] > 0) + { + if (i >= (int) lockmode) + break; /* safe: we have a lock >= req level */ + elog(DEBUG, "Deadlock risk: raising lock level" + " from %s to %s on object %u/%u/%u", + lock_types[i], lock_types[lockmode], + lock->tag.relId, lock->tag.dbId, lock->tag.objId.blkno); + break; + } + } } /* ---------------- @@ -601,12 +626,12 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * hold this lock. * -------------------- */ - if (result->nHolding == lock->nActive || result->holders[lockmode] != 0) + if (xident->nHolding == lock->nActive || xident->holders[lockmode] != 0) { - result->holders[lockmode]++; - result->nHolding++; - XID_PRINT("LockAcquire: owning", result); - Assert((result->nHolding > 0) && (result->holders[lockmode] > 0)); + xident->holders[lockmode]++; + xident->nHolding++; + XID_PRINT("LockAcquire: owning", xident); + Assert((xident->nHolding > 0) && (xident->holders[lockmode] > 0)); GrantLock(lock, lockmode); SpinRelease(masterLock); return TRUE; @@ -623,27 +648,27 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * If I don't hold locks or my locks don't conflict with waiters * then force to sleep. 
*/ - if (result->nHolding > 0) + if (xident->nHolding > 0) { for (; i <= lockMethodTable->ctl->numLockModes; i++) { - if (result->holders[i] > 0 && + if (xident->holders[i] > 0 && lockMethodTable->ctl->conflictTab[i] & lock->waitMask) break; /* conflict */ } } - if (result->nHolding == 0 || i > lockMethodTable->ctl->numLockModes) + if (xident->nHolding == 0 || i > lockMethodTable->ctl->numLockModes) { XID_PRINT("LockAcquire: higher priority proc waiting", - result); + xident); status = STATUS_FOUND; } else - status = LockResolveConflicts(lockmethod, lock, lockmode, xid, result); + status = LockResolveConflicts(lockmethod, lock, lockmode, xid, xident); } else - status = LockResolveConflicts(lockmethod, lock, lockmode, xid, result); + status = LockResolveConflicts(lockmethod, lock, lockmode, xid, xident); if (status == STATUS_OK) GrantLock(lock, lockmode); @@ -657,17 +682,17 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) */ if (lockmethod == USER_LOCKMETHOD) { - if (!result->nHolding) + if (!xident->nHolding) { - SHMQueueDelete(&result->queue); - result = (XIDLookupEnt *) hash_search(xidTable, - (Pointer) result, + SHMQueueDelete(&xident->queue); + xident = (XIDLookupEnt *) hash_search(xidTable, + (Pointer) xident, HASH_REMOVE, &found); - if (!result || !found) + if (!xident || !found) elog(NOTICE, "LockAcquire: remove xid, table corrupted"); } else - XID_PRINT("LockAcquire: NHOLDING", result); + XID_PRINT("LockAcquire: NHOLDING", xident); lock->nHolding--; lock->holders[lockmode]--; LOCK_PRINT("LockAcquire: user lock failed", lock, lockmode); @@ -682,7 +707,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * Construct bitmask of locks we hold before going to sleep. 
*/ MyProc->holdLock = 0; - if (result->nHolding > 0) + if (xident->nHolding > 0) { int i, tmpMask = 2; @@ -690,7 +715,7 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) for (i = 1; i <= lockMethodTable->ctl->numLockModes; i++, tmpMask <<= 1) { - if (result->holders[i] > 0) + if (xident->holders[i] > 0) MyProc->holdLock |= tmpMask; } Assert(MyProc->holdLock != 0); @@ -702,15 +727,15 @@ LockAcquire(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * Check the xid entry status, in case something in the ipc * communication doesn't work correctly. */ - if (!((result->nHolding > 0) && (result->holders[lockmode] > 0))) + if (!((xident->nHolding > 0) && (xident->holders[lockmode] > 0))) { - XID_PRINT("LockAcquire: INCONSISTENT", result); + XID_PRINT("LockAcquire: INCONSISTENT", xident); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); /* Should we retry ? */ SpinRelease(masterLock); return FALSE; } - XID_PRINT("LockAcquire: granted", result); + XID_PRINT("LockAcquire: granted", xident); LOCK_PRINT("LockAcquire: granted", lock, lockmode); } @@ -738,7 +763,7 @@ LockResolveConflicts(LOCKMETHOD lockmethod, TransactionId xid, XIDLookupEnt *xidentP) /* xident ptr or NULL */ { - XIDLookupEnt *result, + XIDLookupEnt *xident, item; int *myHolders; int numLockModes; @@ -758,7 +783,7 @@ LockResolveConflicts(LOCKMETHOD lockmethod, * A pointer to the xid entry was supplied from the caller. * Actually only LockAcquire can do it. */ - result = xidentP; + xident = xidentP; } else { @@ -788,9 +813,9 @@ LockResolveConflicts(LOCKMETHOD lockmethod, /* * Find or create an xid entry with this tag */ - result = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, + xident = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, HASH_ENTER, &found); - if (!result) + if (!xident) { elog(NOTICE, "LockResolveConflicts: xid table corrupted"); return STATUS_ERROR; @@ -808,14 +833,14 @@ LockResolveConflicts(LOCKMETHOD lockmethod, * the lock stats. 
* --------------- */ - MemSet(result->holders, 0, numLockModes * sizeof(*(lock->holders))); - result->nHolding = 0; - XID_PRINT("LockResolveConflicts: NOT FOUND", result); + MemSet(xident->holders, 0, numLockModes * sizeof(*(lock->holders))); + xident->nHolding = 0; + XID_PRINT("LockResolveConflicts: NOT FOUND", xident); } else - XID_PRINT("LockResolveConflicts: found", result); + XID_PRINT("LockResolveConflicts: found", xident); } - Assert((result->nHolding >= 0) && (result->holders[lockmode] >= 0)); + Assert((xident->nHolding >= 0) && (xident->holders[lockmode] >= 0)); /* ---------------------------- * first check for global conflicts: If no locks conflict @@ -829,10 +854,10 @@ LockResolveConflicts(LOCKMETHOD lockmethod, */ if (!(LockMethodTable[lockmethod]->ctl->conflictTab[lockmode] & lock->mask)) { - result->holders[lockmode]++; - result->nHolding++; - XID_PRINT("LockResolveConflicts: no conflict", result); - Assert((result->nHolding > 0) && (result->holders[lockmode] > 0)); + xident->holders[lockmode]++; + xident->nHolding++; + XID_PRINT("LockResolveConflicts: no conflict", xident); + Assert((xident->nHolding > 0) && (xident->holders[lockmode] > 0)); return STATUS_OK; } @@ -842,7 +867,7 @@ LockResolveConflicts(LOCKMETHOD lockmethod, * that does not reflect our own locks. * ------------------------ */ - myHolders = result->holders; + myHolders = xident->holders; bitmask = 0; tmpMask = 2; for (i = 1; i <= numLockModes; i++, tmpMask <<= 1) @@ -861,14 +886,14 @@ LockResolveConflicts(LOCKMETHOD lockmethod, if (!(LockMethodTable[lockmethod]->ctl->conflictTab[lockmode] & bitmask)) { /* no conflict. 
Get the lock and go on */ - result->holders[lockmode]++; - result->nHolding++; - XID_PRINT("LockResolveConflicts: resolved", result); - Assert((result->nHolding > 0) && (result->holders[lockmode] > 0)); + xident->holders[lockmode]++; + xident->nHolding++; + XID_PRINT("LockResolveConflicts: resolved", xident); + Assert((xident->nHolding > 0) && (xident->holders[lockmode] > 0)); return STATUS_OK; } - XID_PRINT("LockResolveConflicts: conflicting", result); + XID_PRINT("LockResolveConflicts: conflicting", xident); return STATUS_FOUND; } @@ -965,7 +990,7 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) SPINLOCK masterLock; bool found; LOCKMETHODTABLE *lockMethodTable; - XIDLookupEnt *result, + XIDLookupEnt *xident, item; HTAB *xidTable; TransactionId xid; @@ -1053,9 +1078,9 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * Find an xid entry with this tag */ xidTable = lockMethodTable->xidHash; - result = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, + xident = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &item, HASH_FIND_SAVE, &found); - if (!result || !found) + if (!xident || !found) { SpinRelease(masterLock); #ifdef USER_LOCKS @@ -1066,23 +1091,23 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) elog(NOTICE, "LockRelease: xid table corrupted"); return FALSE; } - XID_PRINT("LockRelease: found", result); - Assert(result->tag.lock == MAKE_OFFSET(lock)); + XID_PRINT("LockRelease: found", xident); + Assert(xident->tag.lock == MAKE_OFFSET(lock)); /* * Check that we are actually holding a lock of the type we want to * release. 
*/ - if (!(result->holders[lockmode] > 0)) + if (!(xident->holders[lockmode] > 0)) { SpinRelease(masterLock); - XID_PRINT("LockAcquire: WRONGTYPE", result); + XID_PRINT("LockAcquire: WRONGTYPE", xident); elog(NOTICE, "LockRelease: you don't own a lock of type %s", lock_types[lockmode]); - Assert(result->holders[lockmode] >= 0); + Assert(xident->holders[lockmode] >= 0); return FALSE; } - Assert(result->nHolding > 0); + Assert(xident->nHolding > 0); /* * fix the general lock stats @@ -1147,27 +1172,27 @@ LockRelease(LOCKMETHOD lockmethod, LOCKTAG *locktag, LOCKMODE lockmode) * now check to see if I have any private locks. If I do, decrement * the counts associated with them. */ - result->holders[lockmode]--; - result->nHolding--; - XID_PRINT("LockRelease: updated", result); - Assert((result->nHolding >= 0) && (result->holders[lockmode] >= 0)); + xident->holders[lockmode]--; + xident->nHolding--; + XID_PRINT("LockRelease: updated", xident); + Assert((xident->nHolding >= 0) && (xident->holders[lockmode] >= 0)); /* * If this was my last hold on this lock, delete my entry in the XID * table. 
*/ - if (!result->nHolding) + if (!xident->nHolding) { - if (result->queue.prev == INVALID_OFFSET) + if (xident->queue.prev == INVALID_OFFSET) elog(NOTICE, "LockRelease: xid.prev == INVALID_OFFSET"); - if (result->queue.next == INVALID_OFFSET) + if (xident->queue.next == INVALID_OFFSET) elog(NOTICE, "LockRelease: xid.next == INVALID_OFFSET"); - if (result->queue.next != INVALID_OFFSET) - SHMQueueDelete(&result->queue); - XID_PRINT("LockRelease: deleting", result); - result = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &result, + if (xident->queue.next != INVALID_OFFSET) + SHMQueueDelete(&xident->queue); + XID_PRINT("LockRelease: deleting", xident); + xident = (XIDLookupEnt *) hash_search(xidTable, (Pointer) &xident, HASH_REMOVE_SAVED, &found); - if (!result || !found) + if (!xident || !found) { SpinRelease(masterLock); elog(NOTICE, "LockRelease: remove xid, table corrupted"); @@ -1196,7 +1221,7 @@ LockReleaseAll(LOCKMETHOD lockmethod, SHM_QUEUE *lockQueue) int done; XIDLookupEnt *xidLook = NULL; XIDLookupEnt *tmp = NULL; - XIDLookupEnt *result; + XIDLookupEnt *xident; SHMEM_OFFSET end = MAKE_OFFSET(lockQueue); SPINLOCK masterLock; LOCKMETHODTABLE *lockMethodTable; @@ -1371,11 +1396,11 @@ LockReleaseAll(LOCKMETHOD lockmethod, SHM_QUEUE *lockQueue) */ XID_PRINT("LockReleaseAll: deleting", xidLook); - result = (XIDLookupEnt *) hash_search(lockMethodTable->xidHash, + xident = (XIDLookupEnt *) hash_search(lockMethodTable->xidHash, (Pointer) xidLook, HASH_REMOVE, &found); - if (!result || !found) + if (!xident || !found) { SpinRelease(masterLock); elog(NOTICE, "LockReleaseAll: xid table corrupted"); diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index da466afe9f..c97a46ba4b 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,17 +8,17 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.77 2000/10/28 16:20:57 vadim Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.78 2000/11/08 22:10:00 tgl Exp $ * *------------------------------------------------------------------------- */ +#include "postgres.h" + #include #include #include #include -#include "postgres.h" - #include "catalog/catalog.h" #include "miscadmin.h" #include "storage/smgr.h" @@ -123,63 +123,39 @@ mdinit() int mdcreate(Relation reln) { + char *path; int fd, vfd; - char *path; - Assert(reln->rd_unlinked && reln->rd_fd < 0); + Assert(reln->rd_fd < 0); path = relpath(reln->rd_node); - fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); - /* - * For cataloged relations, pg_class is guaranteed to have a unique - * record with the same relname by the unique index. So we are able to - * reuse existent files for new cataloged relations. Currently we reuse - * them in the following cases. 1. they are empty. 2. they are used - * for Index relations and their size == BLCKSZ * 2. - * - * During bootstrap processing, we skip that check, because pg_time, - * pg_variable, and pg_log get created before their .bki file entries - * are processed. - */ + fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); if (fd < 0) { int save_errno = errno; - if (!IsBootstrapProcessingMode() && - reln->rd_rel->relkind == RELKIND_UNCATALOGED) - return -1; - - fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); + /* + * During bootstrap, there are cases where a system relation will be + * accessed (by internal backend processes) before the bootstrap + * script nominally creates it. Therefore, allow the file to exist + * already, but in bootstrap mode only. 
(See also mdopen) + */ + if (IsBootstrapProcessingMode()) + fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); if (fd < 0) { + pfree(path); /* be sure to return the error reported by create, not open */ errno = save_errno; return -1; } - if (!IsBootstrapProcessingMode()) - { - bool reuse = false; - long len = FileSeek(fd, 0L, SEEK_END); - - if (len == 0) - reuse = true; - else if (reln->rd_rel->relkind == RELKIND_INDEX && - len == BLCKSZ * 2) - reuse = true; - if (!reuse) - { - FileClose(fd); - /* be sure to return the error reported by create */ - errno = save_errno; - return -1; - } - } errno = 0; } - reln->rd_unlinked = false; + + pfree(path); vfd = _fdvec_alloc(); if (vfd < 0) @@ -187,12 +163,10 @@ mdcreate(Relation reln) Md_fdvec[vfd].mdfd_vfd = fd; Md_fdvec[vfd].mdfd_flags = (uint16) 0; + Md_fdvec[vfd].mdfd_lstbcnt = 0; #ifndef LET_OS_MANAGE_FILESIZE Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL; #endif - Md_fdvec[vfd].mdfd_lstbcnt = 0; - - pfree(path); return vfd; } @@ -201,65 +175,50 @@ mdcreate(Relation reln) * mdunlink() -- Unlink a relation. */ int -mdunlink(Relation reln) +mdunlink(RelFileNode rnode) { - int nblocks; - int fd; - MdfdVec *v; - - /* - * If the relation is already unlinked,we have nothing to do any more. - */ - if (reln->rd_unlinked && reln->rd_fd < 0) - return SM_SUCCESS; - - /* - * Force all segments of the relation to be opened, so that we won't - * miss deleting any of them. - */ - nblocks = mdnblocks(reln); + int status = SM_SUCCESS; + int save_errno = 0; + char *path; - /* - * Clean out the mdfd vector, letting fd.c unlink the physical files. - * - * NOTE: We truncate the file(s) before deleting 'em, because if other - * backends are holding the files open, the unlink will fail on some - * platforms (think Microsoft). Better a zero-size file gets left - * around than a big file. Those other backends will be forced to - * close the relation by cache invalidation, but that probably hasn't - * happened yet. 
- */ - fd = RelationGetFile(reln); - if (fd < 0) /* should not happen */ - elog(ERROR, "mdunlink: mdnblocks didn't open relation"); + path = relpath(rnode); - Md_fdvec[fd].mdfd_flags = (uint16) 0; + /* Delete the first segment, or only segment if not doing segmenting */ + if (unlink(path) < 0) + { + status = SM_FAIL; + save_errno = errno; + } #ifndef LET_OS_MANAGE_FILESIZE - for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;) + /* Get the additional segments, if any */ + if (status == SM_SUCCESS) { - MdfdVec *ov = v; + char *segpath = (char *) palloc(strlen(path) + 12); + int segno; - FileTruncate(v->mdfd_vfd, 0); - FileUnlink(v->mdfd_vfd); - v = v->mdfd_chain; - if (ov != &Md_fdvec[fd]) - pfree(ov); + for (segno = 1; ; segno++) + { + sprintf(segpath, "%s.%d", path, segno); + if (unlink(segpath) < 0) + { + /* ENOENT is expected after the last segment... */ + if (errno != ENOENT) + { + status = SM_FAIL; + save_errno = errno; + } + break; + } + } + pfree(segpath); } - Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL; -#else - v = &Md_fdvec[fd]; - FileTruncate(v->mdfd_vfd, 0); - FileUnlink(v->mdfd_vfd); #endif - _fdvec_free(fd); - - /* be sure to mark relation closed && unlinked */ - reln->rd_fd = -1; - reln->rd_unlinked = true; + pfree(path); - return SM_SUCCESS; + errno = save_errno; + return status; } /* @@ -327,24 +286,29 @@ mdopen(Relation reln) int vfd; Assert(reln->rd_fd < 0); + path = relpath(reln->rd_node); fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); + if (fd < 0) { - /* in bootstrap mode, accept mdopen as substitute for mdcreate */ + /* + * During bootstrap, there are cases where a system relation will be + * accessed (by internal backend processes) before the bootstrap + * script nominally creates it. Therefore, accept mdopen() as a + * substitute for mdcreate() in bootstrap mode only. 
(See mdcreate) + */ if (IsBootstrapProcessingMode()) fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); if (fd < 0) { - elog(NOTICE, "mdopen: couldn't open %s: %m", path); - /* mark relation closed and unlinked */ - reln->rd_fd = -1; - reln->rd_unlinked = true; + pfree(path); return -1; } } - reln->rd_unlinked = false; + + pfree(path); vfd = _fdvec_alloc(); if (vfd < 0) @@ -362,8 +326,6 @@ mdopen(Relation reln) #endif #endif - pfree(path); - return vfd; } diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c index a5b22cbcc5..d64aeb6a41 100644 --- a/src/backend/storage/smgr/mm.c +++ b/src/backend/storage/smgr/mm.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.19 2000/04/10 23:41:51 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.20 2000/11/08 22:10:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -204,9 +204,11 @@ mmcreate(Relation reln) /* * mmunlink() -- Unlink a relation. 
+ * + * XXX currently broken: needs to accept RelFileNode, not Relation */ int -mmunlink(Relation reln) +mmunlink(RelFileNode rnode) { int i; Oid reldbid; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index d2a940a76e..01a7877e80 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,13 +11,16 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.42 2000/10/28 16:20:57 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.43 2000/11/08 22:10:00 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "storage/bufmgr.h" #include "storage/smgr.h" +#include "utils/memutils.h" + static void smgrshutdown(void); @@ -26,7 +29,7 @@ typedef struct f_smgr int (*smgr_init) (void); /* may be NULL */ int (*smgr_shutdown) (void); /* may be NULL */ int (*smgr_create) (Relation reln); - int (*smgr_unlink) (Relation reln); + int (*smgr_unlink) (RelFileNode rnode); int (*smgr_extend) (Relation reln, char *buffer); int (*smgr_open) (Relation reln); int (*smgr_close) (Relation reln); @@ -60,10 +63,11 @@ static f_smgr smgrsw[] = { {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty, #ifdef XLOG - mdnblocks, mdtruncate, mdcommit, mdabort, mdsync}, + mdnblocks, mdtruncate, mdcommit, mdabort, mdsync #else - mdnblocks, mdtruncate, mdcommit, mdabort}, + mdnblocks, mdtruncate, mdcommit, mdabort #endif + }, #ifdef STABLE_MEMORY_STORAGE /* main memory */ @@ -93,6 +97,31 @@ static bool smgrwo[] = { static int NSmgr = lengthof(smgrsw); +/* + * We keep a list of all relations (represented as RelFileNode values) + * that have been created or deleted in the current transaction. 
When + * a relation is created, we create the physical file immediately, but + * remember it so that we can delete the file again if the current + * transaction is aborted. Conversely, a deletion request is NOT + * executed immediately, but is just entered in the list. When and if + * the transaction commits, we can delete the physical file. + * + * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear + * unbetimes. It'd probably be OK to keep it in TopTransactionContext, + * but I'm being paranoid. + */ + +typedef struct PendingRelDelete +{ + RelFileNode relnode; /* relation that may need to be deleted */ + int16 which; /* which storage manager? */ + bool atCommit; /* T=delete at commit; F=delete at abort */ + struct PendingRelDelete *next; /* linked-list link */ +} PendingRelDelete; + +static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ + + /* * smgrinit(), smgrshutdown() -- Initialize or shut down all storage * managers. @@ -147,27 +176,58 @@ int smgrcreate(int16 which, Relation reln) { int fd; + PendingRelDelete *pending; if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0) elog(ERROR, "cannot create %s: %m", RelationGetRelationName(reln)); + /* Add the relation to the list of stuff to delete at abort */ + pending = (PendingRelDelete *) + MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); + pending->relnode = reln->rd_node; + pending->which = which; + pending->atCommit = false; /* delete if abort */ + pending->next = pendingDeletes; + pendingDeletes = pending; + return fd; } /* * smgrunlink() -- Unlink a relation. * - * The relation is removed from the store. + * The relation is removed from the store. Actually, we just remember + * that we want to do this at transaction commit. 
*/ int smgrunlink(int16 which, Relation reln) { - int status; - - if ((status = (*(smgrsw[which].smgr_unlink)) (reln)) == SM_FAIL) - elog(ERROR, "cannot unlink %s: %m", RelationGetRelationName(reln)); + PendingRelDelete *pending; + + /* Make sure the file is closed */ + if (reln->rd_fd >= 0) + smgrclose(which, reln); + + /* Add the relation to the list of stuff to delete at commit */ + pending = (PendingRelDelete *) + MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); + pending->relnode = reln->rd_node; + pending->which = which; + pending->atCommit = true; /* delete if commit */ + pending->next = pendingDeletes; + pendingDeletes = pending; + + /* + * NOTE: if the relation was created in this transaction, it will now + * be present in the pending-delete list twice, once with atCommit true + * and once with atCommit false. Hence, it will be physically deleted + * at end of xact in either case (and the other entry will be ignored + * by smgrDoPendingDeletes, so no error will occur). We could instead + * remove the existing list entry and delete the physical file + * immediately, but for now I'll keep the logic simple. + */ - return status; + return SM_SUCCESS; } /* @@ -193,17 +253,18 @@ smgrextend(int16 which, Relation reln, char *buffer) /* * smgropen() -- Open a relation using a particular storage manager. * - * Returns the fd for the open relation on success, aborts the - * transaction on failure. + * Returns the fd for the open relation on success. + * + * On failure, returns -1 if failOK, else aborts the transaction. */ int -smgropen(int16 which, Relation reln) +smgropen(int16 which, Relation reln, bool failOK) { int fd; - if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0 && - !reln->rd_unlinked) - elog(ERROR, "cannot open %s: %m", RelationGetRelationName(reln)); + if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0) + if (! 
failOK) + elog(ERROR, "cannot open %s: %m", RelationGetRelationName(reln)); return fd; } @@ -211,12 +272,6 @@ smgropen(int16 which, Relation reln) /* * smgrclose() -- Close a relation. * - * NOTE: underlying manager should allow case where relation is - * already closed. Indeed relation may have been unlinked! - * This is currently called only from RelationFlushRelation() when - * the relation cache entry is about to be dropped; could be doing - * simple relation cache clear, or finishing up DROP TABLE. - * * Returns SM_SUCCESS on success, aborts on failure. */ int @@ -411,6 +466,41 @@ smgrtruncate(int16 which, Relation reln, int nblocks) return newblks; } +/* + * smgrDoPendingDeletes() -- take care of relation deletes at end of xact. + */ +int +smgrDoPendingDeletes(bool isCommit) +{ + while (pendingDeletes != NULL) + { + PendingRelDelete *pending = pendingDeletes; + + pendingDeletes = pending->next; + if (pending->atCommit == isCommit) + { + /* + * Get rid of any leftover buffers for the rel (shouldn't be + * any in the commit case, but there can be in the abort case). + */ + DropRelFileNodeBuffers(pending->relnode); + /* + * And delete the physical files. + * + * Note: we treat deletion failure as a NOTICE, not an error, + * because we've already decided to commit or abort the current + * xact. + */ + if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL) + elog(NOTICE, "cannot unlink %u/%u: %m", + pending->relnode.tblNode, pending->relnode.relNode); + } + pfree(pending); + } + + return SM_SUCCESS; +} + /* * smgrcommit(), smgrabort() -- Commit or abort changes made during the * current transaction. 
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index e218daa748..8f4fd626f8 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/inval.c,v 1.37 2000/06/08 19:51:03 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/inval.c,v 1.38 2000/11/08 22:10:01 tgl Exp $ * * Note - this code is real crufty... * @@ -80,10 +80,10 @@ typedef InvalidationMessageData *InvalidationMessage; /* * ---------------- - * Invalidation info was devided into three parts. - * 1) shared invalidation to be registerd for all backends + * Invalidation info is divided into three parts. + * 1) shared invalidation to be registered for all backends * 2) local invalidation for the transaction itself - * 3) rollback information for the transaction itself + * 3) rollback information for the transaction itself (in case we abort) * ---------------- */ @@ -160,7 +160,9 @@ LocalInvalidRegister(LocalInvalid invalid, * -------------------------------- */ static void - LocalInvalidInvalidate(LocalInvalid invalid, void (*function) (), bool freemember) +LocalInvalidInvalidate(LocalInvalid invalid, + void (*function) (), + bool freemember) { InvalidationEntryData *entryDataP; @@ -216,15 +218,10 @@ elog(DEBUG, "CacheIdRegisterLocalInvalid(%d, %d, [%d, %d])", \ elog(DEBUG, "CacheIdRegisterLocalRollback(%d, %d, [%d, %d])", \ cacheId, hashIndex, ItemPointerGetBlockNumber(pointer), \ ItemPointerGetOffsetNumber(pointer)) -#define CacheIdImmediateRegisterSharedInvalid_DEBUG1 \ -elog(DEBUG, "CacheIdImmediateRegisterSharedInvalid(%d, %d, [%d, %d])", \ - cacheId, hashIndex, ItemPointerGetBlockNumber(pointer), \ - ItemPointerGetOffsetNumber(pointer)) #else #define CacheIdRegisterSpecifiedLocalInvalid_DEBUG1 #define CacheIdRegisterLocalInvalid_DEBUG1 #define CacheIdRegisterLocalRollback_DEBUG1 -#define CacheIdImmediateRegisterSharedInvalid_DEBUG1 #endif /* 
INVALIDDEBUG */ /* -------------------------------- @@ -233,7 +230,9 @@ elog(DEBUG, "CacheIdImmediateRegisterSharedInvalid(%d, %d, [%d, %d])", \ */ static LocalInvalid CacheIdRegisterSpecifiedLocalInvalid(LocalInvalid invalid, - Index cacheId, Index hashIndex, ItemPointer pointer) + Index cacheId, + Index hashIndex, + ItemPointer pointer) { InvalidationMessage message; @@ -317,43 +316,6 @@ CacheIdRegisterLocalRollback(Index cacheId, Index hashIndex, RollbackStack, cacheId, hashIndex, pointer); } -/* -------------------------------- - * CacheIdImmediateRegisterSharedInvalid - * -------------------------------- - */ -static void -CacheIdImmediateRegisterSharedInvalid(Index cacheId, Index hashIndex, - ItemPointer pointer) -{ - InvalidationMessage message; - - /* ---------------- - * debugging stuff - * ---------------- - */ - CacheIdImmediateRegisterSharedInvalid_DEBUG1; - - /* ---------------- - * create a message describing the system catalog tuple - * we wish to invalidate. - * ---------------- - */ - message = (InvalidationMessage) - InvalidationEntryAllocate(sizeof(InvalidationMessageData)); - - message->kind = 'c'; - message->any.catalog.cacheId = cacheId; - message->any.catalog.hashIndex = hashIndex; - - ItemPointerCopy(pointer, &message->any.catalog.pointerData); - /* ---------------- - * Register a shared catalog cache invalidation. 
- * ---------------- - */ - InvalidationMessageRegisterSharedInvalid(message); - free((Pointer) &((InvalidationUserData *) message)->dataP[-1]); -} - /* -------------------------------- * RelationIdRegisterSpecifiedLocalInvalid * -------------------------------- @@ -448,44 +410,6 @@ RelationIdRegisterLocalRollback(Oid relationId, Oid objectId) RollbackStack, relationId, objectId); } -/* -------------------------------- - * RelationIdImmediateRegisterSharedInvalid - * -------------------------------- - */ -static void -RelationIdImmediateRegisterSharedInvalid(Oid relationId, Oid objectId) -{ - InvalidationMessage message; - - /* ---------------- - * debugging stuff - * ---------------- - */ -#ifdef INVALIDDEBUG - elog(DEBUG, "RelationImmediateRegisterSharedInvalid(%u, %u)", relationId, - objectId); -#endif /* defined(INVALIDDEBUG) */ - - /* ---------------- - * create a message describing the relation descriptor - * we wish to invalidate. - * ---------------- - */ - message = (InvalidationMessage) - InvalidationEntryAllocate(sizeof(InvalidationMessageData)); - - message->kind = 'r'; - message->any.relation.relationId = relationId; - message->any.relation.objectId = objectId; - - /* ---------------- - * Register a shared catalog cache invalidation. - * ---------------- - */ - InvalidationMessageRegisterSharedInvalid(message); - free((Pointer) &((InvalidationUserData *) message)->dataP[-1]); -} - /* -------------------------------- * CacheIdInvalidate * @@ -890,55 +814,3 @@ RelationMark4RollbackHeapTuple(Relation relation, HeapTuple tuple) RelationIdRegisterLocalRollback, "RelationMark4RollbackHeapTuple"); } - -/* - * ImmediateInvalidateSharedHeapTuple - * Different from RelationInvalidateHeapTuple() - * this function queues shared invalidation info immediately. 
- */ -void -ImmediateInvalidateSharedHeapTuple(Relation relation, HeapTuple tuple) -{ - InvokeHeapTupleInvalidation(relation, tuple, - CacheIdImmediateRegisterSharedInvalid, - RelationIdImmediateRegisterSharedInvalid, - "ImmediateInvalidateSharedHeapTuple"); -} - -#ifdef NOT_USED -/* - * ImmediateSharedRelationCacheInvalidate - * Register shared relation cache invalidation immediately - * - * This is needed for smgrunlink()/smgrtruncate(). - * Those functions unlink/truncate the base file immediately - * and couldn't be rollbacked in case of abort/crash. - * So relation cache invalidation must be registerd immediately. - * Note: - * Assumes Relation is valid. - */ -void -ImmediateSharedRelationCacheInvalidate(Relation relation) -{ - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(RelationIsValid(relation)); - - if (IsBootstrapProcessingMode()) - return; - - /* ---------------- - * debugging stuff - * ---------------- - */ -#ifdef INVALIDDEBUG - elog(DEBUG, "ImmediateSharedRelationCacheInvalidate(%s)", \ - RelationGetPhysicalRelationName(relation)); -#endif /* defined(INVALIDDEBUG) */ - - RelationIdImmediateRegisterSharedInvalid( - RelOid_pg_class, RelationGetRelid(relation)); -} -#endif diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ea7a8d0212..be902d7842 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.114 2000/10/28 16:20:57 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.115 2000/11/08 22:10:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -954,7 +954,6 @@ static Relation RelationBuildDesc(RelationBuildDescInfo buildinfo, Relation oldrelation) { - File fd; Relation relation; Oid relid; Oid relam; @@ -1069,18 +1068,10 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo, * by 
the storage manager code to rd_fd. * ---------------- */ - if (relation->rd_rel->relkind != RELKIND_VIEW) { - fd = smgropen(DEFAULT_SMGR, relation); - - Assert(fd >= -1); - if (fd == -1) - elog(NOTICE, "RelationBuildDesc: smgropen(%s): %m", - NameStr(relation->rd_rel->relname)); - - relation->rd_fd = fd; - } else { + if (relation->rd_rel->relkind != RELKIND_VIEW) + relation->rd_fd = smgropen(DEFAULT_SMGR, relation, false); + else relation->rd_fd = -1; - } /* ---------------- * insert newly created relation into proper relcaches, @@ -1337,14 +1328,11 @@ RelationIdCacheGetRelation(Oid relationId) if (RelationIsValid(rd)) { + /* re-open files if necessary */ if (rd->rd_fd == -1 && rd->rd_rel->relkind != RELKIND_VIEW) - { - rd->rd_fd = smgropen(DEFAULT_SMGR, rd); - Assert(rd->rd_fd != -1 || rd->rd_unlinked); - } + rd->rd_fd = smgropen(DEFAULT_SMGR, rd, false); RelationIncrementReferenceCount(rd); - } return rd; @@ -1371,14 +1359,11 @@ RelationNameCacheGetRelation(const char *relationName) if (RelationIsValid(rd)) { + /* re-open files if necessary */ if (rd->rd_fd == -1 && rd->rd_rel->relkind != RELKIND_VIEW) - { - rd->rd_fd = smgropen(DEFAULT_SMGR, rd); - Assert(rd->rd_fd != -1 || rd->rd_unlinked); - } + rd->rd_fd = smgropen(DEFAULT_SMGR, rd, false); RelationIncrementReferenceCount(rd); - } return rd; @@ -1393,14 +1378,11 @@ RelationNodeCacheGetRelation(RelFileNode rnode) if (RelationIsValid(rd)) { + /* re-open files if necessary */ if (rd->rd_fd == -1 && rd->rd_rel->relkind != RELKIND_VIEW) - { - rd->rd_fd = smgropen(DEFAULT_SMGR, rd); - Assert(rd->rd_fd != -1 || rd->rd_unlinked); - } + rd->rd_fd = smgropen(DEFAULT_SMGR, rd, false); RelationIncrementReferenceCount(rd); - } return rd; @@ -1536,15 +1518,13 @@ RelationClearRelation(Relation relation, bool rebuildIt) /* * Make sure smgr and lower levels close the relation's files, if they - * weren't closed already. 
We do this unconditionally; if the - * relation is not deleted, the next smgr access should reopen the - * files automatically. This ensures that the low-level file access - * state is updated after, say, a vacuum truncation. - * - * NOTE: this call is a no-op if the relation's smgr file is already - * closed or unlinked. + * weren't closed already. If the relation is not getting deleted, + * the next smgr access should reopen the files automatically. This + * ensures that the low-level file access state is updated after, say, + * a vacuum truncation. */ - smgrclose(DEFAULT_SMGR, relation); + if (relation->rd_fd >= 0) + smgrclose(DEFAULT_SMGR, relation); /* * Never, never ever blow away a nailed-in system relation, because @@ -1617,7 +1597,6 @@ RelationClearRelation(Relation relation, bool rebuildIt) MemoryContext old_rulescxt = relation->rd_rulescxt; TriggerDesc *old_trigdesc = relation->trigdesc; int old_nblocks = relation->rd_nblocks; - bool relDescChanged = false; RelationBuildDescInfo buildinfo; buildinfo.infotype = INFO_RELID; @@ -1644,7 +1623,6 @@ RelationClearRelation(Relation relation, bool rebuildIt) else { FreeTupleDesc(old_att); - relDescChanged = true; } if (equalRuleLocks(old_rules, relation->rd_rules)) { @@ -1657,7 +1635,6 @@ RelationClearRelation(Relation relation, bool rebuildIt) { if (old_rulescxt) MemoryContextDelete(old_rulescxt); - relDescChanged = true; } if (equalTriggerDescs(old_trigdesc, relation->trigdesc)) { @@ -1667,7 +1644,6 @@ RelationClearRelation(Relation relation, bool rebuildIt) else { FreeTriggerDesc(old_trigdesc); - relDescChanged = true; } relation->rd_nblocks = old_nblocks; @@ -1675,14 +1651,7 @@ RelationClearRelation(Relation relation, bool rebuildIt) * this is kind of expensive, but I think we must do it in case * relation has been truncated... 
*/ - if (relation->rd_unlinked) - relation->rd_nblocks = 0; - else - relation->rd_nblocks = RelationGetNumberOfBlocks(relation); - - if (relDescChanged && !RelationHasReferenceCountZero(relation)) - elog(ERROR, "RelationClearRelation: relation %u modified while in use", - buildinfo.i.info_id); + relation->rd_nblocks = RelationGetNumberOfBlocks(relation); } } @@ -1934,9 +1903,6 @@ RelationRegisterRelation(Relation relation) void RelationPurgeLocalRelation(bool xactCommitted) { - if (newlyCreatedRelns == NULL) - return; - while (newlyCreatedRelns) { List *l = newlyCreatedRelns; @@ -1949,19 +1915,7 @@ RelationPurgeLocalRelation(bool xactCommitted) newlyCreatedRelns = lnext(newlyCreatedRelns); pfree(l); - if (!xactCommitted) - { - /* - * remove the file if we abort. This is so that files for - * tables created inside a transaction block get removed. - */ - if (! reln->rd_unlinked) - { - smgrunlink(DEFAULT_SMGR, reln); - reln->rd_unlinked = true; - } - } - + /* XXX is this step still needed? If so, why? */ if (!IsBootstrapProcessingMode()) RelationClearRelation(reln, false); } diff --git a/src/backend/utils/cache/temprel.c b/src/backend/utils/cache/temprel.c index 460cf56a40..31591663ce 100644 --- a/src/backend/utils/cache/temprel.c +++ b/src/backend/utils/cache/temprel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/Attic/temprel.c,v 1.29 2000/10/19 23:06:24 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/Attic/temprel.c,v 1.30 2000/11/08 22:10:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,10 +27,10 @@ * to drop the underlying physical relations at session shutdown. 
*/ -#include - #include "postgres.h" +#include + #include "catalog/heap.h" #include "catalog/index.h" #include "miscadmin.h" @@ -47,11 +47,19 @@ static List *temp_rels = NIL; typedef struct TempTable { - char *user_relname; /* logical name of temp table */ - char *relname; /* underlying unique name */ + NameData user_relname; /* logical name of temp table */ + NameData relname; /* underlying unique name */ Oid relid; /* needed properties of rel */ char relkind; - TransactionId xid; /* xact in which temp tab was created */ + /* + * If this entry was created during this xact, it should be deleted + * at xact abort. Conversely, if this entry was deleted during this + * xact, it should be removed at xact commit. We leave deleted entries + * in the list until commit so that we can roll back if needed --- + * but we ignore them for purposes of lookup! + */ + bool created_in_cur_xact; + bool deleted_in_cur_xact; } TempTable; @@ -71,26 +79,122 @@ create_temp_relation(const char *relname, HeapTuple pg_class_tuple) oldcxt = MemoryContextSwitchTo(CacheMemoryContext); temp_rel = (TempTable *) palloc(sizeof(TempTable)); - temp_rel->user_relname = (char *) palloc(NAMEDATALEN); - temp_rel->relname = (char *) palloc(NAMEDATALEN); - StrNCpy(temp_rel->user_relname, relname, NAMEDATALEN); - StrNCpy(temp_rel->relname, NameStr(pg_class_form->relname), NAMEDATALEN); + StrNCpy(NameStr(temp_rel->user_relname), relname, + NAMEDATALEN); + StrNCpy(NameStr(temp_rel->relname), NameStr(pg_class_form->relname), + NAMEDATALEN); temp_rel->relid = pg_class_tuple->t_data->t_oid; temp_rel->relkind = pg_class_form->relkind; - temp_rel->xid = GetCurrentTransactionId(); + temp_rel->created_in_cur_xact = true; + temp_rel->deleted_in_cur_xact = false; temp_rels = lcons(temp_rel, temp_rels); MemoryContextSwitchTo(oldcxt); } +/* + * Remove a temp relation map entry (part of DROP TABLE on a temp table). + * We don't actually remove the entry, just mark it dead. 
+ * + * We don't have the relname for indexes, so we just pass the oid. + */ +void +remove_temp_rel_by_relid(Oid relid) +{ + List *l; + + foreach(l, temp_rels) + { + TempTable *temp_rel = (TempTable *) lfirst(l); + + if (temp_rel->relid == relid) + temp_rel->deleted_in_cur_xact = true; + /* Keep scanning 'cause there could be multiple matches; see RENAME */ + } +} + +/* + * To implement ALTER TABLE RENAME on a temp table, we shouldn't touch + * the underlying physical table at all, just change the map entry! + * + * This routine is invoked early in ALTER TABLE RENAME to check for + * the temp-table case. If oldname matches a temp table name, change + * the mapping to the new logical name and return TRUE (or elog if + * there is a conflict with another temp table name). If there is + * no match, return FALSE indicating that normal rename should proceed. + * + * We also reject an attempt to rename a normal table to a name in use + * as a temp table name. That would fail later on anyway when rename.c + * looks for a rename conflict, but we can give a more specific error + * message for the problem here. + * + * It might seem that we need to check for attempts to rename the physical + * file underlying a temp table, but that'll be rejected anyway because + * pg_tempXXX looks like a system table name. + */ +bool +rename_temp_relation(const char *oldname, + const char *newname) +{ + List *l; + + foreach(l, temp_rels) + { + TempTable *temp_rel = (TempTable *) lfirst(l); + MemoryContext oldcxt; + TempTable *new_temp_rel; + + if (temp_rel->deleted_in_cur_xact) + continue; /* ignore it if logically deleted */ + + if (strcmp(NameStr(temp_rel->user_relname), oldname) != 0) + continue; /* ignore non-matching entries */ + + /* We are renaming a temp table --- is it OK to do so? 
*/ + if (get_temp_rel_by_username(newname) != NULL) + elog(ERROR, "Cannot rename temp table \"%s\": temp table \"%s\" already exists", + oldname, newname); + + /* + * Create a new mapping entry and mark the old one deleted in this + * xact. One of these entries will be deleted at xact end. + */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + new_temp_rel = (TempTable *) palloc(sizeof(TempTable)); + memcpy(new_temp_rel, temp_rel, sizeof(TempTable)); + + StrNCpy(NameStr(new_temp_rel->user_relname), newname, NAMEDATALEN); + new_temp_rel->created_in_cur_xact = true; + + temp_rels = lcons(new_temp_rel, temp_rels); + + temp_rel->deleted_in_cur_xact = true; + + MemoryContextSwitchTo(oldcxt); + + return true; + } + + /* Old name does not match any temp table name, what about new? */ + if (get_temp_rel_by_username(newname) != NULL) + elog(ERROR, "Cannot rename \"%s\" to \"%s\": a temp table by that name already exists", + oldname, newname); + + return false; +} + + /* * Remove underlying relations for all temp rels at backend shutdown. */ void remove_all_temp_relations(void) { + List *l; + /* skip xact start overhead if nothing to do */ if (temp_rels == NIL) return; @@ -99,21 +203,24 @@ remove_all_temp_relations(void) StartTransactionCommand(); /* - * The way this works is that each time through the loop, we delete - * the frontmost entry. The DROP will call remove_temp_rel_by_relid() - * as a side effect, thereby removing the entry in the temp_rels list. - * So this is not an infinite loop, even though it looks like one. + * Scan the list and delete all entries not already deleted. + * We need not worry about list entries getting deleted from under us, + * because remove_temp_rel_by_relid() doesn't remove entries, only + * mark them dead. 
*/ - while (temp_rels != NIL) + foreach(l, temp_rels) { - TempTable *temp_rel = (TempTable *) lfirst(temp_rels); + TempTable *temp_rel = (TempTable *) lfirst(l); + + if (temp_rel->deleted_in_cur_xact) + continue; /* ignore it if deleted already */ if (temp_rel->relkind != RELKIND_INDEX) { char relname[NAMEDATALEN]; /* safe from deallocation */ - strcpy(relname, temp_rel->user_relname); + strcpy(relname, NameStr(temp_rel->user_relname)); heap_drop_with_catalog(relname, allowSystemTableMods); } else @@ -126,79 +233,30 @@ remove_all_temp_relations(void) } /* - * Remove a temp relation map entry (part of DROP TABLE on a temp table) + * Clean up temprel mapping entries during transaction commit or abort. * - * we don't have the relname for indexes, so we just pass the oid - */ -void -remove_temp_rel_by_relid(Oid relid) -{ - MemoryContext oldcxt; - List *l, - *prev; - - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - - prev = NIL; - l = temp_rels; - while (l != NIL) - { - TempTable *temp_rel = (TempTable *) lfirst(l); - - if (temp_rel->relid == relid) - { - pfree(temp_rel->user_relname); - pfree(temp_rel->relname); - pfree(temp_rel); - /* remove from linked list */ - if (prev != NIL) - { - lnext(prev) = lnext(l); - pfree(l); - l = lnext(prev); - } - else - { - temp_rels = lnext(l); - pfree(l); - l = temp_rels; - } - } - else - { - prev = l; - l = lnext(l); - } - } - - MemoryContextSwitchTo(oldcxt); -} - -/* - * Remove freshly-created map entries during transaction abort. + * During commit, remove entries that were deleted during this transaction; + * during abort, remove those created during this transaction. * - * The underlying physical rel will be removed by normal abort processing. - * We just have to delete the map entry. + * We do not need to worry about removing the underlying physical relation; + * that's someone else's job. 
*/ void -remove_temp_rel_in_myxid(void) +AtEOXact_temp_relations(bool isCommit) { - MemoryContext oldcxt; List *l, *prev; - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - prev = NIL; l = temp_rels; while (l != NIL) { TempTable *temp_rel = (TempTable *) lfirst(l); - if (temp_rel->xid == GetCurrentTransactionId()) + if (isCommit ? temp_rel->deleted_in_cur_xact : + temp_rel->created_in_cur_xact) { - pfree(temp_rel->user_relname); - pfree(temp_rel->relname); + /* This entry must be removed */ pfree(temp_rel); /* remove from linked list */ if (prev != NIL) @@ -216,65 +274,13 @@ remove_temp_rel_in_myxid(void) } else { + /* This entry must be preserved */ + temp_rel->created_in_cur_xact = false; + temp_rel->deleted_in_cur_xact = false; prev = l; l = lnext(l); } } - - MemoryContextSwitchTo(oldcxt); -} - -/* - * To implement ALTER TABLE RENAME on a temp table, we shouldn't touch - * the underlying physical table at all, just change the map entry! - * - * This routine is invoked early in ALTER TABLE RENAME to check for - * the temp-table case. If oldname matches a temp table name, change - * the map entry to the new logical name and return TRUE (or elog if - * there is a conflict with another temp table name). If there is - * no match, return FALSE indicating that normal rename should proceed. - * - * We also reject an attempt to rename a normal table to a name in use - * as a temp table name. That would fail later on anyway when rename.c - * looks for a rename conflict, but we can give a more specific error - * message for the problem here. - * - * It might seem that we need to check for attempts to rename the physical - * file underlying a temp table, but that'll be rejected anyway because - * pg_tempXXX looks like a system table name. - * - * A nitpicker might complain that the rename should be undone if the - * current xact is later aborted, but I'm not going to fix that now. 
- * This whole mapping mechanism ought to be replaced with something - * schema-based, anyhow. - */ -bool -rename_temp_relation(const char *oldname, - const char *newname) -{ - List *l; - - foreach(l, temp_rels) - { - TempTable *temp_rel = (TempTable *) lfirst(l); - - if (strcmp(temp_rel->user_relname, oldname) == 0) - { - if (get_temp_rel_by_username(newname) != NULL) - elog(ERROR, "Cannot rename temp table \"%s\": temp table \"%s\" already exists", - oldname, newname); - /* user_relname was palloc'd NAMEDATALEN, so safe to re-use it */ - StrNCpy(temp_rel->user_relname, newname, NAMEDATALEN); - return true; - } - } - - /* Old name does not match any temp table name, what about new? */ - if (get_temp_rel_by_username(newname) != NULL) - elog(ERROR, "Cannot rename \"%s\" to \"%s\": a temp table by that name already exists", - oldname, newname); - - return false; } @@ -292,8 +298,11 @@ get_temp_rel_by_username(const char *user_relname) { TempTable *temp_rel = (TempTable *) lfirst(l); - if (strcmp(temp_rel->user_relname, user_relname) == 0) - return temp_rel->relname; + if (temp_rel->deleted_in_cur_xact) + continue; /* ignore it if logically deleted */ + + if (strcmp(NameStr(temp_rel->user_relname), user_relname) == 0) + return NameStr(temp_rel->relname); } return NULL; } @@ -310,8 +319,11 @@ get_temp_rel_by_physicalname(const char *relname) { TempTable *temp_rel = (TempTable *) lfirst(l); - if (strcmp(temp_rel->relname, relname) == 0) - return temp_rel->user_relname; + if (temp_rel->deleted_in_cur_xact) + continue; /* ignore it if logically deleted */ + + if (strcmp(NameStr(temp_rel->relname), relname) == 0) + return NameStr(temp_rel->user_relname); } /* needed for bootstrapping temp tables */ return pstrdup(relname); diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 409e103f4d..4cc317492f 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions 
Copyright (c) 1994, Regents of the University of California * - * $Id: heap.h,v 1.31 2000/07/04 06:11:54 tgl Exp $ + * $Id: heap.h,v 1.32 2000/11/08 22:10:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,7 +29,7 @@ extern Relation heap_create(char *relname, TupleDesc tupDesc, bool istemp, bool storage_create, bool allow_system_table_mods); -extern bool heap_storage_create(Relation rel); +extern void heap_storage_create(Relation rel); extern Oid heap_create_with_catalog(char *relname, TupleDesc tupdesc, char relkind, bool istemp, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index e00b25e6f0..967bffb4aa 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: index.h,v 1.29 2000/07/14 22:17:56 tgl Exp $ + * $Id: index.h,v 1.30 2000/11/08 22:10:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,9 +46,9 @@ extern void FormIndexDatum(IndexInfo *indexInfo, Datum *datum, char *nullv); -extern void UpdateStats(Oid relid, long reltuples, bool inplace); +extern void UpdateStats(Oid relid, long reltuples); extern bool IndexesAreActive(Oid relid, bool comfirmCommitted); -extern void setRelhasindexInplace(Oid relid, bool hasindex, bool immediate); +extern void setRelhasindex(Oid relid, bool hasindex); extern bool SetReindexProcessing(bool processing); extern bool IsReindexProcessing(void); diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index fd1cfdb360..421156ac21 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parse_clause.h,v 1.19 2000/09/12 21:07:12 tgl Exp $ + * 
$Id: parse_clause.h,v 1.20 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,7 @@ #include "parser/parse_node.h" extern void makeRangeTable(ParseState *pstate, List *frmList); +extern void lockTargetTable(ParseState *pstate, char *relname); extern void setTargetTable(ParseState *pstate, char *relname, bool inh, bool inJoinSet); extern Node *transformWhereClause(ParseState *pstate, Node *where); diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 22dd797c7f..a0a41c3289 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: parse_node.h,v 1.22 2000/09/29 18:21:40 tgl Exp $ + * $Id: parse_node.h,v 1.23 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,6 +26,7 @@ typedef struct ParseState List *p_joinlist; /* join items so far (will become * FromExpr node's fromlist) */ int p_last_resno; /* last targetlist resno assigned */ + List *p_forUpdate; /* FOR UPDATE clause, if any (see gram.y) */ bool p_hasAggs; bool p_hasSubLinks; bool p_is_insert; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 80aca7c57e..fc15e59859 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: buf_internals.h,v 1.42 2000/10/28 16:21:00 vadim Exp $ + * $Id: buf_internals.h,v 1.43 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -200,7 +200,7 @@ extern int NLocBuffer; extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); extern 
int WriteLocalBuffer(Buffer buffer, bool release); -extern int FlushLocalBuffer(Buffer buffer, bool release); +extern int FlushLocalBuffer(Buffer buffer, bool sync, bool release); extern void InitLocalBuffer(void); extern void LocalBufferSync(void); extern void ResetLocalBufferPool(void); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 0ed4837305..22c0ccde7d 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,15 +7,16 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.42 2000/10/28 16:21:00 vadim Exp $ + * $Id: bufmgr.h,v 1.43 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef BUFMGR_H #define BUFMGR_H -#include "storage/buf_internals.h" #include "access/xlogdefs.h" +#include "storage/buf_internals.h" +#include "storage/relfilenode.h" typedef void *Block; @@ -151,7 +152,7 @@ extern int WriteBuffer(Buffer buffer); extern int WriteNoReleaseBuffer(Buffer buffer); extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum); -extern int FlushBuffer(Buffer buffer, bool release); +extern int FlushBuffer(Buffer buffer, bool sync, bool release); extern void InitBufferPool(IPCKey key); extern void PrintBufferUsage(FILE *statfp); @@ -162,7 +163,8 @@ extern void FlushBufferPool(void); extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); extern int FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock); -extern void ReleaseRelationBuffers(Relation rel); +extern void DropRelationBuffers(Relation rel); +extern void DropRelFileNodeBuffers(RelFileNode rnode); extern void DropBuffers(Oid dbid); extern void PrintPinnedBufs(void); extern int BufferShmemSize(void); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 
49a2e3e5e9..99eed75fe6 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: smgr.h,v 1.23 2000/10/28 16:21:00 vadim Exp $ + * $Id: smgr.h,v 1.24 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,7 +28,7 @@ extern int smgrinit(void); extern int smgrcreate(int16 which, Relation reln); extern int smgrunlink(int16 which, Relation reln); extern int smgrextend(int16 which, Relation reln, char *buffer); -extern int smgropen(int16 which, Relation reln); +extern int smgropen(int16 which, Relation reln, bool failOK); extern int smgrclose(int16 which, Relation reln); extern int smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer); @@ -43,6 +43,7 @@ extern int smgrblindmarkdirty(int16 which, RelFileNode rnode, extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno); extern int smgrnblocks(int16 which, Relation reln); extern int smgrtruncate(int16 which, Relation reln, int nblocks); +extern int smgrDoPendingDeletes(bool isCommit); extern int smgrcommit(void); extern int smgrabort(void); @@ -56,7 +57,7 @@ extern int smgrsync(void); /* in md.c */ extern int mdinit(void); extern int mdcreate(Relation reln); -extern int mdunlink(Relation reln); +extern int mdunlink(RelFileNode rnode); extern int mdextend(Relation reln, char *buffer); extern int mdopen(Relation reln); extern int mdclose(Relation reln); @@ -64,9 +65,9 @@ extern int mdread(Relation reln, BlockNumber blocknum, char *buffer); extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer); extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer); extern int mdmarkdirty(Relation reln, BlockNumber blkno); -extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, bool dofsync); -extern int mdblindmarkdirty(RelFileNode 
rnode, BlockNumber blkno); +extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, + char *buffer, bool dofsync); +extern int mdblindmarkdirty(RelFileNode rnode, BlockNumber blkno); extern int mdnblocks(Relation reln); extern int mdtruncate(Relation reln, int nblocks); extern int mdcommit(void); @@ -81,7 +82,7 @@ extern SPINLOCK MMCacheLock; extern int mminit(void); extern int mmcreate(Relation reln); -extern int mmunlink(Relation reln); +extern int mmunlink(RelFileNode rnode); extern int mmextend(Relation reln, char *buffer); extern int mmopen(Relation reln); extern int mmclose(Relation reln); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index a585152555..b2ccee3adf 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: inval.h,v 1.17 2000/06/08 19:51:06 momjian Exp $ + * $Id: inval.h,v 1.18 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,6 +26,4 @@ extern void RelationInvalidateHeapTuple(Relation relation, HeapTuple tuple); extern void RelationMark4RollbackHeapTuple(Relation relation, HeapTuple tuple); -extern void ImmediateInvalidateSharedHeapTuple(Relation relation, HeapTuple tuple); - #endif /* INVAL_H */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 4deec0618a..fd4012b0dd 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: rel.h,v 1.41 2000/09/07 09:58:38 vadim Exp $ + * $Id: rel.h,v 1.42 2000/11/08 22:10:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,7 +92,6 @@ typedef struct RelationData uint16 rd_refcnt; /* reference count */ bool rd_myxactonly; /* rel uses the 
local buffer mgr */ bool rd_isnailed; /* rel is nailed in cache */ - bool rd_unlinked; /* rel already unlinked or not created yet */ bool rd_indexfound; /* true if rd_indexlist is valid */ bool rd_uniqueindex; /* true if rel is a UNIQUE index */ Form_pg_am rd_am; /* AM tuple */ diff --git a/src/include/utils/temprel.h b/src/include/utils/temprel.h index a99839ac26..789d505878 100644 --- a/src/include/utils/temprel.h +++ b/src/include/utils/temprel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: temprel.h,v 1.11 2000/10/11 21:28:19 momjian Exp $ + * $Id: temprel.h,v 1.12 2000/11/08 22:10:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,7 @@ extern bool rename_temp_relation(const char *oldname, const char *newname); extern void remove_all_temp_relations(void); -extern void remove_temp_rel_in_myxid(void); +extern void AtEOXact_temp_relations(bool isCommit); extern char *get_temp_rel_by_username(const char *user_relname); extern char *get_temp_rel_by_physicalname(const char *relname); diff --git a/src/test/regress/expected/errors.out b/src/test/regress/expected/errors.out index c59ba0817c..b3396b5ec6 100644 --- a/src/test/regress/expected/errors.out +++ b/src/test/regress/expected/errors.out @@ -62,10 +62,10 @@ alter table rename; ERROR: parser: parse error at or near ";" -- no such relation alter table nonesuch rename to newnonesuch; -ERROR: Relation 'nonesuch' does not exist +ERROR: Relation "nonesuch" does not exist -- no such relation alter table nonesuch rename to stud_emp; -ERROR: Relation 'nonesuch' does not exist +ERROR: Relation "nonesuch" does not exist -- system relation alter table stud_emp rename to pg_stud_emp; ERROR: renamerel: Illegal class name: "pg_stud_emp" -- pg_ is reserved for system catalogs