1 /*-------------------------------------------------------------------------
4 * CLUSTER a table on an index. This is now also used for VACUUM FULL.
6 * There is hardly anything left of Paul Brown's original implementation...
9 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994-5, Regents of the University of California
14 * src/backend/commands/cluster.c
16 *-------------------------------------------------------------------------
20 #include "access/amapi.h"
21 #include "access/multixact.h"
22 #include "access/relscan.h"
23 #include "access/rewriteheap.h"
24 #include "access/transam.h"
25 #include "access/tuptoaster.h"
26 #include "access/xact.h"
27 #include "access/xlog.h"
28 #include "catalog/pg_am.h"
29 #include "catalog/catalog.h"
30 #include "catalog/dependency.h"
31 #include "catalog/heap.h"
32 #include "catalog/index.h"
33 #include "catalog/namespace.h"
34 #include "catalog/objectaccess.h"
35 #include "catalog/toasting.h"
36 #include "commands/cluster.h"
37 #include "commands/tablecmds.h"
38 #include "commands/vacuum.h"
39 #include "miscadmin.h"
40 #include "optimizer/planner.h"
41 #include "storage/bufmgr.h"
42 #include "storage/lmgr.h"
43 #include "storage/predicate.h"
44 #include "storage/smgr.h"
45 #include "utils/acl.h"
46 #include "utils/fmgroids.h"
47 #include "utils/inval.h"
48 #include "utils/lsyscache.h"
49 #include "utils/memutils.h"
50 #include "utils/pg_rusage.h"
51 #include "utils/relmapper.h"
52 #include "utils/snapmgr.h"
53 #include "utils/syscache.h"
54 #include "utils/tqual.h"
55 #include "utils/tuplesort.h"
59 * This struct is used to pass around the information on tables to be
60 * clustered. We need this so we can make a list of them when invoked without
61 * a specific table/index pair.
/*
 * Prototypes for routines declared static in this file.  rebuild_relation
 * and copy_heap_data are defined further down; the definitions of
 * get_tables_to_cluster and reform_and_rewrite_tuple are not visible in
 * this listing.
 */
70 static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
71 static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
72 bool verbose, bool *pSwapToastByContent,
73 TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
74 static List *get_tables_to_cluster(MemoryContext cluster_context);
75 static void reform_and_rewrite_tuple(HeapTuple tuple,
76 TupleDesc oldTupDesc, TupleDesc newTupDesc,
77 Datum *values, bool *isnull,
78 bool newRelHasOids, RewriteState rwstate);
81 /*---------------------------------------------------------------------------
82 * This cluster code allows for clustering multiple tables at once. Because
83 * of this, we cannot just run everything on a single transaction, or we
84 * would be forced to acquire exclusive locks on all the tables being
85 * clustered, simultaneously --- very likely leading to deadlock.
87 * To solve this we follow a similar strategy to VACUUM code,
88 * clustering each relation in a separate transaction. For this to work, we need to:
90 * - provide a separate memory context so that we can pass information in
91 * a way that survives across transactions
92 * - start a new transaction every time a new relation is clustered
93 * - check for validity of the information on to-be-clustered relations,
94 * as someone might have deleted a relation behind our back, or
95 * clustered one on a different index
96 * - end the transaction
98 * The single-relation case does not have any such overhead.
100 * We also allow a relation to be specified without index. In that case,
101 * the indisclustered bit will be looked up, and an ERROR will be thrown
102 * if there is no index with the bit set.
103 *---------------------------------------------------------------------------
/*
 * cluster -- entry point that executes the CLUSTER statement described by stmt.
 *
 * stmt->relation set (single-relation case): the table is resolved with
 * RangeVarGetRelidExtended and opened, temp tables of other sessions and
 * partitioned tables are rejected, and the index is chosen: either the one
 * named by stmt->indexname (looked up in the namespace of the table) or,
 * when no name is given, the index of the table that has indisclustered
 * set.  cluster_rel is then called once with recheck = false.
 *
 * stmt->relation NULL (multi-relation case): get_tables_to_cluster builds
 * the work list in a dedicated memory context, and cluster_rel is called for
 * each entry with recheck = true, every call running between its own
 * StartTransactionCommand and CommitTransactionCommand.
 *
 * isTopLevel: in the visible lines it is used only as the argument of
 * PreventInTransactionBlock in the multi-relation case.
 *
 * NOTE(review): the embedded line numbers have gaps; braces, local
 * declarations, the statements that raise the errors, and parts of
 * multi-line calls are not visible in this listing.  Comments here describe
 * only what is visible.
 */
106 cluster(ClusterStmt *stmt, bool isTopLevel)
108 if (stmt->relation != NULL)
110 /* This is the single-relation case. */
112 indexOid = InvalidOid;
115 /* Find, lock, and check permissions on the table */
116 tableOid = RangeVarGetRelidExtended(stmt->relation,
119 RangeVarCallbackOwnsTable, NULL);
120 rel = heap_open(tableOid, NoLock);
123 * Reject clustering a remote temp table ... their local buffer
124 * manager is not going to cope.
126 if (RELATION_IS_OTHER_TEMP(rel))
128 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
129 errmsg("cannot cluster temporary tables of other sessions")));
132 * Reject clustering a partitioned table.
134 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
136 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
137 errmsg("cannot cluster a partitioned table")));
139 if (stmt->indexname == NULL)
143 /* We need to find the index that has indisclustered set. */
144 foreach(index, RelationGetIndexList(rel))
147 Form_pg_index indexForm;
149 indexOid = lfirst_oid(index);
150 idxtuple = SearchSysCache1(INDEXRELID,
151 ObjectIdGetDatum(indexOid));
152 if (!HeapTupleIsValid(idxtuple))
153 elog(ERROR, "cache lookup failed for index %u", indexOid);
154 indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
155 if (indexForm->indisclustered)
157 ReleaseSysCache(idxtuple);
160 ReleaseSysCache(idxtuple);
161 indexOid = InvalidOid;
164 if (!OidIsValid(indexOid))
166 (errcode(ERRCODE_UNDEFINED_OBJECT),
167 errmsg("there is no previously clustered index for table \"%s\"",
168 stmt->relation->relname)));
173 * The index is expected to be in the same namespace as the
176 indexOid = get_relname_relid(stmt->indexname,
177 rel->rd_rel->relnamespace);
178 if (!OidIsValid(indexOid))
180 (errcode(ERRCODE_UNDEFINED_OBJECT),
181 errmsg("index \"%s\" for table \"%s\" does not exist",
182 stmt->indexname, stmt->relation->relname)));
185 /* close relation, keep lock till commit */
186 heap_close(rel, NoLock);
/* Third argument is recheck = false: table and index were validated just above. */
189 cluster_rel(tableOid, indexOid, false, stmt->verbose);
194 * This is the "multi relation" case. We need to cluster all tables
195 * that have some index with indisclustered set.
197 MemoryContext cluster_context;
202 * We cannot run this form of CLUSTER inside a user transaction block;
203 * we'd be holding locks way too long.
205 PreventInTransactionBlock(isTopLevel, "CLUSTER");
208 * Create special memory context for cross-transaction storage.
210 * Since it is a child of PortalContext, it will go away even in case
213 cluster_context = AllocSetContextCreate(PortalContext,
215 ALLOCSET_DEFAULT_SIZES);
218 * Build the list of relations to cluster. Note that this lives in
221 rvs = get_tables_to_cluster(cluster_context);
223 /* Commit to get out of starting transaction */
225 CommitTransactionCommand();
227 /* Ok, now that we've got them all, cluster them one by one */
230 RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
232 /* Start a new transaction for each relation. */
233 StartTransactionCommand();
234 /* functions in indexes may want a snapshot set */
235 PushActiveSnapshot(GetTransactionSnapshot());
/* Third argument is recheck = true: the list entry was collected in an earlier transaction. */
237 cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose);
239 CommitTransactionCommand();
242 /* Start a new transaction for the cleanup work. */
243 StartTransactionCommand();
245 /* Clean up working storage */
246 MemoryContextDelete(cluster_context);
253 * This clusters the table by creating a new, clustered table and
254 * swapping the relfilenodes of the new table and the old table, so
255 * the OID of the original table is preserved. Thus we do not lose
256 * GRANT, inheritance nor references to this table (this was a bug
257 * in releases through 7.3).
259 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
260 * the new table, it's better to create the indexes afterwards than to fill
261 * them incrementally while we load the table.
263 * If indexOid is InvalidOid, the table will be rewritten in physical order
264 * instead of index order. This is the new implementation of VACUUM FULL,
265 * and error messages should refer to the operation as VACUUM not CLUSTER.
/*
 * cluster_rel -- validate one table (plus optional index) and rebuild it.
 *
 * tableOid: table to process; opened here with try_relation_open under
 *           AccessExclusiveLock.
 * indexOid: index to order by, or InvalidOid; the valid/invalid distinction
 *           also selects CLUSTER versus VACUUM wording in messages and in
 *           the CheckTableNotInUse call.
 * recheck:  forwarded to check_index_is_clusterable.  According to the
 *           comments in the body it also gates the re-validation steps that
 *           are needed when the work list was built in an earlier
 *           transaction -- NOTE(review): the line holding that test is not
 *           visible in this listing.
 * verbose:  forwarded to rebuild_relation.
 *
 * Several checks below end in relation_close(OldHeap, AccessExclusiveLock);
 * presumably each is followed by an early return (those lines are not
 * visible here -- confirm against the full file).  On the normal path the
 * relation is handed to rebuild_relation, which closes it.
 */
268 cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose)
272 /* Check for user-requested abort. */
273 CHECK_FOR_INTERRUPTS();
276 * We grab exclusive access to the target rel and index for the duration
277 * of the transaction. (This is redundant for the single-transaction
278 * case, since cluster() already did it.) The index lock is taken inside
279 * check_index_is_clusterable.
281 OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
283 /* If the table has gone away, we can skip processing it */
288 * Since we may open a new transaction for each relation, we have to check
289 * that the relation still is what we think it is.
291 * If this is a single-transaction CLUSTER, we can skip these tests. We
292 * *must* skip the one on indisclustered since it would reject an attempt
293 * to cluster a not-previously-clustered index.
298 Form_pg_index indexForm;
300 /* Check that the user still owns the relation */
301 if (!pg_class_ownercheck(tableOid, GetUserId()))
303 relation_close(OldHeap, AccessExclusiveLock);
308 * Silently skip a temp table for a remote session. Only doing this
309 * check in the "recheck" case is appropriate (which currently means
310 * somebody is executing a database-wide CLUSTER), because there is
311 * another check in cluster() which will stop any attempt to cluster
312 * remote temp tables by name. There is another check in cluster_rel
313 * which is redundant, but we leave it for extra safety.
315 if (RELATION_IS_OTHER_TEMP(OldHeap))
317 relation_close(OldHeap, AccessExclusiveLock);
321 if (OidIsValid(indexOid))
324 * Check that the index still exists
326 if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
328 relation_close(OldHeap, AccessExclusiveLock);
333 * Check that the index is still the one with indisclustered set.
335 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
336 if (!HeapTupleIsValid(tuple)) /* probably can't happen */
338 relation_close(OldHeap, AccessExclusiveLock);
341 indexForm = (Form_pg_index) GETSTRUCT(tuple);
342 if (!indexForm->indisclustered)
344 ReleaseSysCache(tuple);
345 relation_close(OldHeap, AccessExclusiveLock);
348 ReleaseSysCache(tuple);
353 * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
354 * would work in most respects, but the index would only get marked as
355 * indisclustered in the current database, leading to unexpected behavior
356 * if CLUSTER were later invoked in another database.
358 if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
360 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
361 errmsg("cannot cluster a shared catalog")));
364 * Don't process temp tables of other backends ... their local buffer
365 * manager is not going to cope.
367 if (RELATION_IS_OTHER_TEMP(OldHeap))
369 if (OidIsValid(indexOid))
371 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
372 errmsg("cannot cluster temporary tables of other sessions")));
375 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
376 errmsg("cannot vacuum temporary tables of other sessions")));
380 * Also check for active uses of the relation in the current transaction,
381 * including open scans and pending AFTER trigger events.
383 CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
385 /* Check heap and index are valid to cluster on */
386 if (OidIsValid(indexOid))
387 check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
390 * Quietly ignore the request if this is a materialized view which has not
391 * been populated from its query. No harm is done because there is no data
392 * to deal with, and we don't want to throw an error if this is part of a
393 * multi-relation request -- for example, CLUSTER was run on the entire
396 if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
397 !RelationIsPopulated(OldHeap))
399 relation_close(OldHeap, AccessExclusiveLock);
404 * All predicate locks on the tuples or pages are about to be made
405 * invalid, because we move tuples around. Promote them to relation
406 * locks. Predicate locks on indexes will be promoted when they are
409 TransferPredicateLocksToHeapRelation(OldHeap);
411 /* rebuild_relation does all the dirty work */
412 rebuild_relation(OldHeap, indexOid, verbose);
414 /* NB: rebuild_relation does heap_close() on OldHeap */
418 * Verify that the specified heap and index are valid to cluster on
420 * Side effect: obtains lock on the index. The caller may
421 * in some cases already have AccessExclusiveLock on the table, but
422 * not in all cases so we can't rely on the table-level lock for protection here.
/*
 * check_index_is_clusterable -- confirm that indexOid may be used to cluster
 * OldHeap, reporting an error otherwise.
 *
 * OldHeap:  the table, already opened by the caller.
 * indexOid: candidate index; opened here with index_open under lockmode.
 * recheck:  not referenced in the visible lines of this function.
 * lockmode: lock level requested on the index.
 *
 * Conditions tested, in order:
 *   - the relation is an index of OldHeap (rd_index is set and its indrelid
 *     equals the OID of OldHeap);
 *   - the access method has amclusterable set;
 *   - the index has no predicate (indpred is null), i.e. is not partial;
 *   - IndexIsValid accepts the index.
 *
 * The index is closed with NoLock at the end, so the lock taken at open
 * time stays held.
 *
 * NOTE(review): the statement that raises each error (the line preceding
 * every errcode/errmsg pair) is not visible in this listing.
 */
426 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
430 OldIndex = index_open(indexOid, lockmode);
433 * Check that index is in fact an index on the given relation
435 if (OldIndex->rd_index == NULL ||
436 OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
438 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
439 errmsg("\"%s\" is not an index for table \"%s\"",
440 RelationGetRelationName(OldIndex),
441 RelationGetRelationName(OldHeap))));
443 /* Index AM must allow clustering */
444 if (!OldIndex->rd_amroutine->amclusterable)
446 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
447 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
448 RelationGetRelationName(OldIndex))));
451 * Disallow clustering on incomplete indexes (those that might not index
452 * every row of the relation). We could relax this by making a separate
453 * seqscan pass over the table to copy the missing rows, but that seems
454 * expensive and tedious.
456 if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
458 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
459 errmsg("cannot cluster on partial index \"%s\"",
460 RelationGetRelationName(OldIndex))));
463 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
464 * it might well not contain entries for every heap row, or might not even
465 * be internally consistent. (But note that we don't check indcheckxmin;
466 * the worst consequence of following broken HOT chains would be that we
467 * might put recently-dead tuples out-of-order in the new table, and there
468 * is little harm in that.)
470 if (!IndexIsValid(OldIndex->rd_index))
472 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
473 errmsg("cannot cluster on invalid index \"%s\"",
474 RelationGetRelationName(OldIndex))));
476 /* Drop relcache refcnt on OldIndex, but keep lock */
477 index_close(OldIndex, NoLock);
481 * mark_index_clustered: mark the specified index as the one clustered on
483 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
/*
 * mark_index_clustered -- make indexOid the only index of rel that carries
 * the indisclustered flag.
 *
 * rel:         table whose index list is walked; a partitioned table is
 *              refused.
 * indexOid:    index to flag, or InvalidOid to leave no index flagged.
 * is_internal: forwarded to InvokeObjectPostAlterHookArg.
 *
 * When indexOid is valid and already flagged there is nothing to change
 * (see the first lookup below; the early exit line itself is not visible in
 * this listing).  Otherwise pg_index is opened with RowExclusiveLock and
 * each index of rel is examined: a set flag is cleared, and the flag is set
 * on the entry whose OID equals indexOid.  Changed tuples are written back
 * with CatalogTupleUpdate.
 *
 * NOTE(review): braces are missing from this listing, so the exact scope of
 * the InvokeObjectPostAlterHookArg call cannot be determined from here.
 */
486 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
488 HeapTuple indexTuple;
489 Form_pg_index indexForm;
493 /* Disallow applying to a partitioned table */
494 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
496 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
497 errmsg("cannot mark index clustered in partitioned table")));
500 * If the index is already marked clustered, no need to do anything.
502 if (OidIsValid(indexOid))
504 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
505 if (!HeapTupleIsValid(indexTuple))
506 elog(ERROR, "cache lookup failed for index %u", indexOid);
507 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
509 if (indexForm->indisclustered)
511 ReleaseSysCache(indexTuple);
515 ReleaseSysCache(indexTuple);
519 * Check each index of the relation and set/clear the bit as needed.
521 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
523 foreach(index, RelationGetIndexList(rel))
525 Oid thisIndexOid = lfirst_oid(index);
/* Fetch a private copy of the tuple: the flag is changed in place below and the copy is freed at the end of the iteration. */
527 indexTuple = SearchSysCacheCopy1(INDEXRELID,
528 ObjectIdGetDatum(thisIndexOid));
529 if (!HeapTupleIsValid(indexTuple))
530 elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
531 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
534 * Unset the bit if set. We know it's wrong because we checked this
537 if (indexForm->indisclustered)
539 indexForm->indisclustered = false;
540 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
542 else if (thisIndexOid == indexOid)
544 /* this was checked earlier, but let's be real sure */
545 if (!IndexIsValid(indexForm))
546 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
547 indexForm->indisclustered = true;
548 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
551 InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
552 InvalidOid, is_internal);
554 heap_freetuple(indexTuple);
557 heap_close(pg_index, RowExclusiveLock);
561 * rebuild_relation: rebuild an existing relation in index or physical order
563 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
564 * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
566 * NB: this routine closes OldHeap at the right time; caller should not.
/*
 * rebuild_relation -- rewrite OldHeap into a transient heap and swap the two.
 *
 * OldHeap:  opened relation to rebuild; it is closed in here (with NoLock,
 *           so the lock outlives the close).
 * indexOid: index giving the new tuple order, or InvalidOid; when valid it
 *           is first recorded through mark_index_clustered.
 * verbose:  forwarded to copy_heap_data.
 *
 * Sequence visible here: mark the index, remember relpersistence and the
 * system-catalog status, close OldHeap, create the transient heap with
 * make_new_heap, fill it with copy_heap_data (which also reports the toast
 * swap mode and the two cutoff values), then call finish_heap_swap.
 *
 * NOTE(review): this listing stops partway through the finish_heap_swap
 * argument list, and some local declarations are not visible.
 */
569 rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
/* Captured at declaration time because OldHeap is closed before these values are used. */
571 Oid tableOid = RelationGetRelid(OldHeap);
572 Oid tableSpace = OldHeap->rd_rel->reltablespace;
575 bool is_system_catalog;
576 bool swap_toast_by_content;
577 TransactionId frozenXid;
578 MultiXactId cutoffMulti;
580 /* Mark the correct index as clustered */
581 if (OidIsValid(indexOid))
582 mark_index_clustered(OldHeap, indexOid, true);
584 /* Remember info about rel before closing OldHeap */
585 relpersistence = OldHeap->rd_rel->relpersistence;
586 is_system_catalog = IsSystemRelation(OldHeap);
588 /* Close relcache entry, but keep lock until transaction commit */
589 heap_close(OldHeap, NoLock);
591 /* Create the transient table that will receive the re-ordered data */
592 OIDNewHeap = make_new_heap(tableOid, tableSpace,
594 AccessExclusiveLock);
596 /* Copy the heap data into the new table in the desired order */
597 copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose,
598 &swap_toast_by_content, &frozenXid, &cutoffMulti);
601 * Swap the physical files of the target and transient tables, then
602 * rebuild the target's indexes and throw away the transient table.
604 finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
605 swap_toast_by_content, false, true,
606 frozenXid, cutoffMulti,
612 * Create the transient table that will be filled with new data during
613 * CLUSTER, ALTER TABLE, and similar operations. The transient table
614 * duplicates the logical structure of the OldHeap, but is placed in
615 * NewTableSpace which might be different from OldHeap's. Also, it's built
616 * with the specified persistence, which might differ from the original's.
618 * After this, the caller should load the new heap with transferred/modified
619 * data, then call finish_heap_swap to complete the operation.
/*
 * make_new_heap -- create the empty transient heap used while rewriting
 * OIDOldHeap.
 *
 * OIDOldHeap:     existing table; opened here with the lock level held in
 *                 lockmode and closed again with NoLock at the end.
 * NewTableSpace:  not referenced in the visible lines -- presumably passed
 *                 to heap_create_with_catalog (TODO confirm).
 * relpersistence: when equal to RELPERSISTENCE_TEMP the new heap goes into
 *                 the namespace returned by LookupCreationNamespace for
 *                 pg_temp; otherwise into the namespace of the old heap.
 *
 * The new heap is named pg_temp_ followed by the OID of the old heap, and
 * the visible arguments show it receiving the owner and the mapped status of
 * the old heap.  reloptions are read from the pg_class entry of the old
 * heap.  If the old heap has a toast table, NewHeapCreateToastTable is
 * called for the new heap with the reloptions read from that toast table.
 *
 * NOTE(review): the rest of the parameter list (which declares lockmode),
 * most heap_create_with_catalog arguments, and the return statement are not
 * visible in this listing; OIDNewHeap is presumably the return value.
 */
622 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
625 TupleDesc OldHeapDesc;
626 char NewHeapName[NAMEDATALEN];
635 OldHeap = heap_open(OIDOldHeap, lockmode);
636 OldHeapDesc = RelationGetDescr(OldHeap);
639 * Note that the NewHeap will not receive any of the defaults or
640 * constraints associated with the OldHeap; we don't need 'em, and there's
641 * no reason to spend cycles inserting them into the catalogs only to
646 * But we do want to use reloptions of the old heap for new heap.
648 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
649 if (!HeapTupleIsValid(tuple))
650 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
651 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
654 reloptions = (Datum) 0;
656 if (relpersistence == RELPERSISTENCE_TEMP)
657 namespaceid = LookupCreationNamespace("pg_temp");
659 namespaceid = RelationGetNamespace(OldHeap);
662 * Create the new heap, using a temporary name in the same namespace as
663 * the existing table. NOTE: there is some risk of collision with user
664 * relnames. Working around this seems more trouble than it's worth; in
665 * particular, we can't create the new heap in a different namespace from
666 * the old, or we will have problems with the TEMP status of temp tables.
668 * Note: the new heap is not a shared relation, even if we are rebuilding
669 * a shared rel. However, we do make the new heap mapped if the source is
670 * mapped. This simplifies swap_relation_files, and is absolutely
671 * necessary for rebuilding pg_class, for reasons explained there.
673 snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
675 OIDNewHeap = heap_create_with_catalog(NewHeapName,
681 OldHeap->rd_rel->relowner,
687 RelationIsMapped(OldHeap),
697 Assert(OIDNewHeap != InvalidOid);
699 ReleaseSysCache(tuple);
702 * Advance command counter so that the newly-created relation's catalog
703 * tuples will be visible to heap_open.
705 CommandCounterIncrement();
708 * If necessary, create a TOAST table for the new relation.
710 * If the relation doesn't have a TOAST table already, we can't need one
711 * for the new relation. The other way around is possible though: if some
712 * wide columns have been dropped, NewHeapCreateToastTable can decide that
713 * no TOAST table is needed for the new table.
715 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
716 * that the TOAST table will be visible for insertion.
718 toastid = OldHeap->rd_rel->reltoastrelid;
719 if (OidIsValid(toastid))
721 /* keep the existing toast table's reloptions, if any */
722 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
723 if (!HeapTupleIsValid(tuple))
724 elog(ERROR, "cache lookup failed for relation %u", toastid);
725 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
728 reloptions = (Datum) 0;
730 NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode);
732 ReleaseSysCache(tuple);
735 heap_close(OldHeap, NoLock);
741 * Do the physical copying of heap data.
743 * There are three output parameters:
744 * *pSwapToastByContent is set true if toast tables must be swapped by content.
745 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
746 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
/*
 * copy_heap_data -- copy the contents of OIDOldHeap into OIDNewHeap.
 *
 * OIDNewHeap:  transient heap that receives the tuples.
 * OIDOldHeap:  heap being rewritten.
 * OIDOldIndex: index giving the output order, or InvalidOid.  With an index,
 *              tuples are read either by an index scan or by a sequential
 *              scan that feeds a tuplesort; without one, by a plain
 *              sequential scan.
 * verbose:     selects the level of the progress messages (INFO when true,
 *              DEBUG2 otherwise).
 * pSwapToastByContent, pFreezeXid, pCutoffMulti: output parameters, all
 *              assigned before the scan starts.
 *
 * Outline of the visible code:
 *   1. open both heaps (and the index, if given) under AccessExclusiveLock
 *      and allocate the values/isnull work arrays;
 *   2. lock the old toast table, decide whether WAL is needed, and decide
 *      between swapping toast by content or by links;
 *   3. compute the freeze and multixact cutoffs, replace each by the old
 *      relfrozenxid/relminmxid if it would otherwise precede that value,
 *      and store them in the output parameters;
 *   4. scan the old heap, classify every tuple with
 *      HeapTupleSatisfiesVacuum, and hand it to the rewrite module or to
 *      the tuplesort;
 *   5. if sorting, perform the sort and rewrite the tuples in sorted order;
 *      then finish the rewrite and log the totals;
 *   6. close the relations, store relpages and reltuples for the new heap
 *      in pg_class, and increment the command counter.
 *
 * NOTE(review): the embedded line numbers have gaps; braces, several local
 * declarations, loop headers, case labels and parts of multi-line calls are
 * not visible in this listing.
 */
749 copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
750 bool *pSwapToastByContent, TransactionId *pFreezeXid,
751 MultiXactId *pCutoffMulti)
756 Relation relRelation;
758 Form_pg_class relform;
759 TupleDesc oldTupDesc;
760 TupleDesc newTupDesc;
764 IndexScanDesc indexScan;
765 HeapScanDesc heapScan;
767 bool is_system_catalog;
768 TransactionId OldestXmin;
769 TransactionId FreezeXid;
770 MultiXactId MultiXactCutoff;
771 RewriteState rwstate;
773 Tuplesortstate *tuplesort;
774 double num_tuples = 0,
776 tups_recently_dead = 0;
777 BlockNumber num_pages;
778 int elevel = verbose ? INFO : DEBUG2;
781 pg_rusage_init(&ru0);
784 * Open the relations we need.
786 NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
787 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
788 if (OidIsValid(OIDOldIndex))
789 OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
794 * Their tuple descriptors should be exactly alike, but here we only need
795 * assume that they have the same number of columns.
797 oldTupDesc = RelationGetDescr(OldHeap);
798 newTupDesc = RelationGetDescr(NewHeap);
799 Assert(newTupDesc->natts == oldTupDesc->natts);
801 /* Preallocate values/isnull arrays */
802 natts = newTupDesc->natts;
803 values = (Datum *) palloc(natts * sizeof(Datum));
804 isnull = (bool *) palloc(natts * sizeof(bool));
807 * If the OldHeap has a toast table, get lock on the toast table to keep
808 * it from being vacuumed. This is needed because autovacuum processes
809 * toast tables independently of their main tables, with no lock on the
810 * latter. If an autovacuum were to start on the toast table after we
811 * compute our OldestXmin below, it would use a later OldestXmin, and then
812 * possibly remove as DEAD toast tuples belonging to main tuples we think
813 * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
816 * We don't need to open the toast relation here, just lock it. The lock
817 * will be held till end of transaction.
819 if (OldHeap->rd_rel->reltoastrelid)
820 LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
823 * We need to log the copied data in WAL iff WAL archiving/streaming is
824 * enabled AND it's a WAL-logged rel.
826 use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
828 /* use_wal off requires smgr_targblock be initially invalid */
829 Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
832 * If both tables have TOAST tables, perform toast swap by content. It is
833 * possible that the old table has a toast table but the new one doesn't,
834 * if toastable columns have been dropped. In that case we have to do
835 * swap by links. This is okay because swap by content is only essential
836 * for system catalogs, and we don't support schema changes for them.
838 if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
840 *pSwapToastByContent = true;
843 * When doing swap by content, any toast pointers written into NewHeap
844 * must use the old toast table's OID, because that's where the toast
845 * data will eventually be found. Set this up by setting rd_toastoid.
846 * This also tells toast_save_datum() to preserve the toast value
847 * OIDs, which we want so as not to invalidate toast pointers in
848 * system catalog caches, and to avoid making multiple copies of a
849 * single toast value.
851 * Note that we must hold NewHeap open until we are done writing data,
852 * since the relcache will not guarantee to remember this setting once
853 * the relation is closed. Also, this technique depends on the fact
854 * that no one will try to read from the NewHeap until after we've
855 * finished writing it and swapping the rels --- otherwise they could
856 * follow the toast pointers to the wrong place. (It would actually
857 * work for values copied over from the old toast table, but not for
858 * any values that we toast which were previously not toasted.)
860 NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
863 *pSwapToastByContent = false;
866 * Compute xids used to freeze and weed out dead tuples and multixacts.
867 * Since we're going to rewrite the whole table anyway, there's no reason
868 * not to be aggressive about this.
870 vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
871 &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
875 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
876 * backwards, so take the max.
878 if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
879 FreezeXid = OldHeap->rd_rel->relfrozenxid;
882 * MultiXactCutoff, similarly, shouldn't go backwards either.
884 if (MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
885 MultiXactCutoff = OldHeap->rd_rel->relminmxid;
887 /* return selected values to caller */
888 *pFreezeXid = FreezeXid;
889 *pCutoffMulti = MultiXactCutoff;
891 /* Remember if it's a system catalog */
892 is_system_catalog = IsSystemRelation(OldHeap);
894 /* Initialize the rewrite operation */
895 rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
896 MultiXactCutoff, use_wal);
899 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
900 * the OldHeap. We know how to use a sort to duplicate the ordering of a
901 * btree index, and will use seqscan-and-sort for that case if the planner
902 * tells us it's cheaper. Otherwise, always indexscan if an index is
903 * provided, else plain seqscan.
905 if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
906 use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
910 /* Set up sorting if wanted */
912 tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
913 maintenance_work_mem,
919 * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
920 * that still need to be copied, we scan with SnapshotAny and use
921 * HeapTupleSatisfiesVacuum for the visibility test.
923 if (OldIndex != NULL && !use_sort)
926 indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
927 index_rescan(indexScan, NULL, 0, NULL, 0);
931 heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
935 /* Log what we're doing */
936 if (indexScan != NULL)
938 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
939 get_namespace_name(RelationGetNamespace(OldHeap)),
940 RelationGetRelationName(OldHeap),
941 RelationGetRelationName(OldIndex))));
942 else if (tuplesort != NULL)
944 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
945 get_namespace_name(RelationGetNamespace(OldHeap)),
946 RelationGetRelationName(OldHeap))));
949 (errmsg("vacuuming \"%s.%s\"",
950 get_namespace_name(RelationGetNamespace(OldHeap)),
951 RelationGetRelationName(OldHeap))));
954 * Scan through the OldHeap, either in OldIndex order or sequentially;
955 * copy each tuple into the NewHeap, or transiently to the tuplesort
956 * module. Note that we don't bother sorting dead tuples (they won't get
957 * to the new table anyway).
965 CHECK_FOR_INTERRUPTS();
967 if (indexScan != NULL)
969 tuple = index_getnext(indexScan, ForwardScanDirection);
973 /* Since we used no scan keys, should never need to recheck */
974 if (indexScan->xs_recheck)
975 elog(ERROR, "CLUSTER does not support lossy index conditions");
977 buf = indexScan->xs_cbuf;
981 tuple = heap_getnext(heapScan, ForwardScanDirection);
985 buf = heapScan->rs_cbuf;
/* The buffer holding the tuple is share-locked for the visibility test and unlocked again right after the switch. */
988 LockBuffer(buf, BUFFER_LOCK_SHARE);
990 switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
993 /* Definitely dead */
996 case HEAPTUPLE_RECENTLY_DEAD:
997 tups_recently_dead += 1;
1000 /* Live or recently dead, must copy it */
1003 case HEAPTUPLE_INSERT_IN_PROGRESS:
1006 * Since we hold exclusive lock on the relation, normally the
1007 * only way to see this is if it was inserted earlier in our
1008 * own transaction. However, it can happen in system
1009 * catalogs, since we tend to release write lock before commit
1010 * there. Give a warning if neither case applies; but in any
1011 * case we had better copy it.
1013 if (!is_system_catalog &&
1014 !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
1015 elog(WARNING, "concurrent insert in progress within table \"%s\"",
1016 RelationGetRelationName(OldHeap));
1020 case HEAPTUPLE_DELETE_IN_PROGRESS:
1023 * Similar situation to INSERT_IN_PROGRESS case.
1025 if (!is_system_catalog &&
1026 !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
1027 elog(WARNING, "concurrent delete in progress within table \"%s\"",
1028 RelationGetRelationName(OldHeap));
1029 /* treat as recently dead */
1030 tups_recently_dead += 1;
1034 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1035 isdead = false; /* keep compiler quiet */
1039 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1044 /* heap rewrite module still needs to see it... */
1045 if (rewrite_heap_dead_tuple(rwstate, tuple))
1047 /* A previous recently-dead tuple is now known dead */
1049 tups_recently_dead -= 1;
1055 if (tuplesort != NULL)
1056 tuplesort_putheaptuple(tuplesort, tuple);
1058 reform_and_rewrite_tuple(tuple,
1059 oldTupDesc, newTupDesc,
1061 NewHeap->rd_rel->relhasoids, rwstate);
1064 if (indexScan != NULL)
1065 index_endscan(indexScan);
1066 if (heapScan != NULL)
1067 heap_endscan(heapScan);
1070 * In scan-and-sort mode, complete the sort, then read out all live tuples
1071 * from the tuplestore and write them to the new relation.
1073 if (tuplesort != NULL)
1075 tuplesort_performsort(tuplesort);
1081 CHECK_FOR_INTERRUPTS();
1083 tuple = tuplesort_getheaptuple(tuplesort, true);
1087 reform_and_rewrite_tuple(tuple,
1088 oldTupDesc, newTupDesc,
1090 NewHeap->rd_rel->relhasoids, rwstate);
1093 tuplesort_end(tuplesort);
1096 /* Write out any remaining tuples, and fsync if needed */
1097 end_heap_rewrite(rwstate);
1099 /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
1100 NewHeap->rd_toastoid = InvalidOid;
1102 num_pages = RelationGetNumberOfBlocks(NewHeap);
1104 /* Log what we did */
1106 (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1107 RelationGetRelationName(OldHeap),
1108 tups_vacuumed, num_tuples,
1109 RelationGetNumberOfBlocks(OldHeap)),
1110 errdetail("%.0f dead row versions cannot be removed yet.\n"
1113 pg_rusage_show(&ru0))));
1119 if (OldIndex != NULL)
1120 index_close(OldIndex, NoLock);
1121 heap_close(OldHeap, NoLock);
1122 heap_close(NewHeap, NoLock);
1124 /* Update pg_class to reflect the correct values of pages and tuples. */
1125 relRelation = heap_open(RelationRelationId, RowExclusiveLock);
1127 reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
1128 if (!HeapTupleIsValid(reltup))
1129 elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
1130 relform = (Form_pg_class) GETSTRUCT(reltup);
1132 relform->relpages = num_pages;
1133 relform->reltuples = num_tuples;
1135 /* Don't update the stats for pg_class. See swap_relation_files. */
1136 if (OIDOldHeap != RelationRelationId)
1137 CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
/* NOTE(review): the embedded numbering skips 1138 here; if that line is an else, the invalidation below runs only when the old heap is pg_class itself -- confirm against the full file. */
1139 CacheInvalidateRelcacheByTuple(reltup);
1142 heap_freetuple(reltup);
1143 heap_close(relRelation, RowExclusiveLock);
1145 /* Make the update visible */
1146 CommandCounterIncrement();
1150 * Swap the physical files of two given relations.
1152 * We swap the physical identity (reltablespace, relfilenode) while keeping the
1153 * same logical identities of the two relations. relpersistence is also
1154 * swapped, which is critical since it determines where buffers live for each
1157 * We can swap associated TOAST data in either of two ways: recursively swap
1158 * the physical content of the toast tables (and their indexes), or swap the
1159 * TOAST links in the given relations' pg_class entries. The former is needed
1160 * to manage rewrites of shared catalogs (where we cannot change the pg_class
1161 * links) while the latter is the only way to handle cases in which a toast
1162 * table is added or removed altogether.
1164 * Additionally, the first relation is marked with relfrozenxid set to
1165 * frozenXid. It seems a bit ugly to have this here, but the caller would
1166 * have to do it anyway, so having it here saves a heap_update. Note: in
1167 * the swap-toast-links case, we assume we don't need to change the toast
1168 * table's relfrozenxid: the new version of the toast table should already
1169 * have relfrozenxid set to RecentXmin, which is good enough.
1171 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1172 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1173 * having to look the information up again later in finish_heap_swap.
/*
 * swap_relation_files -- reviewer summary of the steps visible below.
 *
 * r1, r2: OIDs of the two relations whose storage is exchanged.
 * target_is_pg_class: when true the pg_class rows are not rewritten; only
 *		relcache invalidations are issued for them.
 * swap_toast_by_content: true exchanges TOAST storage through a recursive
 *		call; false exchanges the reltoastrelid links and re-registers the
 *		TOAST dependency records instead.
 * frozenXid, cutoffMulti: stored into relfrozenxid and relminmxid of r1
 *		unless r1 is an index.
 * is_internal: forwarded to the post-alter hook invoked for r1.
 * mapped_tables: output cursor; r2 is written to it in the mapped case.
 *
 * Steps: copy both pg_class rows; exchange relfilenode, reltablespace and
 * relpersistence (non-mapped case) or exchange the relation-map entries
 * (mapped case); exchange relpages, reltuples and relallvisible; write the
 * rows back or just invalidate; invoke the post-alter hooks; handle TOAST
 * relations and, for TOAST tables swapped by content, their valid indexes;
 * finally free the copies, close pg_class and close the smgr links.
 *
 * Every failure path visible here raises elog(ERROR).
 *
 * NOTE(review): is_internal and mapped_tables are used in the body, but
 * their declarations are not visible in this excerpt -- confirm against the
 * full signature.
 */
1176 swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
1177 bool swap_toast_by_content,
1179 TransactionId frozenXid,
1180 MultiXactId cutoffMulti,
1183 Relation relRelation;
1186 Form_pg_class relform1,
1193 /* We need writable copies of both pg_class tuples. */
1194 relRelation = heap_open(RelationRelationId, RowExclusiveLock);
1196 reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
1197 if (!HeapTupleIsValid(reltup1))
1198 elog(ERROR, "cache lookup failed for relation %u", r1);
1199 relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1201 reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
1202 if (!HeapTupleIsValid(reltup2))
1203 elog(ERROR, "cache lookup failed for relation %u", r2);
1204 relform2 = (Form_pg_class) GETSTRUCT(reltup2);
/*
 * The relfilenode values read here select between the non-mapped and the
 * mapped handling below: both valid means non-mapped, exactly one valid is
 * rejected with an error, and in the mapped case they are overwritten with
 * the values looked up through RelationMapOidToFilenode().
 */
1206 relfilenode1 = relform1->relfilenode;
1207 relfilenode2 = relform2->relfilenode;
1209 if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
1212 * Normal non-mapped relations: swap relfilenodes, reltablespaces,
1215 Assert(!target_is_pg_class);
1217 swaptemp = relform1->relfilenode;
1218 relform1->relfilenode = relform2->relfilenode;
1219 relform2->relfilenode = swaptemp;
1221 swaptemp = relform1->reltablespace;
1222 relform1->reltablespace = relform2->reltablespace;
1223 relform2->reltablespace = swaptemp;
1225 swptmpchr = relform1->relpersistence;
1226 relform1->relpersistence = relform2->relpersistence;
1227 relform2->relpersistence = swptmpchr;
1229 /* Also swap toast links, if we're swapping by links */
1230 if (!swap_toast_by_content)
1232 swaptemp = relform1->reltoastrelid;
1233 relform1->reltoastrelid = relform2->reltoastrelid;
1234 relform2->reltoastrelid = swaptemp;
1240 * Mapped-relation case. Here we have to swap the relation mappings
1241 * instead of modifying the pg_class columns. Both must be mapped.
1243 if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
1244 elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1245 NameStr(relform1->relname));
1248 * We can't change the tablespace nor persistence of a mapped rel, and
1249 * we can't handle toast link swapping for one either, because we must
1250 * not apply any critical changes to its pg_class row. These cases
1251 * should be prevented by upstream permissions tests, so these checks
1252 * are non-user-facing emergency backstop.
1254 if (relform1->reltablespace != relform2->reltablespace)
1255 elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1256 NameStr(relform1->relname));
1257 if (relform1->relpersistence != relform2->relpersistence)
1258 elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1259 NameStr(relform1->relname));
1260 if (!swap_toast_by_content &&
1261 (relform1->reltoastrelid || relform2->reltoastrelid))
1262 elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1263 NameStr(relform1->relname));
1266 * Fetch the mappings --- shouldn't fail, but be paranoid
1268 relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
1269 if (!OidIsValid(relfilenode1))
1270 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1271 NameStr(relform1->relname), r1);
1272 relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
1273 if (!OidIsValid(relfilenode2))
1274 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1275 NameStr(relform2->relname), r2);
1278 * Send replacement mappings to relmapper. Note these won't actually
1279 * take effect until CommandCounterIncrement.
1281 RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
1282 RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
1284 /* Pass OIDs of mapped r2 tables back to caller */
1285 *mapped_tables++ = r2;
1289 * In the case of a shared catalog, these next few steps will only affect
1290 * our own database's pg_class row; but that's okay, because they are all
1291 * noncritical updates. That's also an important fact for the case of a
1292 * mapped catalog, because it's possible that we'll commit the map change
1293 * and then fail to commit the pg_class update.
1296 /* set rel1's frozen Xid and minimum MultiXid */
1297 if (relform1->relkind != RELKIND_INDEX)
1299 Assert(TransactionIdIsNormal(frozenXid));
1300 relform1->relfrozenxid = frozenXid;
1301 Assert(MultiXactIdIsValid(cutoffMulti));
1302 relform1->relminmxid = cutoffMulti;
1305 /* swap size statistics too, since new rel has freshly-updated stats */
1309 int32 swap_allvisible;
1311 swap_pages = relform1->relpages;
1312 relform1->relpages = relform2->relpages;
1313 relform2->relpages = swap_pages;
1315 swap_tuples = relform1->reltuples;
1316 relform1->reltuples = relform2->reltuples;
1317 relform2->reltuples = swap_tuples;
1319 swap_allvisible = relform1->relallvisible;
1320 relform1->relallvisible = relform2->relallvisible;
1321 relform2->relallvisible = swap_allvisible;
1325 * Update the tuples in pg_class --- unless the target relation of the
1326 * swap is pg_class itself. In that case, there is zero point in making
1327 * changes because we'd be updating the old data that we're about to throw
1328 * away. Because the real work being done here for a mapped relation is
1329 * just to change the relation map settings, it's all right to not update
1330 * the pg_class rows in this case. The most important changes will instead
1331 * performed later, in finish_heap_swap() itself.
1333 if (!target_is_pg_class)
1335 CatalogIndexState indstate;
1337 indstate = CatalogOpenIndexes(relRelation);
1338 CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1340 CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1342 CatalogCloseIndexes(indstate);
1346 /* no update ... but we do still need relcache inval */
1347 CacheInvalidateRelcacheByTuple(reltup1);
1348 CacheInvalidateRelcacheByTuple(reltup2);
1352 * Post alter hook for modified relations. The change to r2 is always
1353 * internal, but r1 depends on the invocation context.
1355 InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1356 InvalidOid, is_internal);
1357 InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1361 * If we have toast tables associated with the relations being swapped,
1362 * deal with them too.
1364 if (relform1->reltoastrelid || relform2->reltoastrelid)
1366 if (swap_toast_by_content)
1368 if (relform1->reltoastrelid && relform2->reltoastrelid)
1370 /* Recursively swap the contents of the toast tables */
1371 swap_relation_files(relform1->reltoastrelid,
1372 relform2->reltoastrelid,
1374 swap_toast_by_content,
1382 /* caller messed up */
1383 elog(ERROR, "cannot swap toast files by content when there's only one");
1389 * We swapped the ownership links, so we need to change dependency
1392 * NOTE: it is possible that only one table has a toast table.
1394 * NOTE: at present, a TOAST table's only dependency is the one on
1395 * its owning table. If more are ever created, we'd need to use
1396 * something more selective than deleteDependencyRecordsFor() to
1397 * get rid of just the link we want.
1399 ObjectAddress baseobject,
1404 * We disallow this case for system catalogs, to avoid the
1405 * possibility that the catalog we're rebuilding is one of the
1406 * ones the dependency changes would change. It's too late to be
1407 * making any data changes to the target catalog.
1409 if (IsSystemClass(r1, relform1))
1410 elog(ERROR, "cannot swap toast files by links for system catalogs");
1412 /* Delete old dependencies */
1413 if (relform1->reltoastrelid)
1415 count = deleteDependencyRecordsFor(RelationRelationId,
1416 relform1->reltoastrelid,
1419 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1422 if (relform2->reltoastrelid)
1424 count = deleteDependencyRecordsFor(RelationRelationId,
1425 relform2->reltoastrelid,
1428 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1432 /* Register new dependencies */
1433 baseobject.classId = RelationRelationId;
1434 baseobject.objectSubId = 0;
1435 toastobject.classId = RelationRelationId;
1436 toastobject.objectSubId = 0;
1438 if (relform1->reltoastrelid)
1440 baseobject.objectId = r1;
1441 toastobject.objectId = relform1->reltoastrelid;
1442 recordDependencyOn(&toastobject, &baseobject,
1443 DEPENDENCY_INTERNAL);
1446 if (relform2->reltoastrelid)
1448 baseobject.objectId = r2;
1449 toastobject.objectId = relform2->reltoastrelid;
1450 recordDependencyOn(&toastobject, &baseobject,
1451 DEPENDENCY_INTERNAL);
1457 * If we're swapping two toast tables by content, do the same for their
1458 * valid index. The swap can actually be safely done only if the relations
1461 if (swap_toast_by_content &&
1462 relform1->relkind == RELKIND_TOASTVALUE &&
1463 relform2->relkind == RELKIND_TOASTVALUE)
1468 /* Get valid index for each relation */
1469 toastIndex1 = toast_get_valid_index(r1,
1470 AccessExclusiveLock);
1471 toastIndex2 = toast_get_valid_index(r2,
1472 AccessExclusiveLock);
1474 swap_relation_files(toastIndex1,
1477 swap_toast_by_content,
1479 InvalidTransactionId,
1485 heap_freetuple(reltup1);
1486 heap_freetuple(reltup2);
1488 heap_close(relRelation, RowExclusiveLock);
1491 * Close both relcache entries' smgr links. We need this kluge because
1492 * both links will be invalidated during upcoming CommandCounterIncrement.
1493 * Whichever of the rels is the second to be cleared will have a dangling
1494 * reference to the other's smgr entry. Rather than trying to avoid this
1495 * by ordering operations just so, it's easiest to close the links first.
1496 * (Fortunately, since one of the entries is local in our transaction,
1497 * it's sufficient to clear out our own relcache this way; the problem
1498 * cannot arise for other backends when they see our update on the
1499 * non-transient relation.)
1501 * Caution: the placement of this step interacts with the decision to
1502 * handle toast rels by recursion. When we are trying to rebuild pg_class
1503 * itself, the smgr close on pg_class must happen after all accesses in
1506 RelationCloseSmgrByOid(r1);
1507 RelationCloseSmgrByOid(r2);
1511 * Remove the transient table that was built by make_new_heap, and finish
1512 * cleaning up (including rebuilding all indexes on the old heap).
/*
 * finish_heap_swap -- reviewer summary of the steps visible below.
 *
 *	1. swap_relation_files() exchanges the storage of OIDOldHeap and
 *	   OIDNewHeap and is told whether the target is pg_class itself;
 *	   mapped_tables[] receives OIDs whose relation-map entries must be
 *	   removed again in step 6.
 *	2. For a system catalog, CacheInvalidateCatalog() is called for it.
 *	3. reindex_relation() rebuilds the indexes of OIDOldHeap; the flags add
 *	   constraint checking when check_constraints is set and force index
 *	   persistence to follow newrelpersistence (unlogged or permanent).
 *	4. When OIDOldHeap is pg_class itself, its own row gets relfrozenxid and
 *	   relminmxid stored here.
 *	5. The transient relation OIDNewHeap is dropped with performDeletion().
 *	6. RelationMapRemoveMapping() runs for each valid mapped_tables[] entry;
 *	   the array is zero-filled first, which presumably makes unused slots
 *	   fail OidIsValid() and so end the loop.
 *	7. After a swap by links, the TOAST table and its valid index are renamed
 *	   using the patterns pg_toast_%u and pg_toast_%u_index.
 *	8. For non-catalog tables, RelationClearMissing() is applied.
 *
 * NOTE(review): in step 4 the tuple copy returned by SearchSysCacheCopy1()
 * has no matching heap_freetuple() in the lines shown, unlike the copies
 * made in swap_relation_files(); confirm whether that is intentional.
 *
 * NOTE(review): is_internal is passed on to swap_relation_files(), but its
 * declaration is not visible in this excerpt.
 */
1515 finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1516 bool is_system_catalog,
1517 bool swap_toast_by_content,
1518 bool check_constraints,
1520 TransactionId frozenXid,
1521 MultiXactId cutoffMulti,
1522 char newrelpersistence)
1524 ObjectAddress object;
1525 Oid mapped_tables[4];
1529 /* Zero out possible results from swap_relation_files */
1530 memset(mapped_tables, 0, sizeof(mapped_tables));
1533 * Swap the contents of the heap relations (including any toast tables).
1534 * Also set old heap's relfrozenxid to frozenXid.
1536 swap_relation_files(OIDOldHeap, OIDNewHeap,
1537 (OIDOldHeap == RelationRelationId),
1538 swap_toast_by_content, is_internal,
1539 frozenXid, cutoffMulti, mapped_tables);
1542 * If it's a system catalog, queue a sinval message to flush all catcaches
1543 * on the catalog when we reach CommandCounterIncrement.
1545 if (is_system_catalog)
1546 CacheInvalidateCatalog(OIDOldHeap);
1549 * Rebuild each index on the relation (but not the toast table, which is
1550 * all-new at this point). It is important to do this before the DROP
1551 * step because if we are processing a system catalog that will be used
1552 * during DROP, we want to have its indexes available. There is no
1553 * advantage to the other order anyway because this is all transactional,
1554 * so no chance to reclaim disk space before commit. We do not need a
1555 * final CommandCounterIncrement() because reindex_relation does it.
1557 * Note: because index_build is called via reindex_relation, it will never
1558 * set indcheckxmin true for the indexes. This is OK even though in some
1559 * sense we are building new indexes rather than rebuilding existing ones,
1560 * because the new heap won't contain any HOT chains at all, let alone
1561 * broken ones, so it can't be necessary to set indcheckxmin.
1563 reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1564 if (check_constraints)
1565 reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1568 * Ensure that the indexes have the same persistence as the parent
1571 if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1572 reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1573 else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1574 reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1576 reindex_relation(OIDOldHeap, reindex_flags, 0);
1579 * If the relation being rebuild is pg_class, swap_relation_files()
1580 * couldn't update pg_class's own pg_class entry (check comments in
1581 * swap_relation_files()), thus relfrozenxid was not updated. That's
1582 * annoying because a potential reason for doing a VACUUM FULL is a
1583 * imminent or actual anti-wraparound shutdown. So, now that we can
1584 * access the new relation using its indices, update relfrozenxid.
1585 * pg_class doesn't have a toast relation, so we don't need to update the
1586 * corresponding toast relation. Not that there's little point moving all
1587 * relfrozenxid updates here since swap_relation_files() needs to write to
1588 * pg_class for non-mapped relations anyway.
1590 if (OIDOldHeap == RelationRelationId)
1592 Relation relRelation;
1594 Form_pg_class relform;
1596 relRelation = heap_open(RelationRelationId, RowExclusiveLock);
1598 reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1599 if (!HeapTupleIsValid(reltup))
1600 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1601 relform = (Form_pg_class) GETSTRUCT(reltup);
1603 relform->relfrozenxid = frozenXid;
1604 relform->relminmxid = cutoffMulti;
1606 CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1608 heap_close(relRelation, RowExclusiveLock);
1611 /* Destroy new heap with old filenode */
1612 object.classId = RelationRelationId;
1613 object.objectId = OIDNewHeap;
1614 object.objectSubId = 0;
1617 * The new relation is local to our transaction and we know nothing
1618 * depends on it, so DROP_RESTRICT should be OK.
1620 performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
1622 /* performDeletion does CommandCounterIncrement at end */
1625 * Now we must remove any relation mapping entries that we set up for the
1626 * transient table, as well as its toast table and toast index if any. If
1627 * we fail to do this before commit, the relmapper will complain about new
1628 * permanent map entries being added post-bootstrap.
1630 for (i = 0; OidIsValid(mapped_tables[i]); i++)
1631 RelationMapRemoveMapping(mapped_tables[i]);
1634 * At this point, everything is kosher except that, if we did toast swap
1635 * by links, the toast table's name corresponds to the transient table.
1636 * The name is irrelevant to the backend because it's referenced by OID,
1637 * but users looking at the catalogs could be confused. Rename it to
1638 * prevent this problem.
1640 * Note no lock required on the relation, because we already hold an
1641 * exclusive lock on it.
1643 if (!swap_toast_by_content)
1647 newrel = heap_open(OIDOldHeap, NoLock);
1648 if (OidIsValid(newrel->rd_rel->reltoastrelid))
1651 char NewToastName[NAMEDATALEN];
1653 /* Get the associated valid index to be renamed */
1654 toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1657 /* rename the toast table ... */
1658 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1660 RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1661 NewToastName, true);
1663 /* ... and its valid index too. */
1664 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1667 RenameRelationInternal(toastidx,
1668 NewToastName, true);
1670 relation_close(newrel, NoLock);
1673 /* if it's not a catalog table, clear any missing attribute settings */
1674 if (!is_system_catalog)
1678 newrel = heap_open(OIDOldHeap, NoLock);
1679 RelationClearMissing(newrel);
1680 relation_close(newrel, NoLock);
1686 * Get a list of tables that the current user owns and that
1687 * have indisclustered set. Return the list in a List * of RelToCluster
1688 * with the tableOid and the indexOid on which the table is already
/*
 * get_tables_to_cluster -- reviewer summary.
 *
 * cluster_context: memory context made current while each RelToCluster
 *		entry and its list cell are allocated, so that the result outlives
 *		the context that is current during the scan.
 *
 * Opens pg_index (IndexRelationId) under AccessShareLock and runs a catalog
 * heap scan with one key on indisclustered = true.  For each index found,
 * an ownership test is made on the indexed table (indrelid) for the current
 * user; presumably a failing test skips the entry, but the statement it
 * guards is not visible in this excerpt.  Each accepted index yields a
 * RelToCluster carrying indrelid as tableOid and indexrelid as indexOid,
 * added to the result list with lcons().
 *
 * NOTE(review): the end of the scan and the return statement are not visible
 * in this excerpt; the result is presumably the list held in rvs.
 */
1692 get_tables_to_cluster(MemoryContext cluster_context)
1694 Relation indRelation;
1697 HeapTuple indexTuple;
1698 Form_pg_index index;
1699 MemoryContext old_context;
1704 * Get all indexes that have indisclustered set and are owned by
1705 * appropriate user. System relations or nailed-in relations cannot ever
1706 * have indisclustered set, because CLUSTER will refuse to set it when
1707 * called with one of them as argument.
1709 indRelation = heap_open(IndexRelationId, AccessShareLock);
/* Scan key arguments: column indisclustered, btree equality, value true. */
1711 Anum_pg_index_indisclustered,
1712 BTEqualStrategyNumber, F_BOOLEQ,
1713 BoolGetDatum(true));
1714 scan = heap_beginscan_catalog(indRelation, 1, &entry);
1715 while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1717 index = (Form_pg_index) GETSTRUCT(indexTuple);
/* Ownership is checked on the table the index belongs to, not the index. */
1719 if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1723 * We have to build the list in a different memory context so it will
1724 * survive the cross-transaction processing
1726 old_context = MemoryContextSwitchTo(cluster_context);
1728 rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1729 rvtc->tableOid = index->indrelid;
1730 rvtc->indexOid = index->indexrelid;
1731 rvs = lcons(rvtc, rvs);
/* Restore the context that was current before the allocation. */
1733 MemoryContextSwitchTo(old_context);
1737 relation_close(indRelation, AccessShareLock);
1744 * Reconstruct and rewrite the given tuple
1746 * We cannot simply copy the tuple as-is, for several reasons:
1748 * 1. We'd like to squeeze out the values of any dropped columns, both
1749 * to save space and to ensure we have no corner-case failures. (It's
1750 * possible for example that the new table hasn't got a TOAST table
1751 * and so is unable to store any large values of dropped cols.)
1753 * 2. The tuple might not even be legal for the new table; this is
1754 * currently only known to happen as an after-effect of ALTER TABLE
1757 * So, we must reconstruct the tuple from component Datums.
/*
 * reform_and_rewrite_tuple -- reviewer summary.
 *
 * tuple: source tuple; it is deformed using oldTupDesc and also handed to
 *		rewrite_heap_tuple() together with the re-formed copy.
 * oldTupDesc, newTupDesc: descriptors used for deforming and for forming
 *		the copy, respectively.
 * values, isnull: caller-supplied work arrays filled by heap_deform_tuple()
 *		and consumed by heap_form_tuple(); presumably sized for the wider of
 *		the two descriptors -- TODO confirm at the call sites.
 * newRelHasOids: presumably gates the HeapTupleSetOid() call below (the
 *		test itself is not visible in this excerpt).
 * rwstate: rewrite state passed through to rewrite_heap_tuple().
 *
 * The re-formed copy is released with heap_freetuple() after it has been
 * passed to rewrite_heap_tuple().
 */
1760 reform_and_rewrite_tuple(HeapTuple tuple,
1761 TupleDesc oldTupDesc, TupleDesc newTupDesc,
1762 Datum *values, bool *isnull,
1763 bool newRelHasOids, RewriteState rwstate)
1765 HeapTuple copiedTuple;
1768 heap_deform_tuple(tuple, oldTupDesc, values, isnull);
1770 /* Be sure to null out any dropped columns */
/* The loop walks newTupDesc, so the dropped-column test uses the new layout. */
1771 for (i = 0; i < newTupDesc->natts; i++)
1773 if (TupleDescAttr(newTupDesc, i)->attisdropped)
1777 copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
1779 /* Preserve OID, if any */
1781 HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
1783 /* The heap rewrite module does the rest */
1784 rewrite_heap_tuple(rwstate, tuple, copiedTuple);
1786 heap_freetuple(copiedTuple);