1 /*-------------------------------------------------------------------------
4 * CLUSTER a table on an index.
6 * There is hardly anything left of Paul Brown's original implementation...
9 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994-5, Regents of the University of California
14 * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.165 2007/11/15 21:14:33 momjian Exp $
16 *-------------------------------------------------------------------------
20 #include "access/genam.h"
21 #include "access/heapam.h"
22 #include "access/rewriteheap.h"
23 #include "access/transam.h"
24 #include "access/xact.h"
25 #include "catalog/catalog.h"
26 #include "catalog/dependency.h"
27 #include "catalog/heap.h"
28 #include "catalog/index.h"
29 #include "catalog/indexing.h"
30 #include "catalog/namespace.h"
31 #include "catalog/toasting.h"
32 #include "commands/cluster.h"
33 #include "commands/vacuum.h"
34 #include "miscadmin.h"
35 #include "storage/procarray.h"
36 #include "utils/acl.h"
37 #include "utils/fmgroids.h"
38 #include "utils/inval.h"
39 #include "utils/lsyscache.h"
40 #include "utils/memutils.h"
41 #include "utils/relcache.h"
42 #include "utils/syscache.h"
46 * This struct is used to pass around the information on tables to be
47 * clustered. We need this so we can make a list of them when invoked without
48 * a specific table/index pair.
/*
 * Forward declarations of the file-local (static) helpers defined further
 * down in this file.
 *
 * NOTE(review): cluster_rel's first parameter is spelled "rv" here but
 * "rvtc" in its definition.  The compiler does not care, but the two
 * spellings are worth unifying.
 */
57 static void cluster_rel(RelToCluster *rv, bool recheck);
58 static void rebuild_relation(Relation OldHeap, Oid indexOid);
59 static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
60 static List *get_tables_to_cluster(MemoryContext cluster_context);
64 /*---------------------------------------------------------------------------
65 * This cluster code allows for clustering multiple tables at once. Because
66 * of this, we cannot just run everything on a single transaction, or we
67 * would be forced to acquire exclusive locks on all the tables being
68 * clustered, simultaneously --- very likely leading to deadlock.
70 * To solve this we follow a similar strategy to VACUUM code,
71 * clustering each relation in a separate transaction. For this to work,
73 * - provide a separate memory context so that we can pass information in
74 * a way that survives across transactions
75 * - start a new transaction every time a new relation is clustered
76 * - check for validity of the information on to-be-clustered relations,
77 * as someone might have deleted a relation behind our back, or
78 * clustered one on a different index
79 * - end the transaction
81 * The single-relation case does not have any such overhead.
83 * We also allow a relation to be specified without an index. In that case,
84 * the indisclustered bit will be looked up, and an ERROR will be thrown
85 * if there is no index with the bit set.
86 *---------------------------------------------------------------------------
/*
 * cluster: entry point for the CLUSTER command.
 *
 * stmt->relation != NULL (single-table form): the table is opened under
 * AccessExclusiveLock, the caller must own it, and it must not be a temp
 * table of another session.  The index is taken from stmt->indexname
 * (looked up in the table's own namespace) or, when no name is given, is
 * the table's index that has indisclustered set.  cluster_rel() is then
 * called with recheck = false.
 *
 * stmt->relation == NULL (multi-table form): not allowed inside a user
 * transaction block.  The work list built by get_tables_to_cluster() lives
 * in a dedicated memory context, and each entry is processed by
 * cluster_rel() with recheck = true inside its own transaction.
 *
 * NOTE(review): this listing appears to have lost its short lines (braces,
 * else/break, several local declarations); the gaps in the embedded
 * numbering show where.  Restore them from the original before compiling.
 */
89 cluster(ClusterStmt *stmt, bool isTopLevel)
91 if (stmt->relation != NULL)
93 /* This is the single-relation case. */
95 indexOid = InvalidOid;
99 /* Find and lock the table */
100 rel = heap_openrv(stmt->relation, AccessExclusiveLock);
102 tableOid = RelationGetRelid(rel);
104 /* Check permissions */
105 if (!pg_class_ownercheck(tableOid, GetUserId()))
106 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
107 RelationGetRelationName(rel));
110 * Reject clustering a remote temp table ... their local buffer
111 * manager is not going to cope.
113 if (isOtherTempNamespace(RelationGetNamespace(rel)))
115 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
116 errmsg("cannot cluster temporary tables of other sessions")));
118 if (stmt->indexname == NULL)
122 /* We need to find the index that has indisclustered set. */
123 foreach(index, RelationGetIndexList(rel))
126 Form_pg_index indexForm;
128 indexOid = lfirst_oid(index);
129 idxtuple = SearchSysCache(INDEXRELID,
130 ObjectIdGetDatum(indexOid),
132 if (!HeapTupleIsValid(idxtuple))
133 elog(ERROR, "cache lookup failed for index %u", indexOid);
134 indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
135 if (indexForm->indisclustered)
/* Match: release the cache entry; indexOid keeps this index's OID. */
137 ReleaseSysCache(idxtuple);
/* No match: release, and reset so a fruitless search ends with InvalidOid. */
140 ReleaseSysCache(idxtuple);
141 indexOid = InvalidOid;
144 if (!OidIsValid(indexOid))
146 (errcode(ERRCODE_UNDEFINED_OBJECT),
147 errmsg("there is no previously clustered index for table \"%s\"",
148 stmt->relation->relname)));
153 * The index is expected to be in the same namespace as the
156 indexOid = get_relname_relid(stmt->indexname,
157 rel->rd_rel->relnamespace);
158 if (!OidIsValid(indexOid))
160 (errcode(ERRCODE_UNDEFINED_OBJECT),
161 errmsg("index \"%s\" for table \"%s\" does not exist",
162 stmt->indexname, stmt->relation->relname)));
165 /* All other checks are done in cluster_rel() */
166 rvtc.tableOid = tableOid;
167 rvtc.indexOid = indexOid;
169 /* close relation, keep lock till commit */
170 heap_close(rel, NoLock);
/* recheck = false: the table was validated and locked just above. */
173 cluster_rel(&rvtc, false);
178 * This is the "multi relation" case. We need to cluster all tables
179 * that have some index with indisclustered set.
181 MemoryContext cluster_context;
186 * We cannot run this form of CLUSTER inside a user transaction block;
187 * we'd be holding locks way too long.
189 PreventTransactionChain(isTopLevel, "CLUSTER");
192 * Create special memory context for cross-transaction storage.
194 * Since it is a child of PortalContext, it will go away even in case
197 cluster_context = AllocSetContextCreate(PortalContext,
199 ALLOCSET_DEFAULT_MINSIZE,
200 ALLOCSET_DEFAULT_INITSIZE,
201 ALLOCSET_DEFAULT_MAXSIZE);
204 * Build the list of relations to cluster. Note that this lives in
207 rvs = get_tables_to_cluster(cluster_context);
209 /* Commit to get out of starting transaction */
210 CommitTransactionCommand();
212 /* Ok, now that we've got them all, cluster them one by one */
215 RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
217 /* Start a new transaction for each relation. */
218 StartTransactionCommand();
219 /* functions in indexes may want a snapshot set */
220 ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
/* recheck = true: the pair was collected in an earlier transaction. */
221 cluster_rel(rvtc, true);
222 CommitTransactionCommand();
225 /* Start a new transaction for the cleanup work. */
226 StartTransactionCommand();
228 /* Clean up working storage */
229 MemoryContextDelete(cluster_context);
236 * This clusters the table by creating a new, clustered table and
237 * swapping the relfilenodes of the new table and the old table, so
238 * the OID of the original table is preserved. Thus we lose neither
239 * GRANTs, inheritance, nor references to this table (this was a bug
240 * in releases through 7.3).
242 * Also create new indexes and swap the filenodes with the old indexes the
243 * same way we do for the relation. Since we are effectively bulk-loading
244 * the new table, it's better to create the indexes afterwards than to fill
245 * them incrementally while we load the table.
/*
 * cluster_rel: cluster the single table described by rvtc (tableOid plus
 * indexOid).
 *
 * The table is opened with try_relation_open() under AccessExclusiveLock.
 * When recheck is true (the multi-table path of cluster(), where the pair
 * was collected in an earlier transaction) four conditions are re-verified:
 * the user still owns the table, it is not a temp table of another session,
 * the index still exists, and the index still has indisclustered set.  On
 * each failing test the visible code closes OldHeap and releases its lock.
 *
 * After that, check_index_is_clusterable() validates the index and
 * rebuild_relation() performs the rewrite; rebuild_relation also closes
 * OldHeap.
 *
 * NOTE(review): the statements that follow each relation_close() below
 * (presumably an early return) are not visible in this listing -- confirm
 * against the original file.
 */
248 cluster_rel(RelToCluster *rvtc, bool recheck)
252 /* Check for user-requested abort. */
253 CHECK_FOR_INTERRUPTS();
256 * We grab exclusive access to the target rel and index for the duration
257 * of the transaction. (This is redundant for the single-transaction
258 * case, since cluster() already did it.) The index lock is taken inside
259 * check_index_is_clusterable.
261 OldHeap = try_relation_open(rvtc->tableOid, AccessExclusiveLock);
263 /* If the table has gone away, we can skip processing it */
268 * Since we may open a new transaction for each relation, we have to check
269 * that the relation still is what we think it is.
271 * If this is a single-transaction CLUSTER, we can skip these tests. We
272 * *must* skip the one on indisclustered since it would reject an attempt
273 * to cluster a not-previously-clustered index.
278 Form_pg_index indexForm;
280 /* Check that the user still owns the relation */
281 if (!pg_class_ownercheck(rvtc->tableOid, GetUserId()))
283 relation_close(OldHeap, AccessExclusiveLock);
288 * Silently skip a temp table for a remote session. Only doing this
289 * check in the "recheck" case is appropriate (which currently means
290 * somebody is executing a database-wide CLUSTER), because there is
291 * another check in cluster() which will stop any attempt to cluster
292 * remote temp tables by name. There is another check in
293 * check_index_is_clusterable which is redundant, but we leave it for
296 if (isOtherTempNamespace(RelationGetNamespace(OldHeap)))
298 relation_close(OldHeap, AccessExclusiveLock);
303 * Check that the index still exists
305 if (!SearchSysCacheExists(RELOID,
306 ObjectIdGetDatum(rvtc->indexOid),
309 relation_close(OldHeap, AccessExclusiveLock);
314 * Check that the index is still the one with indisclustered set.
316 tuple = SearchSysCache(INDEXRELID,
317 ObjectIdGetDatum(rvtc->indexOid),
319 if (!HeapTupleIsValid(tuple)) /* probably can't happen */
321 relation_close(OldHeap, AccessExclusiveLock);
324 indexForm = (Form_pg_index) GETSTRUCT(tuple);
325 if (!indexForm->indisclustered)
/* Release the syscache entry before giving up on this table. */
327 ReleaseSysCache(tuple);
328 relation_close(OldHeap, AccessExclusiveLock);
331 ReleaseSysCache(tuple);
334 /* Check index is valid to cluster on */
335 check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck);
337 /* rebuild_relation does all the dirty work */
338 rebuild_relation(OldHeap, rvtc->indexOid);
340 /* NB: rebuild_relation does heap_close() on OldHeap */
344 * Verify that the specified index is a legitimate index to cluster on
346 * Side effect: obtains exclusive lock on the index. The caller should
347 * already have exclusive lock on the table, so the index lock is likely
348 * redundant, but it seems best to grab it anyway to ensure the index
349 * definition can't change under us.
/*
 * check_index_is_clusterable: raise an error unless indexOid may be used as
 * the clustering index of OldHeap.
 *
 * Rejected cases, in the order tested:
 *   - the relation is not an index on OldHeap;
 *   - the index has a predicate (partial index);
 *   - the access method does not have amclusterable set;
 *   - the access method does not index nulls and the first key column is a
 *     user column not marked NOT NULL, or is an expression;
 *   - the index has indisvalid false;
 *   - OldHeap is a system relation;
 *   - OldHeap is a temp table of another session.
 *
 * The index is opened with AccessExclusiveLock and closed with NoLock, so
 * the lock is not released by this function.
 *
 * NOTE(review): recheck presumably selects between the two errhint texts in
 * the not-null test; the line holding that condition is not visible in this
 * listing -- confirm against the original file.
 */
352 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck)
356 OldIndex = index_open(indexOid, AccessExclusiveLock);
359 * Check that index is in fact an index on the given relation
361 if (OldIndex->rd_index == NULL ||
362 OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
364 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
365 errmsg("\"%s\" is not an index for table \"%s\"",
366 RelationGetRelationName(OldIndex),
367 RelationGetRelationName(OldHeap))));
370 * Disallow clustering on incomplete indexes (those that might not index
371 * every row of the relation). We could relax this by making a separate
372 * seqscan pass over the table to copy the missing rows, but that seems
373 * expensive and tedious.
375 if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred))
377 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
378 errmsg("cannot cluster on partial index \"%s\"",
379 RelationGetRelationName(OldIndex))));
381 if (!OldIndex->rd_am->amclusterable)
383 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
385 RelationGetRelationName(OldIndex))));
387 if (!OldIndex->rd_am->amindexnulls)
392 * If the AM doesn't index nulls, then it's a partial index unless we
393 * can prove all the rows are non-null. Note we only need look at the
394 * first column; multicolumn-capable AMs are *required* to index nulls
395 * in columns after the first.
/* colno is the heap attribute number of the index's first key column. */
397 colno = OldIndex->rd_index->indkey.values[0];
400 /* ordinary user attribute */
401 if (!OldHeap->rd_att->attrs[colno - 1]->attnotnull)
403 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
404 errmsg("cannot cluster on index \"%s\" because access method does not handle null values",
405 RelationGetRelationName(OldIndex)),
407 ? errhint("You might be able to work around this by marking column \"%s\" NOT NULL, or use ALTER TABLE ... SET WITHOUT CLUSTER to remove the cluster specification from the table.",
408 NameStr(OldHeap->rd_att->attrs[colno - 1]->attname))
409 : errhint("You might be able to work around this by marking column \"%s\" NOT NULL.",
410 NameStr(OldHeap->rd_att->attrs[colno - 1]->attname))));
414 /* system column --- okay, always non-null */
417 /* index expression, lose... */
419 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
420 errmsg("cannot cluster on expressional index \"%s\" because its index access method does not handle null values",
421 RelationGetRelationName(OldIndex))));
425 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
426 * it might well not contain entries for every heap row, or might not even
427 * be internally consistent. (But note that we don't check indcheckxmin;
428 * the worst consequence of following broken HOT chains would be that we
429 * might put recently-dead tuples out-of-order in the new table, and there
430 * is little harm in that.)
432 if (!OldIndex->rd_index->indisvalid)
434 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
435 errmsg("cannot cluster on invalid index \"%s\"",
436 RelationGetRelationName(OldIndex))));
439 * Disallow clustering system relations. This will definitely NOT work
440 * for shared relations (we have no way to update pg_class rows in other
441 * databases), nor for nailed-in-cache relations (the relfilenode values
442 * for those are hardwired, see relcache.c). It might work for other
443 * system relations, but I ain't gonna risk it.
445 if (IsSystemRelation(OldHeap))
447 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
448 errmsg("\"%s\" is a system catalog",
449 RelationGetRelationName(OldHeap))));
452 * Don't allow cluster on temp tables of other backends ... their local
453 * buffer manager is not going to cope.
455 if (isOtherTempNamespace(RelationGetNamespace(OldHeap)))
457 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
458 errmsg("cannot cluster temporary tables of other sessions")));
460 /* Drop relcache refcnt on OldIndex, but keep lock */
461 index_close(OldIndex, NoLock);
465 * mark_index_clustered: mark the specified index as the one clustered on
467 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
/*
 * mark_index_clustered: update pg_index so that, among the indexes of rel,
 * only indexOid carries indisclustered.
 *
 * A valid indexOid is first looked up in the syscache to see whether it is
 * already marked.  Otherwise pg_index is opened with RowExclusiveLock and
 * every index of rel is visited on a writable copy of its pg_index row: a
 * set bit is cleared, and the bit is set on the row whose OID equals
 * indexOid.  Each modified row is written back, the catalog indexes are
 * updated, and a relcache invalidation is queued for that index.
 *
 * A missing pg_index row is reported with elog(ERROR).
 */
470 mark_index_clustered(Relation rel, Oid indexOid)
472 HeapTuple indexTuple;
473 Form_pg_index indexForm;
478 * If the index is already marked clustered, no need to do anything.
480 if (OidIsValid(indexOid))
482 indexTuple = SearchSysCache(INDEXRELID,
483 ObjectIdGetDatum(indexOid),
485 if (!HeapTupleIsValid(indexTuple))
486 elog(ERROR, "cache lookup failed for index %u", indexOid);
487 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
489 if (indexForm->indisclustered)
491 ReleaseSysCache(indexTuple);
495 ReleaseSysCache(indexTuple);
499 * Check each index of the relation and set/clear the bit as needed.
501 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
503 foreach(index, RelationGetIndexList(rel))
505 Oid thisIndexOid = lfirst_oid(index);
/* A copy is fetched because the row may be modified and written back. */
507 indexTuple = SearchSysCacheCopy(INDEXRELID,
508 ObjectIdGetDatum(thisIndexOid),
510 if (!HeapTupleIsValid(indexTuple))
511 elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
512 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
515 * Unset the bit if set. We know it's wrong because we checked this
518 if (indexForm->indisclustered)
520 indexForm->indisclustered = false;
521 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
522 CatalogUpdateIndexes(pg_index, indexTuple);
523 /* Ensure we see the update in the index's relcache entry */
524 CacheInvalidateRelcacheByRelid(thisIndexOid);
526 else if (thisIndexOid == indexOid)
528 indexForm->indisclustered = true;
529 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
530 CatalogUpdateIndexes(pg_index, indexTuple);
531 /* Ensure we see the update in the index's relcache entry */
532 CacheInvalidateRelcacheByRelid(thisIndexOid);
/* Free the copy obtained from SearchSysCacheCopy above. */
534 heap_freetuple(indexTuple);
537 heap_close(pg_index, RowExclusiveLock);
541 * rebuild_relation: rebuild an existing relation in index order
543 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
544 * indexOid: index to cluster by
546 * NB: this routine closes OldHeap at the right time; caller should not.
/*
 * rebuild_relation: rewrite OldHeap in the order of index indexOid.
 *
 * Steps, in order:
 *   1. mark indexOid as the clustered index of OldHeap;
 *   2. close OldHeap with NoLock;
 *   3. create a transient heap named "pg_temp_<tableOid>", passing the old
 *      heap's reltablespace;
 *   4. copy the data into it via copy_heap_data(), which yields the
 *      frozenXid handed to the swap;
 *   5. swap the physical files of the old and new relations;
 *   6. drop the transient relation with DROP_RESTRICT;
 *   7. reindex the table via reindex_relation(tableOid, false).
 *
 * tableOid and tableSpace are read from OldHeap before it is closed, so the
 * later steps work from OIDs only.
 */
549 rebuild_relation(Relation OldHeap, Oid indexOid)
551 Oid tableOid = RelationGetRelid(OldHeap);
552 Oid tableSpace = OldHeap->rd_rel->reltablespace;
554 char NewHeapName[NAMEDATALEN];
555 TransactionId frozenXid;
556 ObjectAddress object;
558 /* Mark the correct index as clustered */
559 mark_index_clustered(OldHeap, indexOid);
561 /* Close relcache entry, but keep lock until transaction commit */
562 heap_close(OldHeap, NoLock);
565 * Create the new heap, using a temporary name in the same namespace as
566 * the existing table. NOTE: there is some risk of collision with user
567 * relnames. Working around this seems more trouble than it's worth; in
568 * particular, we can't create the new heap in a different namespace from
569 * the old, or we will have problems with the TEMP status of temp tables.
/* snprintf is bounded by sizeof(NewHeapName), i.e. NAMEDATALEN bytes. */
571 snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", tableOid);
573 OIDNewHeap = make_new_heap(tableOid, NewHeapName, tableSpace);
576 * We don't need CommandCounterIncrement() because make_new_heap did it.
580 * Copy the heap data into the new table in the desired order.
582 frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid);
584 /* To make the new heap's data visible (probably not needed?). */
585 CommandCounterIncrement();
587 /* Swap the physical files of the old and new heaps. */
588 swap_relation_files(tableOid, OIDNewHeap, frozenXid);
590 CommandCounterIncrement();
592 /* Destroy new heap with old filenode */
593 object.classId = RelationRelationId;
594 object.objectId = OIDNewHeap;
595 object.objectSubId = 0;
598 * The new relation is local to our transaction and we know nothing
599 * depends on it, so DROP_RESTRICT should be OK.
601 performDeletion(&object, DROP_RESTRICT);
603 /* performDeletion does CommandCounterIncrement at end */
606 * Rebuild each index on the relation (but not the toast table, which is
607 * all-new at this point). We do not need CommandCounterIncrement()
608 * because reindex_relation does it.
610 reindex_relation(tableOid, false);
614 * Create the new table that we will fill with correctly-ordered data.
/*
 * make_new_heap: create an empty relation named NewName, modelled on the
 * relation OIDOldHeap.
 *
 * The new relation is created from a copy of the old heap's tuple
 * descriptor, in the old heap's namespace, with its owner, relkind and
 * relisshared flag, and with the reloptions read from the old heap's
 * pg_class row.  A CommandCounterIncrement() follows the creation, then
 * AlterTableCreateToastTable() is called for the new relation.  The old
 * heap is opened with AccessExclusiveLock and closed with NoLock.
 *
 * The caller in this file (rebuild_relation) uses the result as the OID of
 * the new relation.
 *
 * NOTE(review): several arguments of heap_create_with_catalog, including
 * where NewTableSpace is passed, and the return statement are not visible
 * in this listing -- confirm against the original file.
 */
617 make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
619 TupleDesc OldHeapDesc,
627 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
628 OldHeapDesc = RelationGetDescr(OldHeap);
631 * Need to make a copy of the tuple descriptor, since
632 * heap_create_with_catalog modifies it.
634 tupdesc = CreateTupleDescCopyConstr(OldHeapDesc);
637 * Use options of the old heap for new heap.
639 tuple = SearchSysCache(RELOID,
640 ObjectIdGetDatum(OIDOldHeap),
642 if (!HeapTupleIsValid(tuple))
643 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
644 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
/* Presumably applies only when the attribute is NULL; the test is not visible here. */
647 reloptions = (Datum) 0;
649 OIDNewHeap = heap_create_with_catalog(NewName,
650 RelationGetNamespace(OldHeap),
653 OldHeap->rd_rel->relowner,
655 OldHeap->rd_rel->relkind,
656 OldHeap->rd_rel->relisshared,
661 allowSystemTableMods);
/* Released only after the create call above, which was passed reloptions. */
663 ReleaseSysCache(tuple);
666 * Advance command counter so that the newly-created relation's catalog
667 * tuples will be visible to heap_open.
669 CommandCounterIncrement();
672 * If necessary, create a TOAST table for the new relation. Note that
673 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
674 * the TOAST table will be visible for insertion.
676 AlterTableCreateToastTable(OIDNewHeap);
678 heap_close(OldHeap, NoLock);
684 * Do the physical copying of heap data. Returns the TransactionId used as
685 * freeze cutoff point for the tuples.
/*
 * copy_heap_data: copy the tuples of OIDOldHeap into OIDNewHeap, reading
 * them through a forward index scan on OIDOldIndex with SnapshotAny.
 *
 * All three relations are opened with AccessExclusiveLock and closed with
 * NoLock.  Each scanned tuple is classified by HeapTupleSatisfiesVacuum
 * while its buffer is share-locked.  In-progress inserts or deletes by any
 * transaction other than the current one raise an error.  A dead tuple is
 * only reported to the rewrite module; any other tuple is deformed, its
 * dropped columns are handled per the new descriptor, and it is re-formed
 * and passed to rewrite_heap_tuple, keeping its OID when the new heap has
 * OIDs.
 *
 * WAL is requested from the rewrite module only when archiving is active
 * and the new heap is not a temp relation.
 *
 * NOTE(review): the function is declared to return a TransactionId, which
 * the header comment describes as the freeze cutoff (presumably FreezeXid);
 * the return statement is not visible in this listing -- confirm.
 */
688 copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
693 TupleDesc oldTupDesc;
694 TupleDesc newTupDesc;
701 TransactionId OldestXmin;
702 TransactionId FreezeXid;
703 RewriteState rwstate;
706 * Open the relations we need.
708 NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
709 OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
710 OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
713 * Their tuple descriptors should be exactly alike, but here we only need
714 * assume that they have the same number of columns.
716 oldTupDesc = RelationGetDescr(OldHeap);
717 newTupDesc = RelationGetDescr(NewHeap);
718 Assert(newTupDesc->natts == oldTupDesc->natts);
720 /* Preallocate values/isnull arrays */
721 natts = newTupDesc->natts;
722 values = (Datum *) palloc(natts * sizeof(Datum));
723 isnull = (bool *) palloc(natts * sizeof(bool));
726 * We need to log the copied data in WAL iff WAL archiving is enabled AND
727 * it's not a temp rel.
729 use_wal = XLogArchivingActive() && !NewHeap->rd_istemp;
731 /* use_wal off requires rd_targblock be initially invalid */
732 Assert(NewHeap->rd_targblock == InvalidBlockNumber);
735 * compute xids used to freeze and weed out dead tuples. We use -1
736 * freeze_min_age to avoid having CLUSTER freeze tuples earlier than a
737 * plain VACUUM would.
739 vacuum_set_xid_limits(-1, OldHeap->rd_rel->relisshared,
740 &OldestXmin, &FreezeXid);
742 /* Initialize the rewrite operation */
743 rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
746 * Scan through the OldHeap in OldIndex order and copy each tuple into the
747 * NewHeap. To ensure we see recently-dead tuples that still need to be
748 * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for
749 * the visibility test.
751 scan = index_beginscan(OldHeap, OldIndex,
752 SnapshotAny, 0, (ScanKey) NULL);
754 while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
756 HeapTuple copiedTuple;
760 CHECK_FOR_INTERRUPTS();
/* The visibility test below runs with the buffer share-locked. */
762 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
764 switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
768 /* Definitely dead */
772 case HEAPTUPLE_RECENTLY_DEAD:
773 /* Live or recently dead, must copy it */
776 case HEAPTUPLE_INSERT_IN_PROGRESS:
779 * We should not see this unless it's been inserted earlier in
780 * our own transaction.
782 if (!TransactionIdIsCurrentTransactionId(
783 HeapTupleHeaderGetXmin(tuple->t_data)))
784 elog(ERROR, "concurrent insert in progress");
788 case HEAPTUPLE_DELETE_IN_PROGRESS:
791 * We should not see this unless it's been deleted earlier in
792 * our own transaction.
794 Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
795 if (!TransactionIdIsCurrentTransactionId(
796 HeapTupleHeaderGetXmax(tuple->t_data)))
797 elog(ERROR, "concurrent delete in progress");
798 /* treat as recently dead */
802 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
803 isdead = false; /* keep compiler quiet */
/* Classification done: drop the share lock taken above. */
807 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
811 /* heap rewrite module still needs to see it... */
812 rewrite_heap_dead_tuple(rwstate, tuple);
817 * We cannot simply copy the tuple as-is, for several reasons:
819 * 1. We'd like to squeeze out the values of any dropped columns, both
820 * to save space and to ensure we have no corner-case failures. (It's
821 * possible for example that the new table hasn't got a TOAST table
822 * and so is unable to store any large values of dropped cols.)
824 * 2. The tuple might not even be legal for the new table; this is
825 * currently only known to happen as an after-effect of ALTER TABLE
828 * So, we must reconstruct the tuple from component Datums.
/* Deform with the OLD descriptor; the tuple is re-formed with the NEW one. */
830 heap_deform_tuple(tuple, oldTupDesc, values, isnull);
832 /* Be sure to null out any dropped columns */
833 for (i = 0; i < natts; i++)
835 if (newTupDesc->attrs[i]->attisdropped)
839 copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
841 /* Preserve OID, if any */
842 if (NewHeap->rd_rel->relhasoids)
843 HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
845 /* The heap rewrite module does the rest */
846 rewrite_heap_tuple(rwstate, tuple, copiedTuple);
/* copiedTuple is freed here, right after it is handed to the rewrite module. */
848 heap_freetuple(copiedTuple);
853 /* Write out any remaining tuples, and fsync if needed */
854 end_heap_rewrite(rwstate);
/* Relcache references are dropped with NoLock, so no lock is released here. */
859 index_close(OldIndex, NoLock);
860 heap_close(OldHeap, NoLock);
861 heap_close(NewHeap, NoLock);
867 * Swap the physical files of two given relations.
869 * We swap the physical identity (reltablespace and relfilenode) while
870 * keeping the same logical identities of the two relations.
872 * Also swap any TOAST links, so that the toast data moves along with
873 * the main-table data.
875 * Additionally, the first relation is marked with relfrozenxid set to
876 * frozenXid. It seems a bit ugly to have this here, but all callers would
877 * have to do it anyway, so having it here saves a heap_update. Note: the
878 * TOAST table needs no special handling: since we swapped the links,
879 * the entry for the TOAST table will now contain RecentXmin in relfrozenxid,
880 * which is the correct value.
/*
 * swap_relation_files: exchange the physical storage of relations r1 and r2
 * by editing writable copies of their pg_class rows.
 *
 * Swapped between the two rows: relfilenode, reltablespace, reltoastrelid,
 * relpages and reltuples.  reltoastidxid is deliberately left alone.  In
 * addition r1's relfrozenxid is set to frozenXid, which must be a normal
 * transaction id (asserted).
 *
 * Both rows are then written back and the pg_class indexes updated.  If
 * either relation ends up with a TOAST table, the TOAST tables' dependency
 * records are deleted and re-created against their new owners.  Finally
 * the relcache entries of both relations are discarded.
 *
 * A missing pg_class row, or an unexpected dependency count for a TOAST
 * table, is reported with elog(ERROR).
 */
883 swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
885 Relation relRelation;
888 Form_pg_class relform1,
891 CatalogIndexState indstate;
893 /* We need writable copies of both pg_class tuples. */
894 relRelation = heap_open(RelationRelationId, RowExclusiveLock);
896 reltup1 = SearchSysCacheCopy(RELOID,
897 ObjectIdGetDatum(r1),
899 if (!HeapTupleIsValid(reltup1))
900 elog(ERROR, "cache lookup failed for relation %u", r1);
901 relform1 = (Form_pg_class) GETSTRUCT(reltup1);
903 reltup2 = SearchSysCacheCopy(RELOID,
904 ObjectIdGetDatum(r2),
906 if (!HeapTupleIsValid(reltup2))
907 elog(ERROR, "cache lookup failed for relation %u", r2);
908 relform2 = (Form_pg_class) GETSTRUCT(reltup2);
911 * Actually swap the fields in the two tuples
913 swaptemp = relform1->relfilenode;
914 relform1->relfilenode = relform2->relfilenode;
915 relform2->relfilenode = swaptemp;
917 swaptemp = relform1->reltablespace;
918 relform1->reltablespace = relform2->reltablespace;
919 relform2->reltablespace = swaptemp;
921 swaptemp = relform1->reltoastrelid;
922 relform1->reltoastrelid = relform2->reltoastrelid;
923 relform2->reltoastrelid = swaptemp;
925 /* we should not swap reltoastidxid */
927 /* set rel1's frozen Xid */
928 Assert(TransactionIdIsNormal(frozenXid));
929 relform1->relfrozenxid = frozenXid;
931 /* swap size statistics too, since new rel has freshly-updated stats */
936 swap_pages = relform1->relpages;
937 relform1->relpages = relform2->relpages;
938 relform2->relpages = swap_pages;
940 swap_tuples = relform1->reltuples;
941 relform1->reltuples = relform2->reltuples;
942 relform2->reltuples = swap_tuples;
945 /* Update the tuples in pg_class */
946 simple_heap_update(relRelation, &reltup1->t_self, reltup1);
947 simple_heap_update(relRelation, &reltup2->t_self, reltup2);
949 /* Keep system catalogs current */
950 indstate = CatalogOpenIndexes(relRelation);
951 CatalogIndexInsert(indstate, reltup1);
952 CatalogIndexInsert(indstate, reltup2);
953 CatalogCloseIndexes(indstate);
956 * If we have toast tables associated with the relations being swapped,
957 * change their dependency links to re-associate them with their new
958 * owning relations. Otherwise the wrong one will get dropped ...
960 * NOTE: it is possible that only one table has a toast table; this can
961 * happen in CLUSTER if there were dropped columns in the old table, and
962 * in ALTER TABLE when adding or changing type of columns.
964 * NOTE: at present, a TOAST table's only dependency is the one on its
965 * owning table. If more are ever created, we'd need to use something
966 * more selective than deleteDependencyRecordsFor() to get rid of only the
/* The reltoastrelid values tested from here on are the already-swapped ones. */
969 if (relform1->reltoastrelid || relform2->reltoastrelid)
971 ObjectAddress baseobject,
975 /* Delete old dependencies */
976 if (relform1->reltoastrelid)
978 count = deleteDependencyRecordsFor(RelationRelationId,
979 relform1->reltoastrelid);
/* NOTE(review): "%ld" needs a long argument; count's declaration is not visible here. */
981 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
984 if (relform2->reltoastrelid)
986 count = deleteDependencyRecordsFor(RelationRelationId,
987 relform2->reltoastrelid);
989 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
993 /* Register new dependencies */
994 baseobject.classId = RelationRelationId;
995 baseobject.objectSubId = 0;
996 toastobject.classId = RelationRelationId;
997 toastobject.objectSubId = 0;
999 if (relform1->reltoastrelid)
1001 baseobject.objectId = r1;
1002 toastobject.objectId = relform1->reltoastrelid;
1003 recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
1006 if (relform2->reltoastrelid)
1008 baseobject.objectId = r2;
1009 toastobject.objectId = relform2->reltoastrelid;
1010 recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
1015 * Blow away the old relcache entries now. We need this kluge because
1016 * relcache.c keeps a link to the smgr relation for the physical file, and
1017 * that will be out of date as soon as we do CommandCounterIncrement.
1018 * Whichever of the rels is the second to be cleared during cache
1019 * invalidation will have a dangling reference to an already-deleted smgr
1020 * relation. Rather than trying to avoid this by ordering operations just
1021 * so, it's easiest to not have the relcache entries there at all.
1022 * (Fortunately, since one of the entries is local in our transaction,
1023 * it's sufficient to clear out our own relcache this way; the problem
1024 * cannot arise for other backends when they see our update on the
1025 * non-local relation.)
1027 RelationForgetRelation(r1);
1028 RelationForgetRelation(r2);
/* Free the two writable copies obtained from SearchSysCacheCopy. */
1031 heap_freetuple(reltup1);
1032 heap_freetuple(reltup2);
1034 heap_close(relRelation, RowExclusiveLock);
1038 * Get a list of tables that the current user owns and that have an index
1039 * with indisclustered set. Return it as a List * of RelToCluster entries,
1040 * each holding the tableOid and the indexOid on which the table is already
/*
 * get_tables_to_cluster: build the work list for the multi-table form of
 * CLUSTER.
 *
 * pg_index is scanned under AccessShareLock with a scan key on
 * indisclustered = true.  For each matching row a RelToCluster entry
 * (tableOid = indrelid, indexOid = indexrelid) is allocated and added to
 * the list while cluster_context is the current memory context, so the
 * list outlives the transaction that built it.
 *
 * cluster_context: memory context in which the entries and list cells are
 * allocated.
 *
 * NOTE(review): the statement following the ownership test (presumably a
 * skip of tables the user does not own), the scan-key setup call, the end
 * of the scan and the return of the list are not visible in this listing
 * -- confirm against the original file.
 */
1044 get_tables_to_cluster(MemoryContext cluster_context)
1046 Relation indRelation;
1049 HeapTuple indexTuple;
1050 Form_pg_index index;
1051 MemoryContext old_context;
1056 * Get all indexes that have indisclustered set and are owned by
1057 * appropriate user. System relations or nailed-in relations cannot ever
1058 * have indisclustered set, because CLUSTER will refuse to set it when
1059 * called with one of them as argument.
1061 indRelation = heap_open(IndexRelationId, AccessShareLock);
1063 Anum_pg_index_indisclustered,
1064 BTEqualStrategyNumber, F_BOOLEQ,
1065 BoolGetDatum(true));
1066 scan = heap_beginscan(indRelation, SnapshotNow, 1, &entry);
1067 while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1069 index = (Form_pg_index) GETSTRUCT(indexTuple);
/* Ownership is tested on the indexed table (indrelid), not on the index. */
1071 if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1075 * We have to build the list in a different memory context so it will
1076 * survive the cross-transaction processing
1078 old_context = MemoryContextSwitchTo(cluster_context);
1080 rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1081 rvtc->tableOid = index->indrelid;
1082 rvtc->indexOid = index->indexrelid;
1083 rvs = lcons(rvtc, rvs);
/* Switch back so later allocations do not land in cluster_context. */
1085 MemoryContextSwitchTo(old_context);
1089 relation_close(indRelation, AccessShareLock);