/*-------------------------------------------------------------------------
 *
 * heapam_handler.c
 *	  heap table access method code
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/heap/heapam_handler.c
 *
 *
 * NOTES
 *	  This file wires up the lower level heapam.c et al routines with the
 *	  tableam abstraction.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "miscadmin.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/tableam.h"
#include "access/tsmapi.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "optimizer/plancat.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/rel.h"
static void reform_and_rewrite_tuple(HeapTuple tuple,
									 Relation OldHeap, Relation NewHeap,
									 Datum *values, bool *isnull, RewriteState rwstate);

static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
								   HeapTuple tuple,
								   OffsetNumber tupoffset);

static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);

static const TableAmRoutine heapam_methods;

/* ------------------------------------------------------------------------
 * Slot related callbacks for heap AM
 * ------------------------------------------------------------------------
 */

static const TupleTableSlotOps *
heapam_slot_callbacks(Relation relation)
{
	return &TTSOpsBufferHeapTuple;
}

/* ------------------------------------------------------------------------
 * Index Scan Callbacks for heap AM
 * ------------------------------------------------------------------------
 */

static IndexFetchTableData *
heapam_index_fetch_begin(Relation rel)
{
	IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));

	hscan->xs_base.rel = rel;
	hscan->xs_cbuf = InvalidBuffer;

	return &hscan->xs_base;
}

static void
heapam_index_fetch_reset(IndexFetchTableData *scan)
{
	IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;

	if (BufferIsValid(hscan->xs_cbuf))
	{
		ReleaseBuffer(hscan->xs_cbuf);
		hscan->xs_cbuf = InvalidBuffer;
	}
}

static void
heapam_index_fetch_end(IndexFetchTableData *scan)
{
	IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;

	heapam_index_fetch_reset(scan);

	pfree(hscan);
}
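
/*
 * Fetch a heap tuple for an index scan, following any HOT chain as needed.
 * *call_again is both input and output: it tells us whether we are resuming
 * a partially-traversed HOT chain, and we set it when more chain members
 * might still be visible.  *all_dead lets the caller mark the index entry
 * as killable once every member of the chain is known dead.
 */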
static bool
heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
						 ItemPointer tid,
						 Snapshot snapshot,
						 TupleTableSlot *slot,
						 bool *call_again, bool *all_dead)
{
	IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
	bool		got_heap_tuple;

	Assert(TTS_IS_BUFFERTUPLE(slot));

	/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
	if (!*call_again)
	{
		/* Switch to correct buffer if we don't have it already */
		Buffer		prev_buf = hscan->xs_cbuf;

		hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
											  hscan->xs_base.rel,
											  ItemPointerGetBlockNumber(tid));

		/*
		 * Prune page, but only if we weren't already on this page
		 */
		if (prev_buf != hscan->xs_cbuf)
			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
	}

	/* Obtain share-lock on the buffer so we can examine visibility */
	LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
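	/*
	 * heap_hot_search_buffer() starts at the HOT chain root on the first
	 * call for a given TID and resumes mid-chain on continuation calls; it
	 * updates *tid to point at the chain member it stopped at.
	 */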
	got_heap_tuple = heap_hot_search_buffer(tid,
											hscan->xs_base.rel,
											hscan->xs_cbuf,
											snapshot,
											&bslot->base.tupdata,
											all_dead,
											!*call_again);
	bslot->base.tupdata.t_self = *tid;
	LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);

	if (got_heap_tuple)
	{
		/*
		 * Only in a non-MVCC snapshot can more than one member of the HOT
		 * chain be visible.
		 */
		*call_again = !IsMVCCSnapshot(snapshot);

		slot->tts_tableOid = RelationGetRelid(scan->rel);
		ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
	}
	else
	{
		/* We've reached the end of the HOT chain. */
		*call_again = false;
	}

	return got_heap_tuple;
}

/* ------------------------------------------------------------------------
 * Callbacks for non-modifying operations on individual tuples for heap AM
 * ------------------------------------------------------------------------
 */

static bool
heapam_fetch_row_version(Relation relation,
						 ItemPointer tid,
						 Snapshot snapshot,
						 TupleTableSlot *slot)
{
	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
	Buffer		buffer;

	Assert(TTS_IS_BUFFERTUPLE(slot));

	bslot->base.tupdata.t_self = *tid;
	if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
	{
		/* store in slot, transferring existing pin */
		ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
		slot->tts_tableOid = RelationGetRelid(relation);

		return true;
	}

	return false;
}

static bool
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
								Snapshot snapshot)
{
	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
	bool		res;

	Assert(TTS_IS_BUFFERTUPLE(slot));
	Assert(BufferIsValid(bslot->buffer));

	/*
	 * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
	 * Caller should be holding pin, but not lock.
	 */
	LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
	res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
									   bslot->buffer);
	LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);

	return res;
}

/* ----------------------------------------------------------------------------
 *  Functions for manipulations of physical tuples for heap AM.
 * ----------------------------------------------------------------------------
 */
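
/*
 * Note on the pattern used by the callbacks below: ExecFetchSlotHeapTuple()
 * with materialize = true may have to allocate a heap-tuple copy of the
 * slot's contents; it reports that through shouldFree, in which case we
 * must pfree() the tuple once we're done with it.
 */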
static void
heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
					int options, BulkInsertState bistate)
{
	bool		shouldFree = true;
	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

	/* Update the tuple with table oid */
	slot->tts_tableOid = RelationGetRelid(relation);
	tuple->t_tableOid = slot->tts_tableOid;

	/* Perform the insertion, and copy the resulting ItemPointer */
	heap_insert(relation, tuple, cid, options, bistate);
	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

	if (shouldFree)
		pfree(tuple);
}
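
/*
 * Speculative insertion, used by INSERT ... ON CONFLICT: the tuple is
 * inserted carrying a speculative token, and is later either confirmed or
 * super-deleted by heapam_tuple_complete_speculative(), depending on
 * whether a conflicting tuple was found.
 */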
static void
heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
								CommandId cid, int options,
								BulkInsertState bistate, uint32 specToken)
{
	bool		shouldFree = true;
	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

	/* Update the tuple with table oid */
	slot->tts_tableOid = RelationGetRelid(relation);
	tuple->t_tableOid = slot->tts_tableOid;

	HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
	options |= HEAP_INSERT_SPECULATIVE;

	/* Perform the insertion, and copy the resulting ItemPointer */
	heap_insert(relation, tuple, cid, options, bistate);
	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

	if (shouldFree)
		pfree(tuple);
}

static void
heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
								  uint32 spekToken, bool succeeded)
{
	bool		shouldFree = true;
	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

	/* adjust the tuple's state accordingly */
	if (succeeded)
		heap_finish_speculative(relation, &slot->tts_tid);
	else
		heap_abort_speculative(relation, &slot->tts_tid);

	if (shouldFree)
		pfree(tuple);
}

static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
					Snapshot snapshot, Snapshot crosscheck, bool wait,
					TM_FailureData *tmfd, bool changingPart)
{
	/*
	 * Currently, deletion of index tuples is handled at VACUUM time.  If a
	 * table AM cleaned up dead tuples by itself, this would be the place to
	 * delete the corresponding index tuples as well.
	 */
	return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}

static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
					CommandId cid, Snapshot snapshot, Snapshot crosscheck,
					bool wait, TM_FailureData *tmfd,
					LockTupleMode *lockmode, bool *update_indexes)
{
	bool		shouldFree = true;
	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
	TM_Result	result;

	/* Update the tuple with table oid */
	slot->tts_tableOid = RelationGetRelid(relation);
	tuple->t_tableOid = slot->tts_tableOid;

	result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
						 tmfd, lockmode);
	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

	/*
	 * Decide whether new index entries are needed for the tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * If it's a HOT update, we mustn't insert new index entries.
	 */
	*update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);

	if (shouldFree)
		pfree(tuple);

	return result;
}
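
/*
 * Lock a tuple; with TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update
 * chain and lock the latest version instead, as EvalPlanQual-style rechecks
 * require.
 */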
static TM_Result
heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
				  LockWaitPolicy wait_policy, uint8 flags,
				  TM_FailureData *tmfd)
{
	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
	TM_Result	result;
	Buffer		buffer;
	HeapTuple	tuple = &bslot->base.tupdata;
	bool		follow_updates;

	follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
	tmfd->traversed = false;

	Assert(TTS_IS_BUFFERTUPLE(slot));

tuple_lock_retry:
	tuple->t_self = *tid;
	result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
							 follow_updates, &buffer, tmfd);

	if (result == TM_Updated &&
		(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
	{
		ReleaseBuffer(buffer);
		/* Should not encounter speculative tuple on recheck */
		Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));

		if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
		{
			SnapshotData SnapshotDirty;
			TransactionId priorXmax;

			/* it was updated, so look at the updated version */
			*tid = tmfd->ctid;
			/* updated row should have xmin matching this xmax */
			priorXmax = tmfd->xmax;

			/* signal that a tuple later in the chain is getting locked */
			tmfd->traversed = true;
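			/*
			 * We chase the chain using a dirty snapshot so that uncommitted
			 * changes are visible to us: a chain member whose updater is
			 * still in progress must be waited on (or skipped, or reported
			 * as an error, per wait_policy) before we can decide how to
			 * proceed.
			 */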
			/*
			 * fetch target tuple
			 *
			 * Loop here to deal with updated or busy tuples
			 */
			InitDirtySnapshot(SnapshotDirty);
			for (;;)
			{
				if (ItemPointerIndicatesMovedPartitions(tid))
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));

				tuple->t_self = *tid;
				if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
				{
					/*
					 * If xmin isn't what we're expecting, the slot must have
					 * been recycled and reused for an unrelated tuple.  This
					 * implies that the latest version of the row was deleted,
					 * so we need do nothing.  (Should be safe to examine xmin
					 * without getting buffer's content lock.  We assume
					 * reading a TransactionId to be atomic, and Xmin never
					 * changes in an existing tuple, except to invalid or
					 * frozen, and neither of those can match priorXmax.)
					 */
					if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
											 priorXmax))
					{
						ReleaseBuffer(buffer);
						return TM_Deleted;
					}

					/* otherwise xmin should not be dirty... */
					if (TransactionIdIsValid(SnapshotDirty.xmin))
						elog(ERROR, "t_xmin is uncommitted in tuple to be updated");

					/*
					 * If tuple is being updated by other transaction then we
					 * have to wait for its commit/abort, or die trying.
					 */
					if (TransactionIdIsValid(SnapshotDirty.xmax))
					{
						ReleaseBuffer(buffer);
						switch (wait_policy)
						{
							case LockWaitBlock:
								XactLockTableWait(SnapshotDirty.xmax,
												  relation, &tuple->t_self,
												  XLTW_FetchUpdated);
								break;
							case LockWaitSkip:
								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
									/* skip instead of waiting */
									return TM_WouldBlock;
								break;
							case LockWaitError:
								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
									ereport(ERROR,
											(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
											 errmsg("could not obtain lock on row in relation \"%s\"",
													RelationGetRelationName(relation))));
								break;
						}
						continue;	/* loop back to repeat heap_fetch */
					}

					/*
					 * If tuple was inserted by our own transaction, we have
					 * to check cmin against cid: cmin >= current CID means
					 * our command cannot see the tuple, so we should ignore
					 * it. Otherwise heap_lock_tuple() will throw an error,
					 * and so would any later attempt to update or delete the
					 * tuple.  (We need not check cmax because
					 * HeapTupleSatisfiesDirty will consider a tuple deleted
					 * by our transaction dead, regardless of cmax.)  We just
					 * checked that priorXmax == xmin, so we can test that
					 * variable instead of doing HeapTupleHeaderGetXmin again.
					 */
					if (TransactionIdIsCurrentTransactionId(priorXmax) &&
						HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
					{
						ReleaseBuffer(buffer);
						return TM_SelfModified;
					}

					/*
					 * This is a live tuple, so try to lock it again.
					 */
					ReleaseBuffer(buffer);
					goto tuple_lock_retry;
				}

				/*
				 * If the referenced slot was actually empty, the latest
				 * version of the row must have been deleted, so we need do
				 * nothing.
				 */
				if (tuple->t_data == NULL)
				{
					return TM_Deleted;
				}

				/*
				 * As above, if xmin isn't what we're expecting, do nothing.
				 */
				if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
										 priorXmax))
				{
					if (BufferIsValid(buffer))
						ReleaseBuffer(buffer);
					return TM_Deleted;
				}

				/*
				 * If we get here, the tuple was found but failed
				 * SnapshotDirty. Assuming the xmin is either a committed xact
				 * or our own xact (as it certainly should be if we're trying
				 * to modify the tuple), this must mean that the row was
				 * updated or deleted by either a committed xact or our own
				 * xact.  If it was deleted, we can ignore it; if it was
				 * updated then chain up to the next version and repeat the
				 * whole process.
				 *
				 * As above, it should be safe to examine xmax and t_ctid
				 * without the buffer content lock, because they can't be
				 * changing.
				 */
				if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
				{
					/* deleted, so forget about it */
					if (BufferIsValid(buffer))
						ReleaseBuffer(buffer);
					return TM_Deleted;
				}

				/* updated, so look at the updated row */
				*tid = tuple->t_data->t_ctid;
				/* updated row should have xmin matching this xmax */
				priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
				if (BufferIsValid(buffer))
					ReleaseBuffer(buffer);
				/* loop back to fetch next in chain */
			}
		}
		else
		{
			/* tuple was deleted, so give up */
			return TM_Deleted;
		}
	}

	slot->tts_tableOid = RelationGetRelid(relation);
	tuple->t_tableOid = slot->tts_tableOid;

	/* store in slot, transferring existing pin */
	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);

	return result;
}

static void
heapam_finish_bulk_insert(Relation relation, int options)
{
	/*
	 * If we skipped writing WAL, then we need to sync the heap (but not
	 * indexes since those use WAL anyway / don't go through tableam)
	 */
	if (options & HEAP_INSERT_SKIP_WAL)
		heap_sync(relation);
}

/* ------------------------------------------------------------------------
 * DDL related callbacks for heap AM.
 * ------------------------------------------------------------------------
 */

static void
heapam_relation_set_new_filenode(Relation rel, char persistence,
								 TransactionId *freezeXid,
								 MultiXactId *minmulti)
{
	/*
	 * Initialize to the minimum XID that could put tuples in the table. We
	 * know that no xacts older than RecentXmin are still running, so that
	 * will do.
	 */
	*freezeXid = RecentXmin;

	/*
	 * Similarly, initialize the minimum Multixact to the first value that
	 * could possibly be stored in tuples in the table.  Running transactions
	 * could reuse values from their local cache, so we are careful to
	 * consider all currently running multis.
	 *
	 * XXX this could be refined further, but is it worth the hassle?
	 */
	*minmulti = GetOldestMultiXactId();

	RelationCreateStorage(rel->rd_node, persistence);

	/*
	 * If required, set up an init fork for an unlogged table so that it can
	 * be correctly reinitialized on restart.  An immediate sync is required
	 * even if the page has been logged, because the write did not go through
	 * shared_buffers and therefore a concurrent checkpoint may have moved the
	 * redo pointer past our xlog record.  Recovery may as well remove it
	 * while replaying, for example, an XLOG_DBASE_CREATE or
	 * XLOG_TBLSPC_CREATE record.  Therefore, logging is necessary even if
	 * wal_level=minimal.
	 */
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
	{
		Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
			   rel->rd_rel->relkind == RELKIND_MATVIEW ||
			   rel->rd_rel->relkind == RELKIND_TOASTVALUE);

		RelationOpenSmgr(rel);
		smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
		log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
		smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
	}
}

static void
heapam_relation_nontransactional_truncate(Relation rel)
{
	RelationTruncate(rel, 0);
}

static void
heapam_relation_copy_data(Relation rel, RelFileNode newrnode)
{
	SMgrRelation dstrel;

	dstrel = smgropen(newrnode, rel->rd_backend);
	RelationOpenSmgr(rel);

	/*
	 * Create and copy all forks of the relation, and schedule unlinking of
	 * old physical files.
	 *
	 * NOTE: any conflict in relfilenode value will be caught in
	 * RelationCreateStorage().
	 */
	RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);

	/* copy main fork */
	RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
						rel->rd_rel->relpersistence);

	/* copy those extra forks that exist */
	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
		 forkNum <= MAX_FORKNUM; forkNum++)
	{
		if (smgrexists(rel->rd_smgr, forkNum))
		{
			smgrcreate(dstrel, forkNum, false);

			/*
			 * WAL log creation if the relation is persistent, or this is the
			 * init fork of an unlogged relation.
			 */
			if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
				(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
				 forkNum == INIT_FORKNUM))
				log_smgrcreate(&newrnode, forkNum);
			RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
								rel->rd_rel->relpersistence);
		}
	}

	/* drop old relation, and close new one */
	RelationDropStorage(rel);
	smgrclose(dstrel);
}
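
/*
 * Copy the contents of OldHeap into NewHeap, on behalf of CLUSTER and
 * VACUUM FULL.  Tuples are read via OldIndex or sequentially (optionally
 * fed through a tuplesort when use_sort is set) and pushed through the
 * rewriteheap.c machinery, which takes care of freezing and of preserving
 * update chains.
 */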
static void
heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
								 Relation OldIndex, bool use_sort,
								 TransactionId OldestXmin,
								 TransactionId FreezeXid,
								 MultiXactId MultiXactCutoff,
								 double *num_tuples,
								 double *tups_vacuumed,
								 double *tups_recently_dead)
{
	RewriteState rwstate;
	IndexScanDesc indexScan;
	TableScanDesc tableScan;
	HeapScanDesc heapScan;
	bool		use_wal;
	bool		is_system_catalog;
	Tuplesortstate *tuplesort;
	TupleDesc	oldTupDesc = RelationGetDescr(OldHeap);
	TupleDesc	newTupDesc = RelationGetDescr(NewHeap);
	TupleTableSlot *slot;
	int			natts;
	Datum	   *values;
	bool	   *isnull;
	BufferHeapTupleTableSlot *hslot;

	/* Remember if it's a system catalog */
	is_system_catalog = IsSystemRelation(OldHeap);

	/*
	 * We need to log the copied data in WAL iff WAL archiving/streaming is
	 * enabled AND it's a WAL-logged rel.
	 */
	use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);

	/* use_wal off requires smgr_targblock be initially invalid */
	Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);

	/* Preallocate values/isnull arrays */
	natts = newTupDesc->natts;
	values = (Datum *) palloc(natts * sizeof(Datum));
	isnull = (bool *) palloc(natts * sizeof(bool));

	/* Initialize the rewrite operation */
	rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
								 MultiXactCutoff, use_wal);

	/* Set up sorting if wanted */
	if (use_sort)
		tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
											maintenance_work_mem,
											NULL, false);
	else
		tuplesort = NULL;

	/*
	 * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
	 * that still need to be copied, we scan with SnapshotAny and use
	 * HeapTupleSatisfiesVacuum for the visibility test.
	 */
	if (OldIndex != NULL && !use_sort)
	{
		const int	ci_index[] = {
			PROGRESS_CLUSTER_PHASE,
			PROGRESS_CLUSTER_INDEX_RELID
		};
		int64		ci_val[2];

		/* Set phase and OIDOldIndex to columns */
		ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
		ci_val[1] = RelationGetRelid(OldIndex);
		pgstat_progress_update_multi_param(2, ci_index, ci_val);

		tableScan = NULL;
		heapScan = NULL;
		indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
		index_rescan(indexScan, NULL, 0, NULL, 0);
	}
	else
	{
		/* In scan-and-sort mode and also VACUUM FULL, set phase */
		pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
									 PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);

		tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
		heapScan = (HeapScanDesc) tableScan;
		indexScan = NULL;

		/* Set total heap blocks */
		pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
									 heapScan->rs_nblocks);
	}

	slot = table_slot_create(OldHeap, NULL);
	hslot = (BufferHeapTupleTableSlot *) slot;

	/*
	 * Scan through the OldHeap, either in OldIndex order or sequentially;
	 * copy each tuple into the NewHeap, or transiently to the tuplesort
	 * module.  Note that we don't bother sorting dead tuples (they won't get
	 * to the new table anyway).
	 */
	for (;;)
	{
		HeapTuple	tuple;
		Buffer		buf;
		bool		isdead;
		CHECK_FOR_INTERRUPTS();

		if (indexScan != NULL)
		{
			if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
				break;

			/* Since we used no scan keys, should never need to recheck */
			if (indexScan->xs_recheck)
				elog(ERROR, "CLUSTER does not support lossy index conditions");
		}
		else
		{
			if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
				break;

			/*
			 * In scan-and-sort mode and also VACUUM FULL, set heap blocks
			 * scanned
			 */
			pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
										 heapScan->rs_cblock + 1);
		}

		tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
		buf = hslot->buffer;

		LockBuffer(buf, BUFFER_LOCK_SHARE);

		switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
		{
			case HEAPTUPLE_DEAD:
				/* Definitely dead */
				isdead = true;
				break;
			case HEAPTUPLE_RECENTLY_DEAD:
				*tups_recently_dead += 1;
				/* fall through */
			case HEAPTUPLE_LIVE:
				/* Live or recently dead, must copy it */
				isdead = false;
				break;
			case HEAPTUPLE_INSERT_IN_PROGRESS:

				/*
				 * Since we hold exclusive lock on the relation, normally the
				 * only way to see this is if it was inserted earlier in our
				 * own transaction.  However, it can happen in system
				 * catalogs, since we tend to release write lock before commit
				 * there.  Give a warning if neither case applies; but in any
				 * case we had better copy it.
				 */
				if (!is_system_catalog &&
					!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
					elog(WARNING, "concurrent insert in progress within table \"%s\"",
						 RelationGetRelationName(OldHeap));
				/* treat as live */
				isdead = false;
				break;
			case HEAPTUPLE_DELETE_IN_PROGRESS:

				/*
				 * Similar situation to INSERT_IN_PROGRESS case.
				 */
				if (!is_system_catalog &&
					!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
					elog(WARNING, "concurrent delete in progress within table \"%s\"",
						 RelationGetRelationName(OldHeap));
				/* treat as recently dead */
				*tups_recently_dead += 1;
				isdead = false;
				break;
			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				isdead = false; /* keep compiler quiet */
				break;
		}

		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		if (isdead)
		{
			*tups_vacuumed += 1;
			/* heap rewrite module still needs to see it... */
			if (rewrite_heap_dead_tuple(rwstate, tuple))
			{
				/* A previous recently-dead tuple is now known dead */
				*tups_vacuumed += 1;
				*tups_recently_dead -= 1;
			}
			continue;
		}

		*num_tuples += 1;
		if (tuplesort != NULL)
		{
			tuplesort_putheaptuple(tuplesort, tuple);

			/*
			 * In scan-and-sort mode, report increase in number of tuples
			 * scanned
			 */
			pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
										 *num_tuples);
		}
		else
		{
			const int	ct_index[] = {
				PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
				PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
			};
			int64		ct_val[2];

			reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
									 values, isnull, rwstate);

			/*
			 * In indexscan mode and also VACUUM FULL, report increase in
			 * number of tuples scanned and written
			 */
			ct_val[0] = *num_tuples;
			ct_val[1] = *num_tuples;
			pgstat_progress_update_multi_param(2, ct_index, ct_val);
		}
	}

	if (indexScan != NULL)
		index_endscan(indexScan);
	if (tableScan != NULL)
		table_endscan(tableScan);

	ExecDropSingleTupleTableSlot(slot);

	/*
	 * In scan-and-sort mode, complete the sort, then read out all live tuples
	 * from the tuplestore and write them to the new relation.
	 */
	if (tuplesort != NULL)
	{
		double		n_tuples = 0;

		/* Report that we are now sorting tuples */
		pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
									 PROGRESS_CLUSTER_PHASE_SORT_TUPLES);

		tuplesort_performsort(tuplesort);

		/* Report that we are now writing new heap */
		pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
									 PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);

		for (;;)
		{
			HeapTuple	tuple;

			CHECK_FOR_INTERRUPTS();

			tuple = tuplesort_getheaptuple(tuplesort, true);
			if (tuple == NULL)
				break;

			n_tuples += 1;
			reform_and_rewrite_tuple(tuple,
									 OldHeap, NewHeap,
									 values, isnull,
									 rwstate);
			/* Report n_tuples */
			pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
										 n_tuples);
		}

		tuplesort_end(tuplesort);
	}

	/* Write out any remaining tuples, and fsync if needed */
	end_heap_rewrite(rwstate);

	/* Clean up */
	pfree(values);
	pfree(isnull);
}
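
/*
 * ANALYZE support.  acquire_sample_rows() picks random blocks and then, via
 * these callbacks, pins each block (next_block) and iterates over its
 * tuples (next_tuple), classifying them for the sample and for the
 * live/dead row estimates.
 */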
static bool
heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
							   BufferAccessStrategy bstrategy)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;

	/*
	 * We must maintain a pin on the target page's buffer to ensure that
	 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
	 * under us.  Hence, pin the page until we are done looking at it.  We
	 * also choose to hold sharelock on the buffer throughout --- we could
	 * release and re-acquire sharelock for each tuple, but since we aren't
	 * doing much work per tuple, the extra lock traffic is probably better
	 * avoided.
	 */
	hscan->rs_cblock = blockno;
	hscan->rs_cindex = FirstOffsetNumber;
	hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
										blockno, RBM_NORMAL, bstrategy);
	LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

	/* in heap all blocks can contain tuples, so always return true */
	return true;
}

static bool
heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
							   double *liverows, double *deadrows,
							   TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	Page		targpage;
	OffsetNumber maxoffset;
	BufferHeapTupleTableSlot *hslot;

	Assert(TTS_IS_BUFFERTUPLE(slot));

	hslot = (BufferHeapTupleTableSlot *) slot;
	targpage = BufferGetPage(hscan->rs_cbuf);
	maxoffset = PageGetMaxOffsetNumber(targpage);

	/* Inner loop over all tuples on the selected page */
	for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
	{
		ItemId		itemid;
		HeapTuple	targtuple = &hslot->base.tupdata;
		bool		sample_it = false;

		itemid = PageGetItemId(targpage, hscan->rs_cindex);

		/*
		 * We ignore unused and redirect line pointers.  DEAD line pointers
		 * should be counted as dead, because we need vacuum to run to get rid
		 * of them.  Note that this rule agrees with the way that
		 * heap_page_prune() counts things.
		 */
		if (!ItemIdIsNormal(itemid))
		{
			if (ItemIdIsDead(itemid))
				*deadrows += 1;
			continue;
		}

		ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);

		targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
		targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
		targtuple->t_len = ItemIdGetLength(itemid);
		switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
										 hscan->rs_cbuf))
		{
			case HEAPTUPLE_LIVE:
				sample_it = true;
				*liverows += 1;
				break;

			case HEAPTUPLE_DEAD:
			case HEAPTUPLE_RECENTLY_DEAD:
				/* Count dead and recently-dead rows */
				*deadrows += 1;
				break;

			case HEAPTUPLE_INSERT_IN_PROGRESS:

				/*
				 * Insert-in-progress rows are not counted.  We assume that
				 * when the inserting transaction commits or aborts, it will
				 * send a stats message to increment the proper count.  This
				 * works right only if that transaction ends after we finish
				 * analyzing the table; if things happen in the other order,
				 * its stats update will be overwritten by ours.  However, the
				 * error will be large only if the other transaction runs long
				 * enough to insert many tuples, so assuming it will finish
				 * after us is the safer option.
				 *
				 * A special case is that the inserting transaction might be
				 * our own.  In this case we should count and sample the row,
				 * to accommodate users who load a table and analyze it in one
				 * transaction.  (pgstat_report_analyze has to adjust the
				 * numbers we send to the stats collector to make this come
				 * out right.)
				 */
				if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
				{
					sample_it = true;
					*liverows += 1;
				}
				break;

			case HEAPTUPLE_DELETE_IN_PROGRESS:

				/*
				 * We count and sample delete-in-progress rows the same as
				 * live ones, so that the stats counters come out right if the
				 * deleting transaction commits after us, per the same
				 * reasoning given above.
				 *
				 * If the delete was done by our own transaction, however, we
				 * must count the row as dead to make pgstat_report_analyze's
				 * stats adjustments come out right.  (Note: this works out
				 * properly when the row was both inserted and deleted in our
				 * xact.)
				 *
				 * The net effect of these choices is that we act as though an
				 * IN_PROGRESS transaction hasn't happened yet, except if it
				 * is our own transaction, which we assume has happened.
				 *
				 * This approach ensures that we behave sanely if we see both
				 * the pre-image and post-image rows for a row being updated
				 * by a concurrent transaction: we will sample the pre-image
				 * but not the post-image.  We also get sane results if the
				 * concurrent transaction never commits.
				 */
				if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
					*deadrows += 1;
				else
				{
					sample_it = true;
					*liverows += 1;
				}
				break;

			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				break;
		}

		if (sample_it)
		{
			ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
			hscan->rs_cindex++;

			/* note that we leave the buffer locked here! */
			return true;
		}
	}

	/* Now release the lock and pin on the page */
	UnlockReleaseBuffer(hscan->rs_cbuf);
	hscan->rs_cbuf = InvalidBuffer;

	/* also prevent old slot contents from having pin on page */
	ExecClearTuple(slot);

	return false;
}
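
/*
 * Scan the heap, or the given range of blocks, and feed each tuple that
 * should be indexed to the index build callback, on behalf of CREATE INDEX.
 * The return value is the number of tuples counted as live, which ends up
 * as the new index's pg_class.reltuples.
 */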
static double
heapam_index_build_range_scan(Relation heapRelation,
							  Relation indexRelation,
							  IndexInfo *indexInfo,
							  bool allow_sync,
							  bool anyvisible,
							  bool progress,
							  BlockNumber start_blockno,
							  BlockNumber numblocks,
							  IndexBuildCallback callback,
							  void *callback_state,
							  TableScanDesc scan)
{
	HeapScanDesc hscan;
	bool		is_system_catalog;
	bool		checking_uniqueness;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	double		reltuples;
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;
	bool		need_unregister_snapshot = false;
	TransactionId OldestXmin;
	BlockNumber previous_blkno = InvalidBlockNumber;
	BlockNumber root_blkno = InvalidBlockNumber;
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/* Remember if it's a system catalog */
	is_system_catalog = IsSystemRelation(heapRelation);

	/* See whether we're verifying uniqueness/exclusion properties */
	checking_uniqueness = (indexInfo->ii_Unique ||
						   indexInfo->ii_ExclusionOps != NULL);

	/*
	 * "Any visible" mode is not compatible with uniqueness checks; make sure
	 * only one of those is requested.
	 */
	Assert(!(anyvisible && checking_uniqueness));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = table_slot_create(heapRelation, NULL);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  In a normal index build, we use
	 * SnapshotAny because we must retrieve all tuples and do our own time
	 * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
	 * and index whatever's live according to that.
	 */
	OldestXmin = InvalidTransactionId;

	/* okay to ignore lazy VACUUMs here */
	if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
		OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
	if (!scan)
	{
		/*
		 * Serial index build.
		 *
		 * Must begin our own heap scan in this case.  We may also need to
		 * register a snapshot whose lifetime is under our direct control.
		 */
		if (!TransactionIdIsValid(OldestXmin))
		{
			snapshot = RegisterSnapshot(GetTransactionSnapshot());
			need_unregister_snapshot = true;
		}
		else
			snapshot = SnapshotAny;

		scan = table_beginscan_strat(heapRelation,	/* relation */
									 snapshot,	/* snapshot */
									 0, /* number of keys */
									 NULL,	/* scan key */
									 true,	/* buffer access strategy OK */
									 allow_sync);	/* syncscan OK? */
	}
	else
	{
		/*
		 * Parallel index build.
		 *
		 * Parallel case never registers/unregisters own snapshot.  Snapshot
		 * is taken from parallel heap scan, and is SnapshotAny or an MVCC
		 * snapshot, based on same criteria as serial case.
		 */
		Assert(!IsBootstrapProcessingMode());
		Assert(allow_sync);
		snapshot = scan->rs_snapshot;
	}

	hscan = (HeapScanDesc) scan;

	/* Publish number of blocks to scan */
	if (progress)
	{
		BlockNumber nblocks;

		if (hscan->rs_base.rs_parallel != NULL)
		{
			ParallelBlockTableScanDesc pbscan;

			pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
			nblocks = pbscan->phs_nblocks;
		}
		else
			nblocks = hscan->rs_nblocks;

		pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
									 nblocks);
	}

	/*
	 * Must call GetOldestXmin() with SnapshotAny.  Should never call
	 * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
	 * this for parallel builds, since ambuild routines that support parallel
	 * builds must work these details out for themselves.)
	 */
	Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
	Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
		   !TransactionIdIsValid(OldestXmin));
	Assert(snapshot == SnapshotAny || !anyvisible);

	/* set our scan endpoints */
	if (!allow_sync)
		heap_setscanlimits(scan, start_blockno, numblocks);
	else
	{
		/* syncscan can only be requested on whole relation */
		Assert(start_blockno == 0);
		Assert(numblocks == InvalidBlockNumber);
	}

	reltuples = 0;
	/*
	 * Scan all tuples in the base relation.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		bool		tupleIsAlive;

		CHECK_FOR_INTERRUPTS();

		/* Report scan progress, if asked to. */
		if (progress)
		{
			BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);

			if (blocks_done != previous_blkno)
			{
				pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
											 blocks_done);
				previous_blkno = blocks_done;
			}
		}

		/*
		 * When dealing with a HOT-chain of updated tuples, we want to index
		 * the values of the live tuple (if any), but index it under the TID
		 * of the chain's root tuple.  This approach is necessary to preserve
		 * the HOT-chain structure in the heap.  So we need to be able to find
		 * the root item offset for every tuple that's in a HOT-chain.  When
		 * first reaching a new page of the relation, call
		 * heap_get_root_tuples() to build a map of root item offsets on the
		 * page.
		 *
		 * It might look unsafe to use this information across buffer
		 * lock/unlock.  However, we hold ShareLock on the table so no
		 * ordinary insert/update/delete should occur; and we hold pin on the
		 * buffer continuously while visiting the page, so no pruning
		 * operation can occur either.
		 *
		 * Also, although our opinions about tuple liveness could change while
		 * we scan the page (due to concurrent transaction commits/aborts),
		 * the chain root locations won't, so this info doesn't need to be
		 * rebuilt after waiting for another transaction.
		 *
		 * Note the implied assumption that there is no more than one live
		 * tuple per HOT-chain --- else we could create more than one index
		 * entry pointing to the same root tuple.
		 */
		if (hscan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(hscan->rs_cbuf);

			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			root_blkno = hscan->rs_cblock;
		}
		if (snapshot == SnapshotAny)
		{
			/* do our own time qual check */
			bool		indexIt;
			TransactionId xwait;

	recheck:

			/*
			 * We could possibly get away with not locking the buffer here,
			 * since caller should hold ShareLock on the relation, but let's
			 * be conservative about it.  (This remark is still correct even
			 * with HOT-pruning: our pin on the buffer prevents pruning.)
			 */
			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

			/*
			 * The criteria for counting a tuple as live in this block need to
			 * match what analyze.c's heapam_scan_analyze_next_tuple() does,
			 * otherwise CREATE INDEX and ANALYZE may produce wildly different
			 * reltuples values, e.g. when there are many recently-dead
			 * tuples.
			 */
			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
											 hscan->rs_cbuf))
			{
				case HEAPTUPLE_DEAD:
					/* Definitely dead, we can ignore it */
					indexIt = false;
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Normal case, index and unique-check it */
					indexIt = true;
					tupleIsAlive = true;
					/* Count it as live, too */
					reltuples += 1;
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must index it
					 * anyway to preserve MVCC semantics.  (Pre-existing
					 * transactions could try to use the index after we finish
					 * building it, and may need to see such tuples.)
					 *
					 * However, if it was HOT-updated then we must only index
					 * the live tuple at the end of the HOT-chain.  Since this
					 * breaks semantics for pre-existing snapshots, mark the
					 * index as unusable for them.
					 *
					 * We don't count recently-dead tuples in reltuples, even
					 * if we index them; see heapam_scan_analyze_next_tuple().
					 */
					if (HeapTupleIsHotUpdated(heapTuple))
					{
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
						indexIt = true;
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:

					/*
					 * In "anyvisible" mode, this tuple is visible and we
					 * don't need any further checks.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = true;
						reltuples += 1;
						break;
					}

					/*
					 * Since caller should hold ShareLock or better, normally
					 * the only way to see this is if it was inserted earlier
					 * in our own transaction.  However, it can happen in
					 * system catalogs, since we tend to release write lock
					 * before commit there.  Give a warning if neither case
					 * applies.
					 */
					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent insert in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, indexing
						 * such a tuple could lead to a bogus uniqueness
						 * failure.  In that case we wait for the inserting
						 * transaction to finish and check again.
						 */
						if (checking_uniqueness)
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}
					}
					else
					{
						/*
						 * For consistency with
						 * heapam_scan_analyze_next_tuple(), count
						 * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
						 * when inserted by our own transaction.
						 */
						reltuples += 1;
					}

					/*
					 * We must index such tuples, since if the index build
					 * commits then they're good.
					 */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:

					/*
					 * As with INSERT_IN_PROGRESS case, this is unexpected
					 * unless it's our own deletion or a system catalog; but
					 * in anyvisible mode, this tuple is visible.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = false;
						reltuples += 1;
						break;
					}

					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent delete in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, assuming
						 * the tuple is dead could lead to missing a
						 * uniqueness violation.  In that case we wait for the
						 * deleting transaction to finish and check again.
						 *
						 * Also, if it's a HOT-updated tuple, we should not
						 * index it but rather the live tuple at the end of
						 * the HOT-chain.  However, the deleting transaction
						 * could abort, possibly leaving this tuple as live
						 * after all, in which case it has to be indexed. The
						 * only way to know what to do is to wait for the
						 * deleting transaction to finish and check again.
						 */
						if (checking_uniqueness ||
							HeapTupleIsHotUpdated(heapTuple))
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}

						/*
						 * Otherwise index it but don't check for uniqueness,
						 * the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;

						/*
						 * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
						 * if they were not deleted by the current
						 * transaction.  That's what
						 * heapam_scan_analyze_next_tuple() does, and we want
						 * the behavior to be consistent.
						 */
						reltuples += 1;
					}
					else if (HeapTupleIsHotUpdated(heapTuple))
					{
						/*
						 * It's a HOT-updated tuple deleted by our own xact.
						 * We can assume the deletion will commit (else the
						 * index contents don't matter), so treat the same as
						 * RECENTLY_DEAD HOT-updated tuples.
						 */
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
					{
						/*
						 * It's a regular tuple deleted by our own xact. Index
						 * it, but don't check for uniqueness nor count in
						 * reltuples, the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					indexIt = tupleIsAlive = false; /* keep compiler quiet */
					break;
			}
			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			if (!indexIt)
				continue;
		}
		else
		{
			/* heap_getnext did the time qual check */
			tupleIsAlive = true;
			reltuples += 1;
		}

		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);

		/*
		 * In a partial index, discard tuples that don't satisfy the
		 * predicate.
		 */
		if (predicate != NULL)
		{
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * For the current heap tuple, extract all the attributes we use in
		 * this index, and note which are null.  This also performs evaluation
		 * of any expressions needed.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * You'd think we should go ahead and build the index tuple here, but
		 * some index AMs want to do further processing on the data first. So
		 * pass the values[] and isnull[] arrays, instead.
		 */
		if (HeapTupleIsHeapOnly(heapTuple))
		{
			/*
			 * For a heap-only tuple, pretend its TID is that of the root. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			HeapTupleData rootTuple;
			OffsetNumber offnum;

			rootTuple = *heapTuple;
			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);

			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(&heapTuple->t_self),
										 offnum,
										 RelationGetRelationName(heapRelation))));

			ItemPointerSetOffsetNumber(&rootTuple.t_self,
									   root_offsets[offnum - 1]);

			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
		else
		{
			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
	}
	/* Report scan progress one last time. */
	if (progress)
	{
		BlockNumber blks_done;

		if (hscan->rs_base.rs_parallel != NULL)
		{
			ParallelBlockTableScanDesc pbscan;

			pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
			blks_done = pbscan->phs_nblocks;
		}
		else
			blks_done = hscan->rs_nblocks;

		pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
									 blks_done);
	}

	table_endscan(scan);

	/* we can now forget our snapshot, if set and registered by us */
	if (need_unregister_snapshot)
		UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;

	return reltuples;
}
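
/*
 * Validation scan for concurrent index builds (see validate_index()): scan
 * the heap once more and insert into the index any tuple that is visible to
 * the reference snapshot but missing from it, merging against the sorted
 * TIDs already collected from the index in state->tuplesort.
 */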
static void
heapam_index_validate_scan(Relation heapRelation,
						   Relation indexRelation,
						   IndexInfo *indexInfo,
						   Snapshot snapshot,
						   ValidateIndexState *state)
{
	TableScanDesc scan;
	HeapScanDesc hscan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
	bool		in_index[MaxHeapTuplesPerPage];
	BlockNumber previous_blkno = InvalidBlockNumber;

	/* state variables for the merge */
	ItemPointer indexcursor = NULL;
	ItemPointerData decoded;
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
									&TTSOpsHeapTuple);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = table_beginscan_strat(heapRelation,	/* relation */
								 snapshot,	/* snapshot */
								 0, /* number of keys */
								 NULL,	/* scan key */
								 true,	/* buffer access strategy OK */
								 false);	/* syncscan not OK */
	hscan = (HeapScanDesc) scan;

	pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
								 hscan->rs_nblocks);
	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		if ((previous_blkno == InvalidBlockNumber) ||
			(hscan->rs_cblock != previous_blkno))
		{
			pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
										 hscan->rs_cblock);
			previous_blkno = hscan->rs_cblock;
		}

		/*
		 * As commented in table_index_build_scan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page. We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (hscan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(hscan->rs_cbuf);

			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = hscan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			root_offnum = root_offsets[root_offnum - 1];
			if (!OffsetNumberIsValid(root_offnum))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(heapcursor),
										 ItemPointerGetOffsetNumber(heapcursor),
										 RelationGetRelationName(heapRelation))));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}
1804 * "merge" by skipping through the index tuples until we find or pass
1805 * the current root tuple.
1807 while (!tuplesort_empty &&
1809 ItemPointerCompare(indexcursor, &rootTuple) < 0))
1817 * Remember index items seen earlier on the current heap page
1819 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1820 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1823 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1824 &ts_val, &ts_isnull, NULL);
1825 Assert(tuplesort_empty || !ts_isnull);
1826 if (!tuplesort_empty)
1828 itemptr_decode(&decoded, DatumGetInt64(ts_val));
1829 indexcursor = &decoded;
1831 /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1832 #ifndef USE_FLOAT8_BYVAL
1833 pfree(DatumGetPointer(ts_val));
		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreHeapTuple(heapTuple, slot, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first. So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */
			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique ?
						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
						 indexInfo);

			state->tups_inserted += 1;
		}
	}

	table_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}

/*
 * Return the number of blocks that have been read by this scan since
 * starting.  This is meant for progress reporting rather than being fully
 * accurate: in a parallel scan, workers can be concurrently reading blocks
 * further ahead than what we report.
 */
static BlockNumber
heapam_scan_get_blocks_done(HeapScanDesc hscan)
{
	ParallelBlockTableScanDesc bpscan = NULL;
	BlockNumber startblock;
	BlockNumber blocks_done;

	if (hscan->rs_base.rs_parallel != NULL)
	{
		bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
		startblock = bpscan->phs_startblock;
	}
	else
		startblock = hscan->rs_startblock;

	/*
	 * Might have wrapped around the end of the relation, if startblock was
	 * not zero.
	 */
	if (hscan->rs_cblock > startblock)
		blocks_done = hscan->rs_cblock - startblock;
	else
	{
		BlockNumber nblocks;

		nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
		blocks_done = nblocks - startblock +
			hscan->rs_cblock;
	}

	return blocks_done;
}

/* ------------------------------------------------------------------------
 * Planner related callbacks for the heap AM
 * ------------------------------------------------------------------------
 */

static void
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
						 BlockNumber *pages, double *tuples,
						 double *allvisfrac)
{
	BlockNumber curpages;
	BlockNumber relpages;
	double		reltuples;
	BlockNumber relallvisible;
	double		density;

	/* it has storage, ok to call the smgr */
	curpages = RelationGetNumberOfBlocks(rel);

	/* coerce values in pg_class to more desirable types */
	relpages = (BlockNumber) rel->rd_rel->relpages;
	reltuples = (double) rel->rd_rel->reltuples;
	relallvisible = (BlockNumber) rel->rd_rel->relallvisible;

	/*
	 * HACK: if the relation has never yet been vacuumed, use a minimum size
	 * estimate of 10 pages.  The idea here is to avoid assuming a
	 * newly-created table is really small, even if it currently is, because
	 * that may not be true once some data gets loaded into it.  Once a vacuum
	 * or analyze cycle has been done on it, it's more reasonable to believe
	 * the size is somewhat stable.
	 *
	 * (Note that this is only an issue if the plan gets cached and used again
	 * after the table has been filled.  What we're trying to avoid is using a
	 * nestloop-type plan on a table that has grown substantially since the
	 * plan was made.  Normally, autovacuum/autoanalyze will occur once enough
	 * inserts have happened and cause cached-plan invalidation; but that
	 * doesn't happen instantaneously, and it won't happen at all for cases
	 * such as temporary tables.)
	 *
	 * We approximate "never vacuumed" by "has relpages = 0", which means this
	 * will also fire on genuinely empty relations.  Not great, but
	 * fortunately that's a seldom-seen case in the real world, and it
	 * shouldn't degrade the quality of the plan too much anyway to err in
	 * this direction.
	 *
	 * If the table has inheritance children, we don't apply this heuristic.
	 * Totally empty parent tables are quite common, so we should be willing
	 * to believe that they are empty.
	 */
	if (curpages < 10 &&
		relpages == 0 &&
		!rel->rd_rel->relhassubclass)
		curpages = 10;
	/* report estimated # pages */
	*pages = curpages;
	/* quick exit if rel is clearly empty */
	if (curpages == 0)
	{
		*tuples = 0;
		*allvisfrac = 0;
		return;
	}

	/* estimate number of tuples from previous tuple density */
	if (relpages > 0)
		density = reltuples / (double) relpages;
	else
	{
		/*
		 * When we have no data because the relation was truncated, estimate
		 * tuple width from attribute datatypes.  We assume here that the
		 * pages are completely full, which is OK for tables (since they've
		 * presumably not been VACUUMed yet) but is probably an overestimate
		 * for indexes.  Fortunately get_relation_info() can clamp the
		 * overestimate to the parent table's size.
		 *
		 * Note: this code intentionally disregards alignment considerations,
		 * because (a) that would be gilding the lily considering how crude
		 * the estimate is, and (b) it creates platform dependencies in the
		 * default plans which are kind of a headache for regression testing.
		 */
		int32		tuple_width;

		tuple_width = get_rel_data_width(rel, attr_widths);
		tuple_width += MAXALIGN(SizeofHeapTupleHeader);
		tuple_width += sizeof(ItemIdData);
		/* note: integer division is intentional here */
		density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
	}
	*tuples = rint(density * (double) curpages);
	/*
	 * We use relallvisible as-is, rather than scaling it up like we do for
	 * the pages and tuples counts, on the theory that any pages added since
	 * the last VACUUM are most likely not marked all-visible.  But costsize.c
	 * wants it converted to a fraction.
	 */
	if (relallvisible == 0 || curpages <= 0)
		*allvisfrac = 0;
	else if ((double) relallvisible >= curpages)
		*allvisfrac = 1;
	else
		*allvisfrac = (double) relallvisible / curpages;
}

/* ------------------------------------------------------------------------
 * Executor related callbacks for the heap AM
 * ------------------------------------------------------------------------
 */
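
/*
 * Bitmap heap scan support: next_block pins the page named by the bitmap
 * iterator and collects the offsets of all visible tuples on it;
 * next_tuple then returns those tuples one at a time.
 */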
static bool
heapam_scan_bitmap_next_block(TableScanDesc scan,
							  TBMIterateResult *tbmres)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	BlockNumber page = tbmres->blockno;
	Buffer		buffer;
	Snapshot	snapshot;
	int			ntup;

	hscan->rs_cindex = 0;
	hscan->rs_ntuples = 0;

	/*
	 * Ignore any claimed entries past what we think is the end of the
	 * relation. It may have been extended after the start of our scan (we
	 * only hold an AccessShareLock, and it could be inserts from this
	 * backend).
	 */
	if (page >= hscan->rs_nblocks)
		return false;

	/*
	 * Acquire pin on the target heap page, trading in any pin we held before.
	 */
	hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
										  scan->rs_rd,
										  page);
	hscan->rs_cblock = page;
	buffer = hscan->rs_cbuf;
	snapshot = scan->rs_snapshot;

	ntup = 0;

	/*
	 * Prune and repair fragmentation for the whole page, if possible.
	 */
	heap_page_prune_opt(scan->rs_rd, buffer);

	/*
	 * We must hold share lock on the buffer content while examining tuple
	 * visibility.  Afterwards, however, the tuples we have found to be
	 * visible are guaranteed good as long as we hold the buffer pin.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);

	/*
	 * We need two separate strategies for lossy and non-lossy cases.
	 */
	if (tbmres->ntuples >= 0)
	{
2125 * Bitmap is non-lossy, so we just look through the offsets listed in
2126 * tbmres; but we have to follow any HOT chain starting at each such
2131 for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2133 OffsetNumber offnum = tbmres->offsets[curslot];
2134 ItemPointerData tid;
2135 HeapTupleData heapTuple;
2137 ItemPointerSet(&tid, page, offnum);
2138 if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2139 &heapTuple, NULL, true))
2140 hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
	else
	{
		/*
		 * Bitmap is lossy, so we must examine each item pointer on the page.
		 * But we can ignore HOT chains, since we'll check each tuple anyway.
		 */
		Page		dp = (Page) BufferGetPage(buffer);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
		OffsetNumber offnum;

		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
		{
			ItemId		lp;
			HeapTupleData loctup;
			bool		valid;

			lp = PageGetItemId(dp, offnum);
			if (!ItemIdIsNormal(lp))
				continue;
			loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
			loctup.t_len = ItemIdGetLength(lp);
			loctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&loctup.t_self, page, offnum);
			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
			if (valid)
			{
				hscan->rs_vistuples[ntup++] = offnum;
				PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
			}
			CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
											buffer, snapshot);
		}
	}

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	Assert(ntup <= MaxHeapTuplesPerPage);
	hscan->rs_ntuples = ntup;

	return ntup > 0;
}

static bool
heapam_scan_bitmap_next_tuple(TableScanDesc scan,
							  TBMIterateResult *tbmres,
							  TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	OffsetNumber targoffset;
	Page		dp;
	ItemId		lp;

	/*
	 * Out of range?  If so, nothing more to look at on this page
	 */
	if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
		return false;

	targoffset = hscan->rs_vistuples[hscan->rs_cindex];
	dp = (Page) BufferGetPage(hscan->rs_cbuf);
	lp = PageGetItemId(dp, targoffset);
	Assert(ItemIdIsNormal(lp));

	hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
	hscan->rs_ctup.t_len = ItemIdGetLength(lp);
	hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
	ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);

	pgstat_count_heap_fetch(scan->rs_rd);

	/*
	 * Set up the result slot to point to this tuple.  Note that the slot
	 * acquires a pin on the buffer.
	 */
	ExecStoreBufferHeapTuple(&hscan->rs_ctup,
							 slot,
							 hscan->rs_cbuf);

	hscan->rs_cindex++;

	return true;
}
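
/*
 * Rough sketch of the calling pattern (a simplified paraphrase of the
 * executor's bitmap heap scan node, not code from this file).  The caller
 * iterates the bitmap and, per page, drives the two callbacks above
 * through their tableam wrappers:
 *
 *     TBMIterateResult *tbmres;
 *
 *     while ((tbmres = tbm_iterate(tbmiterator)) != NULL)
 *     {
 *         // skip pages past EOF, or with no visible tuples
 *         if (!table_scan_bitmap_next_block(scan, tbmres))
 *             continue;
 *         while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
 *         {
 *             // process one visible tuple, now stored in the slot
 *         }
 *     }
 */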

static bool
heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno;

	/* return false immediately if relation is empty */
	if (hscan->rs_nblocks == 0)
		return false;

	if (tsm->NextSampleBlock)
	{
		blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
		hscan->rs_cblock = blockno;
	}
	else
	{
		/* scanning table sequentially */

		if (hscan->rs_cblock == InvalidBlockNumber)
		{
			Assert(!hscan->rs_inited);
			blockno = hscan->rs_startblock;
		}
		else
		{
			Assert(hscan->rs_inited);

			blockno = hscan->rs_cblock + 1;

			if (blockno >= hscan->rs_nblocks)
			{
				/* wrap to beginning of rel, might not have started at 0 */
				blockno = 0;
			}

			/*
			 * Report our new scan position for synchronization purposes.
			 *
			 * Note: we do this before checking for end of scan so that the
			 * final state of the position hint is back at the start of the
			 * rel. That's not strictly necessary, but otherwise when you run
			 * the same query multiple times the starting position would shift
			 * a little bit backwards on every invocation, which is confusing.
			 * We don't guarantee any specific ordering in general, though.
			 */
			if (scan->rs_syncscan)
				ss_report_location(scan->rs_rd, blockno);

			if (blockno == hscan->rs_startblock)
			{
				blockno = InvalidBlockNumber;
			}
		}
	}

	if (!BlockNumberIsValid(blockno))
	{
		if (BufferIsValid(hscan->rs_cbuf))
			ReleaseBuffer(hscan->rs_cbuf);
		hscan->rs_cbuf = InvalidBuffer;
		hscan->rs_cblock = InvalidBlockNumber;
		hscan->rs_inited = false;

		return false;
	}

	heapgetpage(scan, blockno);
	hscan->rs_inited = true;

	return true;
}
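
/*
 * Context note (an aside, not part of the original file): among the
 * built-in tablesample methods, SYSTEM supplies a NextSampleBlock callback
 * and so drives the block selection above itself, while BERNOULLI leaves
 * it NULL and therefore visits every block via the sequential path,
 * filtering individual rows in NextSampleTuple.  For example:
 *
 *     SELECT * FROM t TABLESAMPLE SYSTEM (10);
 *     SELECT * FROM t TABLESAMPLE BERNOULLI (10);
 */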

static bool
heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
							  TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno = hscan->rs_cblock;
	bool		pagemode = scan->rs_pageatatime;

	Page		page;
	bool		all_visible;
	OffsetNumber maxoffset;

	/*
	 * When not using pagemode, we must lock the buffer during tuple
	 * visibility checks.
	 */
	if (!pagemode)
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

	page = (Page) BufferGetPage(hscan->rs_cbuf);
	all_visible = PageIsAllVisible(page) &&
		!scan->rs_snapshot->takenDuringRecovery;
	maxoffset = PageGetMaxOffsetNumber(page);

	for (;;)
	{
		OffsetNumber tupoffset;

		CHECK_FOR_INTERRUPTS();

		/* Ask the tablesample method which tuples to check on this page. */
		tupoffset = tsm->NextSampleTuple(scanstate,
										 blockno,
										 maxoffset);

		if (OffsetNumberIsValid(tupoffset))
		{
			ItemId		itemid;
			bool		visible;
			HeapTuple	tuple = &(hscan->rs_ctup);

			/* Skip invalid tuple pointers. */
			itemid = PageGetItemId(page, tupoffset);
			if (!ItemIdIsNormal(itemid))
				continue;

			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple->t_len = ItemIdGetLength(itemid);
			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);

			if (all_visible)
				visible = true;
			else
				visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
												 tuple, tupoffset);

			/* in pagemode, heapgetpage did this for us */
			if (!pagemode)
				CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
												hscan->rs_cbuf, scan->rs_snapshot);

			/* Try next tuple from same page. */
			if (!visible)
				continue;

			/* Found visible tuple, return it. */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);

			/* Count successfully-fetched tuples as heap fetches */
			pgstat_count_heap_getnext(scan->rs_rd);

			return true;
		}
		else
		{
			/*
			 * If we get here, it means we've exhausted the items on this page
			 * and it's time to move to the next.
			 */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecClearTuple(slot);
			return false;
		}
	}

	Assert(0);
}

/* ----------------------------------------------------------------------------
 * Helper functions for the above.
 * ----------------------------------------------------------------------------
 */

/*
 * Reconstruct and rewrite the given tuple
 *
 * We cannot simply copy the tuple as-is, for several reasons:
 *
 * 1. We'd like to squeeze out the values of any dropped columns, both
 * to save space and to ensure we have no corner-case failures. (It's
 * possible for example that the new table hasn't got a TOAST table
 * and so is unable to store any large values of dropped cols.)
 *
 * 2. The tuple might not even be legal for the new table; this is
 * currently only known to happen as an after-effect of ALTER TABLE
 * SET WITHOUT OIDS.
 *
 * So, we must reconstruct the tuple from component Datums.
 */
static void
reform_and_rewrite_tuple(HeapTuple tuple,
						 Relation OldHeap, Relation NewHeap,
						 Datum *values, bool *isnull, RewriteState rwstate)
{
	TupleDesc	oldTupDesc = RelationGetDescr(OldHeap);
	TupleDesc	newTupDesc = RelationGetDescr(NewHeap);
	HeapTuple	copiedTuple;
	int			i;

	heap_deform_tuple(tuple, oldTupDesc, values, isnull);

	/* Be sure to null out any dropped columns */
	for (i = 0; i < newTupDesc->natts; i++)
	{
		if (TupleDescAttr(newTupDesc, i)->attisdropped)
			isnull[i] = true;
	}

	copiedTuple = heap_form_tuple(newTupDesc, values, isnull);

	/* The heap rewrite module does the rest */
	rewrite_heap_tuple(rwstate, tuple, copiedTuple);

	heap_freetuple(copiedTuple);
}
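
/*
 * (Context note: within this file, reform_and_rewrite_tuple() is invoked
 * from the relation_copy_for_cluster callback, i.e. when CLUSTER or
 * VACUUM FULL rewrites a heap into a new relfilenode.)
 */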

/*
 * Check visibility of the tuple.
 */
static bool
SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
					   HeapTuple tuple,
					   OffsetNumber tupoffset)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;

	if (scan->rs_pageatatime)
	{
		/*
		 * In pageatatime mode, heapgetpage() already did visibility checks,
		 * so just look at the info it left in rs_vistuples[].
		 *
		 * We use a binary search over the known-sorted array. Note: we could
		 * save some effort if we insisted that NextSampleTuple select tuples
		 * in increasing order, but it's not clear that there would be enough
		 * gain to justify the restriction.
		 */
		int			start = 0,
					end = hscan->rs_ntuples - 1;

		while (start <= end)
		{
			int			mid = (start + end) / 2;
			OffsetNumber curoffset = hscan->rs_vistuples[mid];

			if (tupoffset == curoffset)
				return true;
			else if (tupoffset < curoffset)
				end = mid - 1;
			else
				start = mid + 1;
		}

		return false;
	}
	else
	{
		/* Otherwise, we have to check the tuple individually. */
		return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
											buffer);
	}
}
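
/*
 * (The sortedness assumed by the binary search above is established by
 * heapgetpage(), which fills rs_vistuples[] in ascending offset order.)
 */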

/* ------------------------------------------------------------------------
 * Definition of the heap table access method.
 * ------------------------------------------------------------------------
 */
static const TableAmRoutine heapam_methods = {
	.type = T_TableAmRoutine,

	.slot_callbacks = heapam_slot_callbacks,

	.scan_begin = heap_beginscan,
	.scan_end = heap_endscan,
	.scan_rescan = heap_rescan,
	.scan_getnextslot = heap_getnextslot,

	.parallelscan_estimate = table_block_parallelscan_estimate,
	.parallelscan_initialize = table_block_parallelscan_initialize,
	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,

	.index_fetch_begin = heapam_index_fetch_begin,
	.index_fetch_reset = heapam_index_fetch_reset,
	.index_fetch_end = heapam_index_fetch_end,
	.index_fetch_tuple = heapam_index_fetch_tuple,

	.tuple_insert = heapam_tuple_insert,
	.tuple_insert_speculative = heapam_tuple_insert_speculative,
	.tuple_complete_speculative = heapam_tuple_complete_speculative,
	.tuple_delete = heapam_tuple_delete,
	.tuple_update = heapam_tuple_update,
	.tuple_lock = heapam_tuple_lock,
	.finish_bulk_insert = heapam_finish_bulk_insert,

	.tuple_fetch_row_version = heapam_fetch_row_version,
	.tuple_get_latest_tid = heap_get_latest_tid,
	.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
	.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,

	.relation_set_new_filenode = heapam_relation_set_new_filenode,
	.relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
	.relation_copy_data = heapam_relation_copy_data,
	.relation_copy_for_cluster = heapam_relation_copy_for_cluster,
	.relation_vacuum = heap_vacuum_rel,
	.scan_analyze_next_block = heapam_scan_analyze_next_block,
	.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
	.index_build_range_scan = heapam_index_build_range_scan,
	.index_validate_scan = heapam_index_validate_scan,

	.relation_estimate_size = heapam_estimate_rel_size,

	.scan_bitmap_next_block = heapam_scan_bitmap_next_block,
	.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
	.scan_sample_next_block = heapam_scan_sample_next_block,
	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
};

const TableAmRoutine *
GetHeapamTableAmRoutine(void)
{
	return &heapam_methods;
}

Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&heapam_methods);
}
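
/*
 * Usage sketch (illustrative, not part of the original file): the handler
 * function is what defines a table access method at the SQL level; the
 * built-in "heap" AM's pg_am entry points at heap_tableam_handler, and an
 * equivalent AM could be declared as:
 *
 *     CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
 *     CREATE TABLE t2 (a int) USING heap2;
 */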