/*-------------------------------------------------------------------------
 *
 * pruneheap.c
 *    heap page pruning and HOT-chain management code
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/heap/pruneheap.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#include "access/transam.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "utils/tqual.h"


/* Working data for heap_page_prune and subroutines */
typedef struct
{
    TransactionId new_prune_xid;    /* new prune hint value for page */
    TransactionId latestRemovedXid; /* latest xid to be removed by this prune */
    int         nredirected;        /* numbers of entries in arrays below */
    int         ndead;
    int         nunused;
    /* arrays that accumulate indexes of items to be changed */
    OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
    OffsetNumber nowdead[MaxHeapTuplesPerPage];
    OffsetNumber nowunused[MaxHeapTuplesPerPage];
    /* marked[i] is TRUE if item i is entered in one of the above arrays */
    bool        marked[MaxHeapTuplesPerPage + 1];
} PruneState;
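
/*
 * Note: redirected[] above is sized at two entries per possible redirection
 * because heap_prune_record_redirect() stores (from-offset, to-offset)
 * pairs; nowdead[] and nowunused[] take one entry per item.
 */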

/* Local functions */
static int heap_prune_chain(Relation relation, Buffer buffer,
                 OffsetNumber rootoffnum,
                 TransactionId OldestXmin,
                 PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate,
                           OffsetNumber offnum, OffsetNumber rdoffnum);
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);


/*
 * Optionally prune and repair fragmentation in the specified page.
 *
 * This is an opportunistic function.  It will perform housekeeping
 * only if the page heuristically looks like a candidate for pruning and we
 * can acquire buffer cleanup lock without blocking.
 *
 * Note: this is called quite often.  It's important that it fall out quickly
 * if there's not any use in pruning.
 *
 * Caller must have pin on the buffer, and must *not* have a lock on it.
 *
 * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
 * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 */
void
heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
{
    Page        page = BufferGetPage(buffer);
    Size        minfree;

    /*
     * Let's see if we really need pruning.
     *
     * Forget it if page is not hinted to contain something prunable that's
     * older than OldestXmin.
     */
    if (!PageIsPrunable(page, OldestXmin))
        return;

    /*
     * We can't write WAL in recovery mode, so there's no point trying to
     * clean the page. The master will likely issue a cleaning WAL record soon
     * anyway, so this is no particular loss.
     */
    if (RecoveryInProgress())
        return;

    /*
     * We prune when a previous UPDATE failed to find enough space on the page
     * for a new tuple version, or when free space falls below the relation's
     * fill-factor target (but not less than 10%).
     *
     * Checking free space here is questionable since we aren't holding any
     * lock on the buffer; in the worst case we could get a bogus answer. It's
     * unlikely to be *seriously* wrong, though, since reading either pd_lower
     * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     * important than sometimes getting a wrong answer in what is after all
     * just a heuristic estimate.
     */
    minfree = RelationGetTargetPageFreeSpace(relation,
                                             HEAP_DEFAULT_FILLFACTOR);
    minfree = Max(minfree, BLCKSZ / 10);

    if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
    {
        /* OK, try to get exclusive buffer lock */
        if (!ConditionalLockBufferForCleanup(buffer))
            return;

        /*
         * Now that we have buffer lock, get accurate information about the
         * page's free space, and recheck the heuristic about whether to
         * prune. (We needn't recheck PageIsPrunable, since no one else could
         * have pruned while we hold pin.)
         */
        if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
        {
            TransactionId ignore = InvalidTransactionId;    /* return value not
                                                             * needed */

            /* OK to prune */
            (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
        }

        /* And release buffer lock */
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    }
}
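
/*
 * Illustrative sketch (not part of the original file): a typical caller
 * pins the page, attempts opportunistic pruning before taking its own
 * lock, and passes a recent global xmin horizon as the cutoff, e.g.:
 *
 *      buffer = ReadBuffer(relation, blkno);
 *      heap_page_prune_opt(relation, buffer, RecentGlobalXmin);
 *      LockBuffer(buffer, BUFFER_LOCK_SHARE);
 *      ... examine tuples ...
 *      UnlockReleaseBuffer(buffer);
 *
 * The exact call sites vary; heapam.c's page-at-a-time scan code is one
 * such caller.
 */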


/*
 * Prune and repair fragmentation in the specified page.
 *
 * Caller must have pin and buffer cleanup lock on the page.
 *
 * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
 * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 *
 * If report_stats is true then we send the number of reclaimed heap-only
 * tuples to pgstats.  (This must be FALSE during vacuum, since vacuum will
 * send its own new total to pgstats, and we don't want this delta applied
 * on top of that.)
 *
 * Returns the number of tuples deleted from the page and sets
 * latestRemovedXid.
 */
int
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
                bool report_stats, TransactionId *latestRemovedXid)
{
    int         ndeleted = 0;
    Page        page = BufferGetPage(buffer);
    OffsetNumber offnum,
                maxoff;
    PruneState  prstate;

    /*
     * Our strategy is to scan the page and make lists of items to change,
     * then apply the changes within a critical section.  This keeps as much
     * logic as possible out of the critical section, and also ensures that
     * WAL replay will work the same as the normal case.
     *
     * First, initialize the new pd_prune_xid value to zero (indicating no
     * prunable tuples).  If we find any tuples which may soon become
     * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     * initialize the rest of our working state.
     */
    prstate.new_prune_xid = InvalidTransactionId;
    prstate.latestRemovedXid = *latestRemovedXid;
    prstate.nredirected = prstate.ndead = prstate.nunused = 0;
    memset(prstate.marked, 0, sizeof(prstate.marked));

    /* Scan the page */
    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid;

        /* Ignore items already processed as part of an earlier chain */
        if (prstate.marked[offnum])
            continue;

        /* Nothing to do if slot is empty or already dead */
        itemid = PageGetItemId(page, offnum);
        if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
            continue;

        /* Process this item or chain of items */
        ndeleted += heap_prune_chain(relation, buffer, offnum,
                                     OldestXmin,
                                     &prstate);
    }

    /* Any error while applying the changes is critical */
    START_CRIT_SECTION();

    /* Have we found any prunable items? */
    if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
    {
        /*
         * Apply the planned item changes, then repair page fragmentation, and
         * update the page's hint bit about whether it has free line pointers.
         */
        heap_page_prune_execute(buffer,
                                prstate.redirected, prstate.nredirected,
                                prstate.nowdead, prstate.ndead,
                                prstate.nowunused, prstate.nunused);

        /*
         * Update the page's pd_prune_xid field to either zero, or the lowest
         * XID of any soon-prunable tuple.
         */
        ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;

        /*
         * Also clear the "page is full" flag, since there's no point in
         * repeating the prune/defrag process until something else happens to
         * the page.
         */
        PageClearFull(page);

        MarkBufferDirty(buffer);

        /*
         * Emit a WAL HEAP_CLEAN record showing what we did
         */
        if (RelationNeedsWAL(relation))
        {
            XLogRecPtr  recptr;

            recptr = log_heap_clean(relation, buffer,
                                    prstate.redirected, prstate.nredirected,
                                    prstate.nowdead, prstate.ndead,
                                    prstate.nowunused, prstate.nunused,
                                    prstate.latestRemovedXid);

            PageSetLSN(BufferGetPage(buffer), recptr);
        }
    }
    else
    {
        /*
         * If we didn't prune anything, but have found a new value for the
         * pd_prune_xid field, update it and mark the buffer dirty. This is
         * treated as a non-WAL-logged hint.
         *
         * Also clear the "page is full" flag if it is set, since there's no
         * point in repeating the prune/defrag process until something else
         * happens to the page.
         */
        if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
            PageIsFull(page))
        {
            ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
            PageClearFull(page);
            SetBufferCommitInfoNeedsSave(buffer);
        }
    }

    END_CRIT_SECTION();

    /*
     * If requested, report the number of tuples reclaimed to pgstats. This is
     * ndeleted minus ndead, because we don't want to count a now-DEAD root
     * item as a deletion for this purpose.
     */
    if (report_stats && ndeleted > prstate.ndead)
        pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);

    *latestRemovedXid = prstate.latestRemovedXid;

    /*
     * XXX Should we update the FSM information of this page?
     *
     * There are two schools of thought here. We may not want to update FSM
     * information so that the page is not used for unrelated UPDATEs/INSERTs
     * and any free space in this page will remain available for further
     * UPDATEs in *this* page, thus improving chances for doing HOT updates.
     *
     * But for a large table and where a page does not receive further UPDATEs
     * for a long time, we might waste this space by not updating the FSM
     * information. The relation may get extended and fragmented further.
     *
     * One possibility is to leave "fillfactor" worth of space in this page
     * and update FSM with the remaining space.
     */

    return ndeleted;
}
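
/*
 * Illustrative sketch (not part of the original file): VACUUM, which
 * already holds a buffer cleanup lock, calls heap_page_prune directly with
 * report_stats = false and threads a single latestRemovedXid through all
 * pages it visits, roughly:
 *
 *      TransactionId latestRemovedXid = InvalidTransactionId;
 *      int           ndeleted;
 *
 *      ndeleted = heap_page_prune(onerel, buf, OldestXmin,
 *                                 false, &latestRemovedXid);
 */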


/*
 * Prune specified item pointer or a HOT chain originating at that item.
 *
 * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
 * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
 * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
 * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
 * DEAD; the OldestXmin test is just too coarse to detect it.
 *
 * The root line pointer is redirected to the tuple immediately after the
 * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
 * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
 * tuple, which we treat as a chain of length 1.)
 *
 * OldestXmin is the cutoff XID used to identify dead tuples.
 *
 * We don't actually change the page here, except perhaps for hint-bit updates
 * caused by HeapTupleSatisfiesVacuum.  We just add entries to the arrays in
 * prstate showing the changes to be made.  Items to be redirected are added
 * to the redirected[] array (two entries per redirection); items to be set to
 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
 * state are added to nowunused[].
 *
 * Returns the number of tuples (to be) deleted from the page.
 */
static int
heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
                 TransactionId OldestXmin,
                 PruneState *prstate)
{
    int         ndeleted = 0;
    Page        dp = (Page) BufferGetPage(buffer);
    TransactionId priorXmax = InvalidTransactionId;
    ItemId      rootlp;
    HeapTupleHeader htup;
    OffsetNumber latestdead = InvalidOffsetNumber,
                maxoff = PageGetMaxOffsetNumber(dp),
                offnum;
    OffsetNumber chainitems[MaxHeapTuplesPerPage];
    int         nchain = 0,
                i;

    rootlp = PageGetItemId(dp, rootoffnum);

    /*
     * If it's a heap-only tuple, then it is not the start of a HOT chain.
     */
    if (ItemIdIsNormal(rootlp))
    {
        htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
        if (HeapTupleHeaderIsHeapOnly(htup))
        {
            /*
             * If the tuple is DEAD and doesn't chain to anything else, mark
             * it unused immediately.  (If it does chain, we can only remove
             * it as part of pruning its chain.)
             *
             * We need this primarily to handle aborted HOT updates, that is,
             * XMIN_INVALID heap-only tuples.  Those might not be linked to by
             * any chain, since the parent tuple might be re-updated before
             * any pruning occurs.  So we have to be able to reap them
             * separately from chain-pruning.  (Note that
             * HeapTupleHeaderIsHotUpdated will never return true for an
             * XMIN_INVALID tuple, so this code will work even when there were
             * sequential updates within the aborted transaction.)
             *
             * Note that we might first arrive at a dead heap-only tuple
             * either here or while following a chain below.  Whichever path
             * gets there first will mark the tuple unused.
             */
            if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer)
                == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
            {
                heap_prune_record_unused(prstate, rootoffnum);
                HeapTupleHeaderAdvanceLatestRemovedXid(htup,
                                                 &prstate->latestRemovedXid);
                ndeleted++;
            }

            /* Nothing more to do */
            return ndeleted;
        }
    }

    /* Start from the root tuple */
    offnum = rootoffnum;

    /* while not end of the chain */
    for (;;)
    {
        ItemId      lp;
        bool        tupdead,
                    recent_dead;

        /* Some sanity checks */
        if (offnum < FirstOffsetNumber || offnum > maxoff)
            break;

        /* If item is already processed, stop --- it can't be in this chain */
        if (prstate->marked[offnum])
            break;

        lp = PageGetItemId(dp, offnum);

        /* Unused item obviously isn't part of the chain */
        if (!ItemIdIsUsed(lp))
            break;

        /*
         * If we are looking at the redirected root line pointer, jump to the
         * first normal tuple in the chain.  If we find a redirect somewhere
         * else, stop --- it can't be part of the same chain.
         */
        if (ItemIdIsRedirected(lp))
        {
            if (nchain > 0)
                break;          /* not at start of chain */
            chainitems[nchain++] = offnum;
            offnum = ItemIdGetRedirect(rootlp);
            continue;
        }

        /*
         * Likewise, a dead item pointer can't be part of the chain. (We
         * already eliminated the case of dead root tuple outside this
         * function.)
         */
        if (ItemIdIsDead(lp))
            break;

        Assert(ItemIdIsNormal(lp));
        htup = (HeapTupleHeader) PageGetItem(dp, lp);

        /*
         * Check the tuple XMIN against prior XMAX, if any
         */
        if (TransactionIdIsValid(priorXmax) &&
            !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
            break;

        /*
         * OK, this tuple is indeed a member of the chain.
         */
        chainitems[nchain++] = offnum;

        /*
         * Check tuple's visibility status.
         */
        tupdead = recent_dead = false;

        switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
        {
            case HEAPTUPLE_DEAD:
                tupdead = true;
                break;

            case HEAPTUPLE_RECENTLY_DEAD:
                recent_dead = true;

                /*
                 * This tuple may soon become DEAD.  Update the hint field so
                 * that the page is reconsidered for pruning in future.
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetUpdateXid(htup));
                break;

            case HEAPTUPLE_DELETE_IN_PROGRESS:

                /*
                 * This tuple may soon become DEAD.  Update the hint field so
                 * that the page is reconsidered for pruning in future.
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetUpdateXid(htup));
                break;

            case HEAPTUPLE_LIVE:
            case HEAPTUPLE_INSERT_IN_PROGRESS:

                /*
                 * If we wanted to optimize for aborts, we might consider
                 * marking the page prunable when we see INSERT_IN_PROGRESS.
                 * But we don't.  See related decisions about when to mark the
                 * page prunable in heapam.c.
                 */
                break;

            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                break;
        }

        /*
         * Remember the last DEAD tuple seen.  We will advance past
         * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
         * but we can't advance past anything else.  (XXX is it really worth
         * continuing to scan beyond RECENTLY_DEAD?  The case where we will
         * find another DEAD tuple is a fairly unusual corner case.)
         */
        if (tupdead)
        {
            latestdead = offnum;
            HeapTupleHeaderAdvanceLatestRemovedXid(htup,
                                                 &prstate->latestRemovedXid);
        }
        else if (!recent_dead)
            break;

        /*
         * If the tuple is not HOT-updated, then we are at the end of this
         * HOT-update chain.
         */
        if (!HeapTupleHeaderIsHotUpdated(htup))
            break;

        /*
         * Advance to next chain member.
         */
        Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
               BufferGetBlockNumber(buffer));
        offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
        priorXmax = HeapTupleHeaderGetUpdateXid(htup);
    }

    /*
     * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
     * the DEAD tuples at the start of the chain are removed and the root line
     * pointer is appropriately redirected.
     */
    if (OffsetNumberIsValid(latestdead))
    {
        /*
         * Mark as unused each intermediate item that we are able to remove
         * from the chain.
         *
         * The loop stops once the previous item is the last DEAD tuple seen;
         * the item we stop at, if the chain extends that far, is the right
         * candidate for redirection.
         */
        for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
        {
            heap_prune_record_unused(prstate, chainitems[i]);
            ndeleted++;
        }

        /*
         * If the root entry had been a normal tuple, we are deleting it, so
         * count it in the result.  But changing a redirect (even to DEAD
         * state) doesn't count.
         */
        if (ItemIdIsNormal(rootlp))
            ndeleted++;

        /*
         * If the DEAD tuple is at the end of the chain, the entire chain is
         * dead and the root line pointer can be marked dead.  Otherwise just
         * redirect the root to the correct chain member.
         */
        if (i >= nchain)
            heap_prune_record_dead(prstate, rootoffnum);
        else
            heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
    }
    else if (nchain < 2 && ItemIdIsRedirected(rootlp))
    {
        /*
         * We found a redirect item that doesn't point to a valid follow-on
         * item.  This can happen if the loop in heap_page_prune caused us to
         * visit the dead successor of a redirect item before visiting the
         * redirect item.  We can clean up by setting the redirect item to
         * DEAD state.
         */
        heap_prune_record_dead(prstate, rootoffnum);
    }

    return ndeleted;
}
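
/*
 * Worked example (hypothetical page state): suppose the root item at
 * offset 1 heads a HOT chain 1 -> 2 -> 3 in which tuples 1 and 2 are DEAD
 * and tuple 3 is LIVE.  heap_prune_chain records offset 2 as now-unused,
 * records a redirect from offset 1 to offset 3, and returns 2 (both DEAD
 * tuples).  After heap_page_prune_execute, item 1 is an LP_REDIRECT
 * pointing at offset 3, and the storage of tuples 1 and 2 is reclaimed by
 * PageRepairFragmentation.
 */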

/* Record lowest soon-prunable XID */
static void
heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
{
    /*
     * This should exactly match the PageSetPrunable macro.  We can't store
     * directly into the page header yet, so we update working state.
     */
    Assert(TransactionIdIsNormal(xid));
    if (!TransactionIdIsValid(prstate->new_prune_xid) ||
        TransactionIdPrecedes(xid, prstate->new_prune_xid))
        prstate->new_prune_xid = xid;
}

/* Record item pointer to be redirected */
static void
heap_prune_record_redirect(PruneState *prstate,
                           OffsetNumber offnum, OffsetNumber rdoffnum)
{
    Assert(prstate->nredirected < MaxHeapTuplesPerPage);
    prstate->redirected[prstate->nredirected * 2] = offnum;
    prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
    prstate->nredirected++;
    Assert(!prstate->marked[offnum]);
    prstate->marked[offnum] = true;
    Assert(!prstate->marked[rdoffnum]);
    prstate->marked[rdoffnum] = true;
}

/* Record item pointer to be marked dead */
static void
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
{
    Assert(prstate->ndead < MaxHeapTuplesPerPage);
    prstate->nowdead[prstate->ndead] = offnum;
    prstate->ndead++;
    Assert(!prstate->marked[offnum]);
    prstate->marked[offnum] = true;
}

/* Record item pointer to be marked unused */
static void
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
{
    Assert(prstate->nunused < MaxHeapTuplesPerPage);
    prstate->nowunused[prstate->nunused] = offnum;
    prstate->nunused++;
    Assert(!prstate->marked[offnum]);
    prstate->marked[offnum] = true;
}


/*
 * Perform the actual page changes needed by heap_page_prune.
 * It is expected that the caller has suitable pin and lock on the
 * buffer, and is inside a critical section.
 *
 * This is split out because it is also used by heap_xlog_clean()
 * to replay the WAL record when needed after a crash.  Note that the
 * arguments are identical to those of log_heap_clean().
 */
void
heap_page_prune_execute(Buffer buffer,
                        OffsetNumber *redirected, int nredirected,
                        OffsetNumber *nowdead, int ndead,
                        OffsetNumber *nowunused, int nunused)
{
    Page        page = (Page) BufferGetPage(buffer);
    OffsetNumber *offnum;
    int         i;

    /* Update all redirected line pointers */
    offnum = redirected;
    for (i = 0; i < nredirected; i++)
    {
        OffsetNumber fromoff = *offnum++;
        OffsetNumber tooff = *offnum++;
        ItemId      fromlp = PageGetItemId(page, fromoff);

        ItemIdSetRedirect(fromlp, tooff);
    }

    /* Update all now-dead line pointers */
    offnum = nowdead;
    for (i = 0; i < ndead; i++)
    {
        OffsetNumber off = *offnum++;
        ItemId      lp = PageGetItemId(page, off);

        ItemIdSetDead(lp);
    }

    /* Update all now-unused line pointers */
    offnum = nowunused;
    for (i = 0; i < nunused; i++)
    {
        OffsetNumber off = *offnum++;
        ItemId      lp = PageGetItemId(page, off);

        ItemIdSetUnused(lp);
    }
    /*
     * Finally, repair any fragmentation, and update the page's hint bit about
     * whether it has free line pointers.
     */
    PageRepairFragmentation(page);
}
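
/*
 * Note: the ItemIdSet* macros used above (see itemid.h) rewrite only the
 * line pointers; an LP_REDIRECT item stores the target offset in lp_off
 * and owns no storage, while LP_DEAD and LP_UNUSED items clear both lp_off
 * and lp_len.  The tuple space itself is reclaimed by
 * PageRepairFragmentation.
 */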


/*
 * For all items in this page, find their respective root line pointers.
 * If item k is part of a HOT-chain with root at item j, then we set
 * root_offsets[k - 1] = j.
 *
 * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
 * We zero out all unused entries.
 *
 * The function must be called with at least share lock on the buffer, to
 * prevent concurrent prune operations.
 *
 * Note: The information collected here is valid only as long as the caller
 * holds a pin on the buffer. Once pin is released, a tuple might be pruned
 * and reused by a completely unrelated tuple.
 */
void
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
    OffsetNumber offnum,
                maxoff;

    MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));

    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        OffsetNumber nextoffnum;
        TransactionId priorXmax;

        /* skip unused and dead items */
        if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
            continue;

        if (ItemIdIsNormal(lp))
        {
            htup = (HeapTupleHeader) PageGetItem(page, lp);

            /*
             * Check if this tuple is part of a HOT-chain rooted at some other
             * tuple. If so, skip it for now; we'll process it when we find
             * its root.
             */
            if (HeapTupleHeaderIsHeapOnly(htup))
                continue;

            /*
             * This is either a plain tuple or the root of a HOT-chain.
             * Remember it in the mapping.
             */
            root_offsets[offnum - 1] = offnum;

            /* If it's not the start of a HOT-chain, we're done with it */
            if (!HeapTupleHeaderIsHotUpdated(htup))
                continue;

            /* Set up to scan the HOT-chain */
            nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
            priorXmax = HeapTupleHeaderGetUpdateXid(htup);
        }
        else
        {
            /* Must be a redirect item. We do not set its root_offsets entry */
            Assert(ItemIdIsRedirected(lp));
            /* Set up to scan the HOT-chain */
            nextoffnum = ItemIdGetRedirect(lp);
            priorXmax = InvalidTransactionId;
        }

        /*
         * Now follow the HOT-chain and collect other tuples in the chain.
         *
         * Note: Even though this is a nested loop, the function is O(N)
         * overall because each tuple on the page is visited at most twice:
         * once by the outer loop and once while chasing a HOT chain.
         */
        for (;;)
        {
            lp = PageGetItemId(page, nextoffnum);

            /* Check for broken chains */
            if (!ItemIdIsNormal(lp))
                break;

            htup = (HeapTupleHeader) PageGetItem(page, lp);

            if (TransactionIdIsValid(priorXmax) &&
                !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
                break;

            /* Remember the root line pointer for this item */
            root_offsets[nextoffnum - 1] = offnum;

            /* Advance to next chain member, if any */
            if (!HeapTupleHeaderIsHotUpdated(htup))
                break;

            nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
            priorXmax = HeapTupleHeaderGetUpdateXid(htup);
        }
    }
}
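
/*
 * Illustrative sketch (not part of the original file): CREATE INDEX needs
 * this mapping so that heap-only tuples are indexed under the TID of their
 * root line pointer, roughly:
 *
 *      OffsetNumber root_offsets[MaxHeapTuplesPerPage];
 *
 *      LockBuffer(buffer, BUFFER_LOCK_SHARE);
 *      heap_get_root_tuples(BufferGetPage(buffer), root_offsets);
 *      LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 *      ...
 *      root_offnum = root_offsets[offnum - 1];
 */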