]> granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c
Restructure backend SIGINT/SIGTERM handling so that 'die' interrupts
[postgresql] / src / backend / commands / vacuum.c
1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  *        the postgres vacuum cleaner
5  *
6  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.183 2001/01/14 05:08:15 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <sys/types.h>
18 #include <sys/file.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22
23 #ifndef HAVE_GETRUSAGE
24 #include "rusagestub.h"
25 #else
26 #include <sys/time.h>
27 #include <sys/resource.h>
28 #endif
29
30 #include "access/genam.h"
31 #include "access/heapam.h"
32 #include "access/xlog.h"
33 #include "catalog/catalog.h"
34 #include "catalog/catname.h"
35 #include "catalog/index.h"
36 #include "commands/vacuum.h"
37 #include "miscadmin.h"
38 #include "nodes/execnodes.h"
39 #include "storage/sinval.h"
40 #include "storage/smgr.h"
41 #include "tcop/tcopprot.h"
42 #include "utils/acl.h"
43 #include "utils/builtins.h"
44 #include "utils/fmgroids.h"
45 #include "utils/inval.h"
46 #include "utils/relcache.h"
47 #include "utils/syscache.h"
48 #include "utils/temprel.h"
49
50 extern XLogRecPtr       log_heap_clean(Relation reln, Buffer buffer,
51                                                                         char *unused, int unlen);
52 extern XLogRecPtr       log_heap_move(Relation reln, 
53                                                 Buffer oldbuf, ItemPointerData from,
54                                                 Buffer newbuf, HeapTuple newtup);
55
56 static MemoryContext vac_context = NULL;
57
58 static int      MESSAGE_LEVEL;          /* message level */
59
60 static TransactionId XmaxRecent;
61
62 /* non-export function prototypes */
63 static void vacuum_init(void);
64 static void vacuum_shutdown(void);
65 static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
66 static VRelList getrels(NameData *VacRelP);
67 static void vacuum_rel(Oid relid);
68 static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
69 static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
70 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
71 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
72 static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
73 static void scan_index(Relation indrel, int num_tuples);
74 static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
75 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
76 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
77 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
78 static void get_indices(Relation relation, int *nindices, Relation **Irel);
79 static void close_indices(int nindices, Relation *Irel);
80 static IndexInfo **get_index_desc(Relation onerel, int nindices,
81                                                                   Relation *Irel);
82 static void *vac_find_eq(void *bot, int nelem, int size, void *elm,
83                          int (*compar) (const void *, const void *));
84 static int      vac_cmp_blk(const void *left, const void *right);
85 static int      vac_cmp_offno(const void *left, const void *right);
86 static int      vac_cmp_vtlinks(const void *left, const void *right);
87 static bool enough_space(VacPage vacpage, Size len);
88 static char *show_rusage(struct rusage * ru0);
89
90
91 void
92 vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
93 {
94         NameData        VacRel;
95         Name            VacRelName;
96         MemoryContext old;
97         List       *le;
98         List       *anal_cols2 = NIL;
99
100         if (anal_cols != NIL && !analyze)
101                 elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
102
103         /*
104          * We cannot run VACUUM inside a user transaction block; if we were
105          * inside a transaction, then our commit- and
106          * start-transaction-command calls would not have the intended effect!
107          * Furthermore, the forced commit that occurs before truncating the
108          * relation's file would have the effect of committing the rest of the
109          * user's transaction too, which would certainly not be the desired
110          * behavior.
111          */
112         if (IsTransactionBlock())
113                 elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
114
115         if (verbose)
116                 MESSAGE_LEVEL = NOTICE;
117         else
118                 MESSAGE_LEVEL = DEBUG;
119
120         /*
121          * Create special memory context for cross-transaction storage.
122          *
123          * Since it is a child of QueryContext, it will go away eventually
124          * even if we suffer an error; there's no need for special abort
125          * cleanup logic.
126          */
127         vac_context = AllocSetContextCreate(QueryContext,
128                                                                                 "Vacuum",
129                                                                                 ALLOCSET_DEFAULT_MINSIZE,
130                                                                                 ALLOCSET_DEFAULT_INITSIZE,
131                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
132
133         /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
134         if (vacrel)
135         {
136                 namestrcpy(&VacRel, vacrel);
137                 VacRelName = &VacRel;
138         }
139         else
140                 VacRelName = NULL;
141
142         /* must also copy the column list, if any, to safe storage */
143         old = MemoryContextSwitchTo(vac_context);
144         foreach(le, anal_cols)
145         {
146                 char       *col = (char *) lfirst(le);
147
148                 anal_cols2 = lappend(anal_cols2, pstrdup(col));
149         }
150         MemoryContextSwitchTo(old);
151
152         /*
153          * Start up the vacuum cleaner.
154          *
155          * NOTE: since this commits the current transaction, the memory holding
156          * any passed-in parameters gets freed here.  We must have already
157          * copied pass-by-reference parameters to safe storage.  Don't make me
158          * fix this again!
159          */
160         vacuum_init();
161
162         /* vacuum the database */
163         vac_vacuum(VacRelName, analyze, anal_cols2);
164
165         /* clean up */
166         vacuum_shutdown();
167 }
168
169 /*
170  *      vacuum_init(), vacuum_shutdown() -- start up and shut down the vacuum cleaner.
171  *
172  *              Formerly, there was code here to prevent more than one VACUUM from
173  *              executing concurrently in the same database.  However, there's no
174  *              good reason to prevent that, and manually removing lockfiles after
175  *              a vacuum crash was a pain for dbadmins.  So, forget about lockfiles,
176  *              and just rely on the exclusive lock we grab on each target table
177  *              to ensure that there aren't two VACUUMs running on the same table
178  *              at the same time.
179  *
180  *              The strangeness with committing and starting transactions in the
181  *              init and shutdown routines is due to the fact that the vacuum cleaner
182  *              is invoked via an SQL command, and so is already executing inside
183  *              a transaction.  We need to leave ourselves in a predictable state
184  *              on entry and exit to the vacuum cleaner.  We commit the transaction
185  *              started in PostgresMain() inside vacuum_init(), and start one in
186  *              vacuum_shutdown() to match the commit waiting for us back in
187  *              PostgresMain().
188  */
/*
 * vacuum_init() -- leave the backend outside any transaction, so that
 * each relation can subsequently be vacuumed in its own short
 * transaction (see the block comment above).
 */
static void
vacuum_init()
{
	/* matches the StartTransaction in PostgresMain() */
	CommitTransactionCommand();
}
195
/*
 * vacuum_shutdown() -- re-enter a transaction to match the commit that
 * awaits us back in PostgresMain(), and release vacuum's working storage.
 */
static void
vacuum_shutdown()
{
	/* on entry, we are not in a transaction */

	/*
	 * Flush the init file that relcache.c uses to save startup time. The
	 * next backend startup will rebuild the init file with up-to-date
	 * information from pg_class.  This lets the optimizer see the stats
	 * that we've collected for certain critical system indexes.  See
	 * relcache.c for more details.
	 *
	 * Ignore any failure to unlink the file, since it might not be there if
	 * no backend has been started since the last vacuum...
	 */
	unlink(RELCACHE_INIT_FILENAME);

	/* matches the CommitTransaction in PostgresMain() */
	StartTransactionCommand();

	/*
	 * Clean up working storage --- note we must do this after
	 * StartTransactionCommand, else we might be trying to delete
	 * the active context!
	 */
	MemoryContextDelete(vac_context);
	vac_context = NULL;
}
224
225 /*
226  *      vac_vacuum() -- vacuum the database.
227  *
228  *              This routine builds a list of relations to vacuum, and then calls
229  *              code that vacuums them one at a time.  We are careful to vacuum each
230  *              relation in a separate transaction in order to avoid holding too many
231  *              locks at one time.
232  */
233 static void
234 vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
235 {
236         VRelList        vrl,
237                                 cur;
238
239         /* get list of relations */
240         vrl = getrels(VacRelP);
241
242         /* vacuum each heap relation */
243         for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
244         {
245                 vacuum_rel(cur->vrl_relid);
246                 /* analyze separately so locking is minimized */
247                 if (analyze)
248                         analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
249         }
250 }
251
252 static VRelList
253 getrels(NameData *VacRelP)
254 {
255         Relation        rel;
256         TupleDesc       tupdesc;
257         HeapScanDesc scan;
258         HeapTuple       tuple;
259         VRelList        vrl,
260                                 cur;
261         Datum           d;
262         char       *rname;
263         char            rkind;
264         bool            n;
265         bool            found = false;
266         ScanKeyData key;
267
268         StartTransactionCommand();
269
270         if (NameStr(*VacRelP))
271         {
272
273                 /*
274                  * we could use the cache here, but it is clearer to use scankeys
275                  * for both vacuum cases, bjm 2000/01/19
276                  */
277                 char       *nontemp_relname;
278
279                 /* We must re-map temp table names bjm 2000-04-06 */
280                 nontemp_relname = get_temp_rel_by_username(NameStr(*VacRelP));
281                 if (nontemp_relname == NULL)
282                         nontemp_relname = NameStr(*VacRelP);
283
284                 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relname,
285                                                            F_NAMEEQ,
286                                                            PointerGetDatum(nontemp_relname));
287         }
288         else
289         {
290                 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
291                                                            F_CHAREQ, CharGetDatum('r'));
292         }
293
294         vrl = cur = (VRelList) NULL;
295
296         rel = heap_openr(RelationRelationName, AccessShareLock);
297         tupdesc = RelationGetDescr(rel);
298
299         scan = heap_beginscan(rel, false, SnapshotNow, 1, &key);
300
301         while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
302         {
303                 found = true;
304
305                 d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
306                 rname = (char *) d;
307
308                 d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
309
310                 rkind = DatumGetChar(d);
311
312                 if (rkind != RELKIND_RELATION)
313                 {
314                         elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
315                         continue;
316                 }
317
318                 /* get a relation list entry for this guy */
319                 if (vrl == (VRelList) NULL)
320                         vrl = cur = (VRelList)
321                                 MemoryContextAlloc(vac_context, sizeof(VRelListData));
322                 else
323                 {
324                         cur->vrl_next = (VRelList)
325                                 MemoryContextAlloc(vac_context, sizeof(VRelListData));
326                         cur = cur->vrl_next;
327                 }
328
329                 cur->vrl_relid = tuple->t_data->t_oid;
330                 cur->vrl_next = (VRelList) NULL;
331         }
332
333         heap_endscan(scan);
334         heap_close(rel, AccessShareLock);
335
336         if (!found)
337                 elog(NOTICE, "Vacuum: table not found");
338
339         CommitTransactionCommand();
340
341         return vrl;
342 }
343
344 /*
345  *      vacuum_rel() -- vacuum one heap relation
346  *
347  *              This routine vacuums a single heap, cleans out its indices, and
348  *              updates its num_pages and num_tuples statistics.
349  *
350  *              Doing one heap at a time incurs extra overhead, since we need to
351  *              check that the heap exists again just before we vacuum it.      The
352  *              reason that we do this is so that vacuuming can be spread across
353  *              many small transactions.  Otherwise, two-phase locking would require
354  *              us to lock the entire database during one pass of the vacuum cleaner.
355  *
356  *              At entry and exit, we are not inside a transaction.
357  */
static void
vacuum_rel(Oid relid)
{
	Relation	onerel;
	LockRelId	onerelid;		/* remembered so the session lock can be
								 * released after the toast recursion */
	VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean
								   * indices */
	VacPageListData fraged_pages; /* List of pages with space enough for
								   * re-using */
	Relation   *Irel;
	int32		nindices,
				i;
	VRelStats  *vacrelstats;
	bool		reindex = false; /* true => drop and rebuild indexes
								  * instead of cleaning them in place */
	Oid			toast_relid;

	/* Begin a transaction for vacuuming this relation */
	StartTransactionCommand();

	/*
	 * Check for user-requested abort.      Note we want this to be inside a
	 * transaction, so xact.c doesn't issue useless NOTICE.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Race condition -- if the pg_class tuple has gone away since the
	 * last time we saw it, we don't need to vacuum it.
	 */
	if (!SearchSysCacheExists(RELOID,
							  ObjectIdGetDatum(relid),
							  0, 0, 0))
	{
		CommitTransactionCommand();
		return;
	}

	/*
	 * Open the class, get an exclusive lock on it, and check permissions.
	 *
	 * Note we choose to treat permissions failure as a NOTICE and keep
	 * trying to vacuum the rest of the DB --- is this appropriate?
	 */
	onerel = heap_open(relid, AccessExclusiveLock);

	if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
					   RELNAME))
	{
		elog(NOTICE, "Skipping \"%s\" --- only table owner can VACUUM it",
			 RelationGetRelationName(onerel));
		heap_close(onerel, AccessExclusiveLock);
		CommitTransactionCommand();
		return;
	}

	/*
	 * Get a session-level exclusive lock too.  This will protect our
	 * exclusive access to the relation across multiple transactions,
	 * so that we can vacuum the relation's TOAST table (if any) secure
	 * in the knowledge that no one is diddling the parent relation.
	 *
	 * NOTE: this cannot block, even if someone else is waiting for access,
	 * because the lock manager knows that both lock requests are from the
	 * same process.
	 */
	onerelid = onerel->rd_lockInfo.lockRelId;
	LockRelationForSession(&onerelid, AccessExclusiveLock);

	/*
	 * Remember the relation's TOAST relation for later
	 */
	toast_relid = onerel->rd_rel->reltoastrelid;

	/*
	 * Set up statistics-gathering machinery.
	 */
	vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
	vacrelstats->relid = relid;
	vacrelstats->num_pages = vacrelstats->num_tuples = 0;
	vacrelstats->hasindex = false;

	/* Oldest xmax still visible to any open transaction (module global) */
	GetXmaxRecent(&XmaxRecent);

	/* scan it */
	reindex = false;
	vacuum_pages.num_pages = fraged_pages.num_pages = 0;
	scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
	/*
	 * If system-index updates are disabled, a system relation's indexes
	 * are rebuilt wholesale (deactivate/reactivate below) rather than
	 * cleaned incrementally.
	 */
	if (IsIgnoringSystemIndexes() &&
		IsSystemRelationName(RelationGetRelationName(onerel)))
		reindex = true;

	/* Now open indices */
	nindices = 0;
	Irel = (Relation *) NULL;
	get_indices(onerel, &nindices, &Irel);
	if (!Irel)
		reindex = false;
	else if (!RelationGetForm(onerel)->relhasindex)
		/* indexes exist but pg_class.relhasindex is unset; rebuild them */
		reindex = true;
	if (nindices > 0)
		vacrelstats->hasindex = true;
	else
		vacrelstats->hasindex = false;
	if (reindex)
	{
		/* Close the indexes and deactivate them; rebuilt after vacuuming */
		for (i = 0; i < nindices; i++)
			index_close(Irel[i]);
		Irel = (Relation *) NULL;
		activate_indexes_of_a_table(relid, false);
	}

	/* Clean/scan index relation(s) */
	if (Irel != (Relation *) NULL)
	{
		if (vacuum_pages.num_pages > 0)
		{
			/* Remove index entries pointing at reaped heap tuples */
			for (i = 0; i < nindices; i++)
				vacuum_index(&vacuum_pages, Irel[i],
							 vacrelstats->num_tuples, 0);
		}
		else
		{
			/* just scan indices to update statistic */
			for (i = 0; i < nindices; i++)
				scan_index(Irel[i], vacrelstats->num_tuples);
		}
	}

	if (fraged_pages.num_pages > 0)
	{
		/* Try to shrink heap */
		repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
					nindices, Irel);
	}
	else
	{
		/* No shrinking possible; repair_frag would have closed the
		 * indexes itself, so do it here instead. */
		if (Irel != (Relation *) NULL)
			close_indices(nindices, Irel);
		if (vacuum_pages.num_pages > 0)
		{
			/* Clean pages from vacuum_pages list */
			vacuum_heap(vacrelstats, onerel, &vacuum_pages);
		}
		else
		{
			/*
			 * Flush dirty pages out to disk.  We must do this even if we
			 * didn't do anything else, because we want to ensure that all
			 * tuples have correct on-row commit status on disk (see
			 * bufmgr.c's comments for FlushRelationBuffers()).
			 */
			i = FlushRelationBuffers(onerel, vacrelstats->num_pages);
			if (i < 0)
				elog(ERROR, "VACUUM (vacuum_rel): FlushRelationBuffers returned %d",
					 i);
		}
	}
	/* Rebuild the indexes deactivated above, now that the heap is clean */
	if (reindex)
		activate_indexes_of_a_table(relid, true);

	/* all done with this class, but hold lock until commit */
	heap_close(onerel, NoLock);

	/* update statistics in pg_class */
	update_relstats(vacrelstats->relid, vacrelstats->num_pages,
					vacrelstats->num_tuples, vacrelstats->hasindex,
					vacrelstats);

	/*
	 * Complete the transaction and free all temporary memory used.
	 */
	CommitTransactionCommand();

	/*
	 * If the relation has a secondary toast one, vacuum that too
	 * while we still hold the session lock on the master table.
	 * We don't need to propagate "analyze" to it, because the toaster
	 * always uses hardcoded index access and statistics are
	 * totally unimportant for toast relations
	 */
	if (toast_relid != InvalidOid)
		vacuum_rel(toast_relid);

	/*
	 * Now release the session-level lock on the master table.
	 */
	UnlockRelationForSession(&onerelid, AccessExclusiveLock);
}
546
547 /*
548  *      scan_heap() -- scan an open heap relation
549  *
550  *              This routine sets commit times, constructs vacuum_pages list of
551  *              empty/uninitialized pages and pages with dead tuples and
552  *              ~LP_USED line pointers, constructs fraged_pages list of pages
553  *              appropriate for purposes of shrinking and maintains statistics
554  *              on the number of live tuples in a heap.
555  */
556 static void
557 scan_heap(VRelStats *vacrelstats, Relation onerel,
558                         VacPageList vacuum_pages, VacPageList fraged_pages)
559 {
560         BlockNumber nblocks,
561                                 blkno;
562         ItemId          itemid;
563         Buffer          buf;
564         HeapTupleData tuple;
565         Page            page,
566                                 tempPage = NULL;
567         OffsetNumber offnum,
568                                 maxoff;
569         bool            pgchanged,
570                                 tupgone,
571                                 dobufrel,
572                                 notup;
573         char       *relname;
574         VacPage         vacpage,
575                                 vp;
576         uint32          tups_vacuumed,
577                                 num_tuples,
578                                 nkeep,
579                                 nunused,
580                                 ncrash,
581                                 empty_pages,
582                                 new_pages,
583                                 changed_pages,
584                                 empty_end_pages;
585         Size            free_size,
586                                 usable_free_size;
587         Size            min_tlen = MaxTupleSize;
588         Size            max_tlen = 0;
589         int32           i;
590         bool            do_shrinking = true;
591         VTupleLink      vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
592         int                     num_vtlinks = 0;
593         int                     free_vtlinks = 100;
594         struct rusage ru0;
595
596         getrusage(RUSAGE_SELF, &ru0);
597
598         relname = RelationGetRelationName(onerel);
599         elog(MESSAGE_LEVEL, "--Relation %s--", relname);
600
601         tups_vacuumed = num_tuples = nkeep = nunused = ncrash = empty_pages =
602                 new_pages = changed_pages = empty_end_pages = 0;
603         free_size = usable_free_size = 0;
604
605         nblocks = RelationGetNumberOfBlocks(onerel);
606
607         vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
608         vacpage->offsets_used = 0;
609
610         for (blkno = 0; blkno < nblocks; blkno++)
611         {
612                 buf = ReadBuffer(onerel, blkno);
613                 page = BufferGetPage(buf);
614                 vacpage->blkno = blkno;
615                 vacpage->offsets_free = 0;
616
617                 if (PageIsNew(page))
618                 {
619                         elog(NOTICE, "Rel %s: Uninitialized page %u - fixing",
620                                  relname, blkno);
621                         PageInit(page, BufferGetPageSize(buf), 0);
622                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
623                         free_size += (vacpage->free - sizeof(ItemIdData));
624                         new_pages++;
625                         empty_end_pages++;
626                         reap_page(vacuum_pages, vacpage);
627                         WriteBuffer(buf);
628                         continue;
629                 }
630
631                 if (PageIsEmpty(page))
632                 {
633                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
634                         free_size += (vacpage->free - sizeof(ItemIdData));
635                         empty_pages++;
636                         empty_end_pages++;
637                         reap_page(vacuum_pages, vacpage);
638                         ReleaseBuffer(buf);
639                         continue;
640                 }
641
642                 pgchanged = false;
643                 notup = true;
644                 maxoff = PageGetMaxOffsetNumber(page);
645                 for (offnum = FirstOffsetNumber;
646                          offnum <= maxoff;
647                          offnum = OffsetNumberNext(offnum))
648                 {
649                         itemid = PageGetItemId(page, offnum);
650
651                         /*
652                          * Collect un-used items too - it's possible to have indices
653                          * pointing here after crash.
654                          */
655                         if (!ItemIdIsUsed(itemid))
656                         {
657                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
658                                 nunused++;
659                                 continue;
660                         }
661
662                         tuple.t_datamcxt = NULL;
663                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
664                         tuple.t_len = ItemIdGetLength(itemid);
665                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
666                         tupgone = false;
667
668                         if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
669                         {
670                                 if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID)
671                                         tupgone = true;
672                                 else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
673                                 {
674                                         if (TransactionIdDidCommit((TransactionId)
675                                                                                            tuple.t_data->t_cmin))
676                                         {
677                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
678                                                 pgchanged = true;
679                                                 tupgone = true;
680                                         }
681                                         else
682                                         {
683                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
684                                                 pgchanged = true;
685                                         }
686                                 }
687                                 else if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
688                                 {
689                                         if (!TransactionIdDidCommit((TransactionId)
690                                                                                                 tuple.t_data->t_cmin))
691                                         {
692                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
693                                                 pgchanged = true;
694                                                 tupgone = true;
695                                         }
696                                         else
697                                         {
698                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
699                                                 pgchanged = true;
700                                         }
701                                 }
702                                 else
703                                 {
704                                         if (TransactionIdDidAbort(tuple.t_data->t_xmin))
705                                                 tupgone = true;
706                                         else if (TransactionIdDidCommit(tuple.t_data->t_xmin))
707                                         {
708                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
709                                                 pgchanged = true;
710                                         }
711                                         else if (!TransactionIdIsInProgress(tuple.t_data->t_xmin))
712                                         {
713
714                                                 /*
715                                                  * Not Aborted, Not Committed, Not in Progress -
716                                                  * so it's from crashed process. - vadim 11/26/96
717                                                  */
718                                                 ncrash++;
719                                                 tupgone = true;
720                                         }
721                                         else
722                                         {
723                                                 elog(NOTICE, "Rel %s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
724                                                    relname, blkno, offnum, tuple.t_data->t_xmin);
725                                                 do_shrinking = false;
726                                         }
727                                 }
728                         }
729
730                         /*
731                          * here we are concerned about tuples with xmin committed and
732                          * xmax unknown or committed
733                          */
734                         if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED &&
735                                 !(tuple.t_data->t_infomask & HEAP_XMAX_INVALID))
736                         {
737                                 if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED)
738                                 {
739                                         if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
740                                         {
741                                                 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
742                                                 tuple.t_data->t_infomask &=
743                                                         ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
744                                                 pgchanged = true;
745                                         }
746                                         else
747                                                 tupgone = true;
748                                 }
749                                 else if (TransactionIdDidAbort(tuple.t_data->t_xmax))
750                                 {
751                                         tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
752                                         pgchanged = true;
753                                 }
754                                 else if (TransactionIdDidCommit(tuple.t_data->t_xmax))
755                                 {
756                                         if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
757                                         {
758                                                 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
759                                                 tuple.t_data->t_infomask &=
760                                                         ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
761                                                 pgchanged = true;
762                                         }
763                                         else
764                                                 tupgone = true;
765                                 }
766                                 else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax))
767                                 {
768
769                                         /*
770                                          * Not Aborted, Not Committed, Not in Progress - so it
771                                          * from crashed process. - vadim 06/02/97
772                                          */
773                                         tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
774                                         tuple.t_data->t_infomask &=
775                                                 ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
776                                         pgchanged = true;
777                                 }
778                                 else
779                                 {
780                                         elog(NOTICE, "Rel %s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
781                                                  relname, blkno, offnum, tuple.t_data->t_xmax);
782                                         do_shrinking = false;
783                                 }
784
785                                 /*
786                                  * If tuple is recently deleted then we must not remove it
787                                  * from relation.
788                                  */
789                                 if (tupgone && (tuple.t_data->t_infomask & HEAP_XMIN_INVALID) == 0 && tuple.t_data->t_xmax >= XmaxRecent)
790                                 {
791                                         tupgone = false;
792                                         nkeep++;
793                                         if (!(tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED))
794                                         {
795                                                 tuple.t_data->t_infomask |= HEAP_XMAX_COMMITTED;
796                                                 pgchanged = true;
797                                         }
798
799                                         /*
800                                          * If we do shrinking and this tuple is updated one
801                                          * then remember it to construct updated tuple
802                                          * dependencies.
803                                          */
804                                         if (do_shrinking && !(ItemPointerEquals(&(tuple.t_self),
805                                                                                            &(tuple.t_data->t_ctid))))
806                                         {
807                                                 if (free_vtlinks == 0)
808                                                 {
809                                                         free_vtlinks = 1000;
810                                                         vtlinks = (VTupleLink) repalloc(vtlinks,
811                                                                                    (free_vtlinks + num_vtlinks) *
812                                                                                                  sizeof(VTupleLinkData));
813                                                 }
814                                                 vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
815                                                 vtlinks[num_vtlinks].this_tid = tuple.t_self;
816                                                 free_vtlinks--;
817                                                 num_vtlinks++;
818                                         }
819                                 }
820                         }
821
822                         /*
823                          * Other checks...
824                          */
825                         if (!OidIsValid(tuple.t_data->t_oid))
826                         {
827                                 elog(NOTICE, "Rel %s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
828                                          relname, blkno, offnum, tupgone);
829                         }
830
831                         if (tupgone)
832                         {
833                                 ItemId          lpp;
834
835                                 /*
836                                  * Here we are building a temporary copy of the page with
837                                  * dead tuples removed.  Below we will apply
838                                  * PageRepairFragmentation to the copy, so that we can
839                                  * determine how much space will be available after
840                                  * removal of dead tuples.  But note we are NOT changing
841                                  * the real page yet...
842                                  */
843                                 if (tempPage == (Page) NULL)
844                                 {
845                                         Size            pageSize;
846
847                                         pageSize = PageGetPageSize(page);
848                                         tempPage = (Page) palloc(pageSize);
849                                         memmove(tempPage, page, pageSize);
850                                 }
851
852                                 /* mark it unused on the temp page */
853                                 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
854                                 lpp->lp_flags &= ~LP_USED;
855
856                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
857                                 tups_vacuumed++;
858                         }
859                         else
860                         {
861                                 num_tuples++;
862                                 notup = false;
863                                 if (tuple.t_len < min_tlen)
864                                         min_tlen = tuple.t_len;
865                                 if (tuple.t_len > max_tlen)
866                                         max_tlen = tuple.t_len;
867                         }
868                 }
869
870                 if (pgchanged)
871                 {
872                         WriteBuffer(buf);
873                         dobufrel = false;
874                         changed_pages++;
875                 }
876                 else
877                         dobufrel = true;
878
879                 if (tempPage != (Page) NULL)
880                 {                                               /* Some tuples are gone */
881                         PageRepairFragmentation(tempPage, NULL);
882                         vacpage->free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
883                         free_size += vacpage->free;
884                         reap_page(vacuum_pages, vacpage);
885                         pfree(tempPage);
886                         tempPage = (Page) NULL;
887                 }
888                 else if (vacpage->offsets_free > 0)
889                 {                                               /* there are only ~LP_USED line pointers */
890                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
891                         free_size += vacpage->free;
892                         reap_page(vacuum_pages, vacpage);
893                 }
894                 if (dobufrel)
895                         ReleaseBuffer(buf);
896                 if (notup)
897                         empty_end_pages++;
898                 else
899                         empty_end_pages = 0;
900         }
901
902         pfree(vacpage);
903
904         /* save stats in the rel list for use later */
905         vacrelstats->num_tuples = num_tuples;
906         vacrelstats->num_pages = nblocks;
907 /*        vacrelstats->natts = attr_cnt;*/
908         if (num_tuples == 0)
909                 min_tlen = max_tlen = 0;
910         vacrelstats->min_tlen = min_tlen;
911         vacrelstats->max_tlen = max_tlen;
912
913         vacuum_pages->empty_end_pages = empty_end_pages;
914         fraged_pages->empty_end_pages = empty_end_pages;
915
916         /*
917          * Try to make fraged_pages keeping in mind that we can't use free
918          * space of "empty" end-pages and last page if it reaped.
919          */
920         if (do_shrinking && vacuum_pages->num_pages - empty_end_pages > 0)
921         {
922                 int                     nusf;           /* blocks usefull for re-using */
923
924                 nusf = vacuum_pages->num_pages - empty_end_pages;
925                 if ((vacuum_pages->pagedesc[nusf - 1])->blkno == nblocks - empty_end_pages - 1)
926                         nusf--;
927
928                 for (i = 0; i < nusf; i++)
929                 {
930                         vp = vacuum_pages->pagedesc[i];
931                         if (enough_space(vp, min_tlen))
932                         {
933                                 vpage_insert(fraged_pages, vp);
934                                 usable_free_size += vp->free;
935                         }
936                 }
937         }
938
939         if (usable_free_size > 0 && num_vtlinks > 0)
940         {
941                 qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
942                           vac_cmp_vtlinks);
943                 vacrelstats->vtlinks = vtlinks;
944                 vacrelstats->num_vtlinks = num_vtlinks;
945         }
946         else
947         {
948                 vacrelstats->vtlinks = NULL;
949                 vacrelstats->num_vtlinks = 0;
950                 pfree(vtlinks);
951         }
952
953         elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
954 Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
955 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
956                  nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
957                  new_pages, num_tuples, tups_vacuumed,
958                  nkeep, vacrelstats->num_vtlinks, ncrash,
959                  nunused, (unsigned long)min_tlen, (unsigned long)max_tlen,
960                  (unsigned long)free_size, (unsigned long)usable_free_size,
961                  empty_end_pages, fraged_pages->num_pages,
962                  show_rusage(&ru0));
963
964 }
965
966
967 /*
968  *      repair_frag() -- try to repair relation's fragmentation
969  *
970  *              This routine marks dead tuples as unused and tries to re-use dead
971  *              space by moving tuples (and inserting index entries if needed). It
972  *              constructs Nvacpagelist, a list of freed pages (whose tuples were
973  *              moved), and cleans the indices for them after committing the current
974  *              transaction (in hack manner - without losing locks or freeing
975  *              memory!). It truncates the relation if trailing blocks become empty.
976  */
977 static void
978 repair_frag(VRelStats *vacrelstats, Relation onerel,
979                            VacPageList vacuum_pages, VacPageList fraged_pages,
980                            int nindices, Relation *Irel)
981 {
982         TransactionId myXID;
983         CommandId       myCID;
984         Buffer          buf,
985                                 cur_buffer;
986         int                     nblocks,
987                                 blkno;
988         Page            page,
989                                 ToPage = NULL;
990         OffsetNumber offnum = 0,
991                                 maxoff = 0,
992                                 newoff,
993                                 max_offset;
994         ItemId          itemid,
995                                 newitemid;
996         HeapTupleData tuple,
997                                 newtup;
998         TupleDesc       tupdesc;
999         IndexInfo **indexInfo = NULL;
1000         Datum           idatum[INDEX_MAX_KEYS];
1001         char            inulls[INDEX_MAX_KEYS];
1002         InsertIndexResult iresult;
1003         VacPageListData Nvacpagelist;
1004         VacPage         cur_page = NULL,
1005                                 last_vacuum_page,
1006                                 vacpage,
1007                            *curpage;
1008         int                     cur_item = 0;
1009         int                     last_move_dest_block = -1,
1010                                 last_vacuum_block,
1011                                 i = 0;
1012         Size            tuple_len;
1013         int                     num_moved,
1014                                 num_fraged_pages,
1015                                 vacuumed_pages;
1016         int                     checked_moved,
1017                                 num_tuples,
1018                                 keep_tuples = 0;
1019         bool            isempty,
1020                                 dowrite,
1021                                 chain_tuple_moved;
1022         struct rusage ru0;
1023
1024         getrusage(RUSAGE_SELF, &ru0);
1025
1026         myXID = GetCurrentTransactionId();
1027         myCID = GetCurrentCommandId();
1028
1029         tupdesc = RelationGetDescr(onerel);
1030
1031         if (Irel != (Relation *) NULL)          /* preparation for index' inserts */
1032                 indexInfo = get_index_desc(onerel, nindices, Irel);
1033
1034         Nvacpagelist.num_pages = 0;
1035         num_fraged_pages = fraged_pages->num_pages;
1036         Assert(vacuum_pages->num_pages > vacuum_pages->empty_end_pages);
1037         vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
1038         last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1039         last_vacuum_block = last_vacuum_page->blkno;
1040         cur_buffer = InvalidBuffer;
1041         num_moved = 0;
1042
1043         vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
1044         vacpage->offsets_used = vacpage->offsets_free = 0;
1045
1046         /*
1047          * Scan pages backwards from the last nonempty page, trying to move
1048          * tuples down to lower pages.  Quit when we reach a page that we have
1049          * moved any tuples onto.  Note that if a page is still in the
1050          * fraged_pages list (list of candidate move-target pages) when we
1051          * reach it, we will remove it from the list.  This ensures we never
1052          * move a tuple up to a higher page number.
1053          *
1054          * NB: this code depends on the vacuum_pages and fraged_pages lists being
1055          * in order, and on fraged_pages being a subset of vacuum_pages.
1056          */
1057         nblocks = vacrelstats->num_pages;
1058         for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
1059                  blkno > last_move_dest_block;
1060                  blkno--)
1061         {
1062                 buf = ReadBuffer(onerel, blkno);
1063                 page = BufferGetPage(buf);
1064
1065                 vacpage->offsets_free = 0;
1066
1067                 isempty = PageIsEmpty(page);
1068
1069                 dowrite = false;
1070                 if (blkno == last_vacuum_block) /* it's reaped page */
1071                 {
1072                         if (last_vacuum_page->offsets_free > 0) /* there are dead tuples */
1073                         {                                       /* on this page - clean */
1074                                 Assert(!isempty);
1075                                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1076                                 vacuum_page(onerel, buf, last_vacuum_page);
1077                                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1078                                 dowrite = true;
1079                         }
1080                         else
1081                                 Assert(isempty);
1082                         --vacuumed_pages;
1083                         if (vacuumed_pages > 0)
1084                         {
1085                                 /* get prev reaped page from vacuum_pages */
1086                                 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1087                                 last_vacuum_block = last_vacuum_page->blkno;
1088                         }
1089                         else
1090                         {
1091                                 last_vacuum_page = NULL;
1092                                 last_vacuum_block = -1;
1093                         }
1094                         if (num_fraged_pages > 0 &&
1095                         fraged_pages->pagedesc[num_fraged_pages - 1]->blkno ==
1096                                 (BlockNumber) blkno)
1097                         {
1098                                 /* page is in fraged_pages too; remove it */
1099                                 --num_fraged_pages;
1100                         }
1101                         if (isempty)
1102                         {
1103                                 ReleaseBuffer(buf);
1104                                 continue;
1105                         }
1106                 }
1107                 else
1108                         Assert(!isempty);
1109
1110                 chain_tuple_moved = false;              /* no one chain-tuple was moved
1111                                                                                  * off this page, yet */
1112                 vacpage->blkno = blkno;
1113                 maxoff = PageGetMaxOffsetNumber(page);
1114                 for (offnum = FirstOffsetNumber;
1115                          offnum <= maxoff;
1116                          offnum = OffsetNumberNext(offnum))
1117                 {
1118                         itemid = PageGetItemId(page, offnum);
1119
1120                         if (!ItemIdIsUsed(itemid))
1121                                 continue;
1122
1123                         tuple.t_datamcxt = NULL;
1124                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1125                         tuple_len = tuple.t_len = ItemIdGetLength(itemid);
1126                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
1127
1128                         if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1129                         {
1130                                 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1131                                         elog(ERROR, "Invalid XID in t_cmin");
1132                                 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1133                                         elog(ERROR, "HEAP_MOVED_IN was not expected");
1134
1135                                 /*
1136                                  * If this (chain) tuple is moved by me already then I
1137                                  * have to check is it in vacpage or not - i.e. is it moved
1138                                  * while cleaning this page or some previous one.
1139                                  */
1140                                 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1141                                 {
1142                                         if (keep_tuples == 0)
1143                                                 continue;
1144                                         if (chain_tuple_moved)          /* some chains was moved
1145                                                                                                  * while */
1146                                         {                       /* cleaning this page */
1147                                                 Assert(vacpage->offsets_free > 0);
1148                                                 for (i = 0; i < vacpage->offsets_free; i++)
1149                                                 {
1150                                                         if (vacpage->offsets[i] == offnum)
1151                                                                 break;
1152                                                 }
1153                                                 if (i >= vacpage->offsets_free) /* not found */
1154                                                 {
1155                                                         vacpage->offsets[vacpage->offsets_free++] = offnum;
1156                                                         keep_tuples--;
1157                                                 }
1158                                         }
1159                                         else
1160                                         {
1161                                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
1162                                                 keep_tuples--;
1163                                         }
1164                                         continue;
1165                                 }
1166                                 elog(ERROR, "HEAP_MOVED_OFF was expected");
1167                         }
1168
1169                         /*
1170                          * If this tuple is in the chain of tuples created in updates
1171                          * by "recent" transactions then we have to move all chain of
1172                          * tuples to another places.
1173                          */
1174                         if ((tuple.t_data->t_infomask & HEAP_UPDATED &&
1175                                  tuple.t_data->t_xmin >= XmaxRecent) ||
1176                                 (!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1177                                  !(ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))))
1178                         {
1179                                 Buffer          Cbuf = buf;
1180                                 Page            Cpage;
1181                                 ItemId          Citemid;
1182                                 ItemPointerData Ctid;
1183                                 HeapTupleData tp = tuple;
1184                                 Size            tlen = tuple_len;
1185                                 VTupleMove      vtmove = (VTupleMove)
1186                                 palloc(100 * sizeof(VTupleMoveData));
1187                                 int                     num_vtmove = 0;
1188                                 int                     free_vtmove = 100;
1189                                 VacPage         to_vacpage = NULL;
1190                                 int                     to_item = 0;
1191                                 bool            freeCbuf = false;
1192                                 int                     ti;
1193
1194                                 if (vacrelstats->vtlinks == NULL)
1195                                         elog(ERROR, "No one parent tuple was found");
1196                                 if (cur_buffer != InvalidBuffer)
1197                                 {
1198                                         WriteBuffer(cur_buffer);
1199                                         cur_buffer = InvalidBuffer;
1200                                 }
1201
1202                                 /*
1203                                  * If this tuple is in the begin/middle of the chain then
1204                                  * we have to move to the end of chain.
1205                                  */
1206                                 while (!(tp.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1207                                 !(ItemPointerEquals(&(tp.t_self), &(tp.t_data->t_ctid))))
1208                                 {
1209                                         Ctid = tp.t_data->t_ctid;
1210                                         if (freeCbuf)
1211                                                 ReleaseBuffer(Cbuf);
1212                                         freeCbuf = true;
1213                                         Cbuf = ReadBuffer(onerel,
1214                                                                           ItemPointerGetBlockNumber(&Ctid));
1215                                         Cpage = BufferGetPage(Cbuf);
1216                                         Citemid = PageGetItemId(Cpage,
1217                                                                           ItemPointerGetOffsetNumber(&Ctid));
1218                                         if (!ItemIdIsUsed(Citemid))
1219                                         {
1220
1221                                                 /*
1222                                                  * This means that in the middle of chain there
1223                                                  * was tuple updated by older (than XmaxRecent)
1224                                                  * xaction and this tuple is already deleted by
1225                                                  * me. Actually, upper part of chain should be
1226                                                  * removed and seems that this should be handled
1227                                                  * in scan_heap(), but it's not implemented at
1228                                                  * the moment and so we just stop shrinking here.
1229                                                  */
1230                                                 ReleaseBuffer(Cbuf);
1231                                                 pfree(vtmove);
1232                                                 vtmove = NULL;
1233                                                 elog(NOTICE, "Child itemid in update-chain marked as unused - can't continue repair_frag");
1234                                                 break;
1235                                         }
1236                                         tp.t_datamcxt = NULL;
1237                                         tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1238                                         tp.t_self = Ctid;
1239                                         tlen = tp.t_len = ItemIdGetLength(Citemid);
1240                                 }
1241                                 if (vtmove == NULL)
1242                                         break;
1243                                 /* first, can chain be moved ? */
1244                                 for (;;)
1245                                 {
1246                                         if (to_vacpage == NULL ||
1247                                                 !enough_space(to_vacpage, tlen))
1248                                         {
1249
1250                                                 /*
1251                                                  * if to_vacpage no longer has enough free space to be
1252                                                  * useful, remove it from fraged_pages list
1253                                                  */
1254                                                 if (to_vacpage != NULL &&
1255                                                  !enough_space(to_vacpage, vacrelstats->min_tlen))
1256                                                 {
1257                                                         Assert(num_fraged_pages > to_item);
1258                                                         memmove(fraged_pages->pagedesc + to_item,
1259                                                                 fraged_pages->pagedesc + to_item + 1,
1260                                                                         sizeof(VacPage) * (num_fraged_pages - to_item - 1));
1261                                                         num_fraged_pages--;
1262                                                 }
1263                                                 for (i = 0; i < num_fraged_pages; i++)
1264                                                 {
1265                                                         if (enough_space(fraged_pages->pagedesc[i], tlen))
1266                                                                 break;
1267                                                 }
1268
1269                                                 /* can't move item anywhere */
1270                                                 if (i == num_fraged_pages)
1271                                                 {
1272                                                         for (i = 0; i < num_vtmove; i++)
1273                                                         {
1274                                                                 Assert(vtmove[i].vacpage->offsets_used > 0);
1275                                                                 (vtmove[i].vacpage->offsets_used)--;
1276                                                         }
1277                                                         num_vtmove = 0;
1278                                                         break;
1279                                                 }
1280                                                 to_item = i;
1281                                                 to_vacpage = fraged_pages->pagedesc[to_item];
1282                                         }
1283                                         to_vacpage->free -= MAXALIGN(tlen);
1284                                         if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
1285                                                 to_vacpage->free -= MAXALIGN(sizeof(ItemIdData));
1286                                         (to_vacpage->offsets_used)++;
1287                                         if (free_vtmove == 0)
1288                                         {
1289                                                 free_vtmove = 1000;
1290                                                 vtmove = (VTupleMove) repalloc(vtmove,
1291                                                                                          (free_vtmove + num_vtmove) *
1292                                                                                                  sizeof(VTupleMoveData));
1293                                         }
1294                                         vtmove[num_vtmove].tid = tp.t_self;
1295                                         vtmove[num_vtmove].vacpage = to_vacpage;
1296                                         if (to_vacpage->offsets_used == 1)
1297                                                 vtmove[num_vtmove].cleanVpd = true;
1298                                         else
1299                                                 vtmove[num_vtmove].cleanVpd = false;
1300                                         free_vtmove--;
1301                                         num_vtmove++;
1302
1303                                         /* All done ? */
1304                                         if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
1305                                                 tp.t_data->t_xmin < XmaxRecent)
1306                                                 break;
1307
1308                                         /* Well, try to find tuple with old row version */
1309                                         for (;;)
1310                                         {
1311                                                 Buffer          Pbuf;
1312                                                 Page            Ppage;
1313                                                 ItemId          Pitemid;
1314                                                 HeapTupleData Ptp;
1315                                                 VTupleLinkData vtld,
1316                                                                    *vtlp;
1317
1318                                                 vtld.new_tid = tp.t_self;
1319                                                 vtlp = (VTupleLink)
1320                                                         vac_find_eq((void *) (vacrelstats->vtlinks),
1321                                                                            vacrelstats->num_vtlinks,
1322                                                                            sizeof(VTupleLinkData),
1323                                                                            (void *) &vtld,
1324                                                                            vac_cmp_vtlinks);
1325                                                 if (vtlp == NULL)
1326                                                         elog(ERROR, "Parent tuple was not found");
1327                                                 tp.t_self = vtlp->this_tid;
1328                                                 Pbuf = ReadBuffer(onerel,
1329                                                                 ItemPointerGetBlockNumber(&(tp.t_self)));
1330                                                 Ppage = BufferGetPage(Pbuf);
1331                                                 Pitemid = PageGetItemId(Ppage,
1332                                                            ItemPointerGetOffsetNumber(&(tp.t_self)));
1333                                                 if (!ItemIdIsUsed(Pitemid))
1334                                                         elog(ERROR, "Parent itemid marked as unused");
1335                                                 Ptp.t_datamcxt = NULL;
1336                                                 Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
1337                                                 Assert(ItemPointerEquals(&(vtld.new_tid),
1338                                                                                                  &(Ptp.t_data->t_ctid)));
1339
1340                                                 /*
1341                                                  * Read above about the cases where
1342                                                  * !ItemIdIsUsed(Citemid) (child item is
1343                                                  * removed)... Because at the moment we don't
1344                                                  * remove the useless part of an update-chain,
1345                                                  * it's possible to find a too-old parent row
1346                                                  * here.  As in the case that caused this
1347                                                  * problem, we stop shrinking here.  I could try
1348                                                  * to find the real parent row, but won't,
1349                                                  * because the real solution will be implemented
1350                                                  * anyway, later, and we are too close to the
1351                                                  * 6.5 release. - vadim 06/11/99
1352                                                  */
1353                                                 if (Ptp.t_data->t_xmax != tp.t_data->t_xmin)
1354                                                 {
1355                                                         if (freeCbuf)
1356                                                                 ReleaseBuffer(Cbuf);
1357                                                         freeCbuf = false;
1358                                                         ReleaseBuffer(Pbuf);
1359                                                         for (i = 0; i < num_vtmove; i++)
1360                                                         {
1361                                                                 Assert(vtmove[i].vacpage->offsets_used > 0);
1362                                                                 (vtmove[i].vacpage->offsets_used)--;
1363                                                         }
1364                                                         num_vtmove = 0;
1365                                                         elog(NOTICE, "Too old parent tuple found - can't continue repair_frag");
1366                                                         break;
1367                                                 }
1368 #ifdef NOT_USED                                 /* I'm not sure that this will work
1369                                                                  * properly... */
1370
1371                                                 /*
1372                                                  * If this tuple is updated version of row and it
1373                                                  * was created by the same transaction then no one
1374                                                  * is interested in this tuple - mark it as
1375                                                  * removed.
1376                                                  */
1377                                                 if (Ptp.t_data->t_infomask & HEAP_UPDATED &&
1378                                                         Ptp.t_data->t_xmin == Ptp.t_data->t_xmax)
1379                                                 {
1380                                                         TransactionIdStore(myXID,
1381                                                                 (TransactionId *) &(Ptp.t_data->t_cmin));
1382                                                         Ptp.t_data->t_infomask &=
1383                                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1384                                                         Ptp.t_data->t_infomask |= HEAP_MOVED_OFF;
1385                                                         WriteBuffer(Pbuf);
1386                                                         continue;
1387                                                 }
1388 #endif
1389                                                 tp.t_datamcxt = Ptp.t_datamcxt;
1390                                                 tp.t_data = Ptp.t_data;
1391                                                 tlen = tp.t_len = ItemIdGetLength(Pitemid);
1392                                                 if (freeCbuf)
1393                                                         ReleaseBuffer(Cbuf);
1394                                                 Cbuf = Pbuf;
1395                                                 freeCbuf = true;
1396                                                 break;
1397                                         }
1398                                         if (num_vtmove == 0)
1399                                                 break;
1400                                 }
1401                                 if (freeCbuf)
1402                                         ReleaseBuffer(Cbuf);
1403                                 if (num_vtmove == 0)    /* chain can't be moved */
1404                                 {
1405                                         pfree(vtmove);
1406                                         break;
1407                                 }
1408                                 ItemPointerSetInvalid(&Ctid);
1409                                 for (ti = 0; ti < num_vtmove; ti++)
1410                                 {
1411                                         VacPage destvacpage = vtmove[ti].vacpage;
1412
1413                                         /* Get page to move from */
1414                                         tuple.t_self = vtmove[ti].tid;
1415                                         Cbuf = ReadBuffer(onerel,
1416                                                          ItemPointerGetBlockNumber(&(tuple.t_self)));
1417
1418                                         /* Get page to move to */
1419                                         cur_buffer = ReadBuffer(onerel, destvacpage->blkno);
1420
1421                                         LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1422                                         if (cur_buffer != Cbuf)
1423                                                 LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE);
1424
1425                                         ToPage = BufferGetPage(cur_buffer);
1426                                         Cpage = BufferGetPage(Cbuf);
1427
1428                                         /* NO ELOG(ERROR) TILL CHANGES ARE LOGGED */
1429                                         START_CRIT_SECTION();
1430
1431                                         Citemid = PageGetItemId(Cpage,
1432                                                         ItemPointerGetOffsetNumber(&(tuple.t_self)));
1433                                         tuple.t_datamcxt = NULL;
1434                                         tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1435                                         tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
1436
1437                                         /*
1438                                          * make a copy of the source tuple, and then mark the
1439                                          * source tuple MOVED_OFF.
1440                                          */
1441                                         heap_copytuple_with_tuple(&tuple, &newtup);
1442
1443                                         RelationInvalidateHeapTuple(onerel, &tuple);
1444
1445                                         TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1446                                         tuple.t_data->t_infomask &=
1447                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1448                                         tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1449
1450                                         /*
1451                                          * If this page was not used before - clean it.
1452                                          *
1453                                          * NOTE: a nasty bug used to lurk here.  It is possible
1454                                          * for the source and destination pages to be the same
1455                                          * (since this tuple-chain member can be on a page lower
1456                                          * than the one we're currently processing in the outer
1457                                          * loop).  If that's true, then after vacuum_page() the
1458                                          * source tuple will have been moved, and tuple.t_data
1459                                          * will be pointing at garbage.  Therefore we must do
1460                                          * everything that uses tuple.t_data BEFORE this step!!
1461                                          *
1462                                          * This path is different from the other callers of
1463                                          * vacuum_page, because we have already incremented the
1464                                          * vacpage's offsets_used field to account for the
1465                                          * tuple(s) we expect to move onto the page. Therefore
1466                                          * vacuum_page's check for offsets_used == 0 is
1467                                          * wrong. But since that's a good debugging check for
1468                                          * all other callers, we work around it here rather
1469                                          * than remove it.
1470                                          */
1471                                         if (!PageIsEmpty(ToPage) && vtmove[ti].cleanVpd)
1472                                         {
1473                                                 int                     sv_offsets_used = destvacpage->offsets_used;
1474
1475                                                 destvacpage->offsets_used = 0;
1476                                                 vacuum_page(onerel, cur_buffer, destvacpage);
1477                                                 destvacpage->offsets_used = sv_offsets_used;
1478                                         }
1479
1480                                         /*
1481                                          * Update the state of the copied tuple, and store it
1482                                          * on the destination page.
1483                                          */
1484                                         TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1485                                         newtup.t_data->t_infomask &=
1486                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1487                                         newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1488                                         newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1489                                                                                  InvalidOffsetNumber, LP_USED);
1490                                         if (newoff == InvalidOffsetNumber)
1491                                         {
1492                                                 elog(STOP, "moving chain: failed to add item with len = %lu to page %u",
1493                                                          (unsigned long)tuple_len, destvacpage->blkno);
1494                                         }
1495                                         newitemid = PageGetItemId(ToPage, newoff);
1496                                         pfree(newtup.t_data);
1497                                         newtup.t_datamcxt = NULL;
1498                                         newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1499                                         ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff);
1500
1501                                         {
1502                                                 XLogRecPtr      recptr = 
1503                                                         log_heap_move(onerel, Cbuf, tuple.t_self,
1504                                                                                                 cur_buffer, &newtup);
1505
1506                                                 if (Cbuf != cur_buffer)
1507                                                 {
1508                                                         PageSetLSN(Cpage, recptr);
1509                                                         PageSetSUI(Cpage, ThisStartUpID);
1510                                                 }
1511                                                 PageSetLSN(ToPage, recptr);
1512                                                 PageSetSUI(ToPage, ThisStartUpID);
1513                                         }
1514                                         END_CRIT_SECTION();
1515
1516                                         if (((int) destvacpage->blkno) > last_move_dest_block)
1517                                                 last_move_dest_block = destvacpage->blkno;
1518
1519                                         /*
1520                                          * Set new tuple's t_ctid pointing to itself for last
1521                                          * tuple in chain, and to next tuple in chain otherwise.
1522                                          */
1523                                         if (!ItemPointerIsValid(&Ctid))
1524                                                 newtup.t_data->t_ctid = newtup.t_self;
1525                                         else
1526                                                 newtup.t_data->t_ctid = Ctid;
1527                                         Ctid = newtup.t_self;
1528
1529                                         num_moved++;
1530
1531                                         /*
1532                                          * Remember that we moved tuple from the current page
1533                                          * (corresponding index tuple will be cleaned).
1534                                          */
1535                                         if (Cbuf == buf)
1536                                                 vacpage->offsets[vacpage->offsets_free++] =
1537                                                         ItemPointerGetOffsetNumber(&(tuple.t_self));
1538                                         else
1539                                                 keep_tuples++;
1540
1541                                         LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1542                                         if (cur_buffer != Cbuf)
1543                                                 LockBuffer(Cbuf, BUFFER_LOCK_UNLOCK);
1544
1545                                         if (Irel != (Relation *) NULL)
1546                                         {
1547                                                 /*
1548                                                  * XXX using CurrentMemoryContext here means
1549                                                  * intra-vacuum memory leak for functional indexes.
1550                                                  * Should fix someday.
1551                                                  *
1552                                                  * XXX This code fails to handle partial indexes!
1553                                                  * Probably should change it to use ExecOpenIndices.
1554                                                  */
1555                                                 for (i = 0; i < nindices; i++)
1556                                                 {
1557                                                         FormIndexDatum(indexInfo[i],
1558                                                                                    &newtup,
1559                                                                                    tupdesc,
1560                                                                                    CurrentMemoryContext,
1561                                                                                    idatum,
1562                                                                                    inulls);
1563                                                         iresult = index_insert(Irel[i],
1564                                                                                                    idatum,
1565                                                                                                    inulls,
1566                                                                                                    &newtup.t_self,
1567                                                                                                    onerel);
1568                                                         if (iresult)
1569                                                                 pfree(iresult);
1570                                                 }
1571                                         }
1572                                         WriteBuffer(cur_buffer);
1573                                         WriteBuffer(Cbuf);
1574                                 }
1575                                 cur_buffer = InvalidBuffer;
1576                                 pfree(vtmove);
1577                                 chain_tuple_moved = true;
1578                                 continue;
1579                         }
1580
1581                         /* try to find new page for this tuple */
1582                         if (cur_buffer == InvalidBuffer ||
1583                                 !enough_space(cur_page, tuple_len))
1584                         {
1585                                 if (cur_buffer != InvalidBuffer)
1586                                 {
1587                                         WriteBuffer(cur_buffer);
1588                                         cur_buffer = InvalidBuffer;
1589
1590                                         /*
1591                                          * If previous target page is now too full to add *any*
1592                                          * tuple to it, remove it from fraged_pages.
1593                                          */
1594                                         if (!enough_space(cur_page, vacrelstats->min_tlen))
1595                                         {
1596                                                 Assert(num_fraged_pages > cur_item);
1597                                                 memmove(fraged_pages->pagedesc + cur_item,
1598                                                                 fraged_pages->pagedesc + cur_item + 1,
1599                                                                 sizeof(VacPage) * (num_fraged_pages - cur_item - 1));
1600                                                 num_fraged_pages--;
1601                                         }
1602                                 }
1603                                 for (i = 0; i < num_fraged_pages; i++)
1604                                 {
1605                                         if (enough_space(fraged_pages->pagedesc[i], tuple_len))
1606                                                 break;
1607                                 }
1608                                 if (i == num_fraged_pages)
1609                                         break;          /* can't move item anywhere */
1610                                 cur_item = i;
1611                                 cur_page = fraged_pages->pagedesc[cur_item];
1612                                 cur_buffer = ReadBuffer(onerel, cur_page->blkno);
1613                                 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1614                                 ToPage = BufferGetPage(cur_buffer);
1615                                 /* if this page was not used before - clean it */
1616                                 if (!PageIsEmpty(ToPage) && cur_page->offsets_used == 0)
1617                                         vacuum_page(onerel, cur_buffer, cur_page);
1618                         }
1619                         else
1620                                 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1621
1622                         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1623
1624                         /* copy tuple */
1625                         heap_copytuple_with_tuple(&tuple, &newtup);
1626
1627                         RelationInvalidateHeapTuple(onerel, &tuple);
1628
1629                         /*
1630                          * Mark new tuple as moved_in by vacuum and store vacuum XID
1631                          * in t_cmin !!!
1632                          */
1633                         TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1634                         newtup.t_data->t_infomask &=
1635                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1636                         newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1637
1638                         /* NO ELOG(ERROR) TILL CHANGES ARE LOGGED */
1639                         START_CRIT_SECTION();
1640
1641                         /* add tuple to the page */
1642                         newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1643                                                                  InvalidOffsetNumber, LP_USED);
1644                         if (newoff == InvalidOffsetNumber)
1645                         {
1646                                 elog(STOP, "\
1647 failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
1648                                          (unsigned long)tuple_len, cur_page->blkno, (unsigned long)cur_page->free,
1649                                  cur_page->offsets_used, cur_page->offsets_free);
1650                         }
1651                         newitemid = PageGetItemId(ToPage, newoff);
1652                         pfree(newtup.t_data);
1653                         newtup.t_datamcxt = NULL;
1654                         newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1655                         ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->blkno, newoff);
1656                         newtup.t_self = newtup.t_data->t_ctid;
1657
1658                         /*
1659                          * Mark old tuple as moved_off by vacuum and store vacuum XID
1660                          * in t_cmin !!!
1661                          */
1662                         TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1663                         tuple.t_data->t_infomask &=
1664                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1665                         tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1666
1667                         {
1668                                 XLogRecPtr      recptr = 
1669                                         log_heap_move(onerel, buf, tuple.t_self,
1670                                                                                 cur_buffer, &newtup);
1671
1672                                 PageSetLSN(page, recptr);
1673                                 PageSetSUI(page, ThisStartUpID);
1674                                 PageSetLSN(ToPage, recptr);
1675                                 PageSetSUI(ToPage, ThisStartUpID);
1676                         }
1677                         END_CRIT_SECTION();
1678
1679                         cur_page->offsets_used++;
1680                         num_moved++;
1681                         cur_page->free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
1682                         if (((int) cur_page->blkno) > last_move_dest_block)
1683                                 last_move_dest_block = cur_page->blkno;
1684
1685                         vacpage->offsets[vacpage->offsets_free++] = offnum;
1686
1687                         LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1688                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1689
1690                         /* insert index' tuples if needed */
1691                         if (Irel != (Relation *) NULL)
1692                         {
1693                                 /*
1694                                  * XXX using CurrentMemoryContext here means
1695                                  * intra-vacuum memory leak for functional indexes.
1696                                  * Should fix someday.
1697                                  *
1698                                  * XXX This code fails to handle partial indexes!
1699                                  * Probably should change it to use ExecOpenIndices.
1700                                  */
1701                                 for (i = 0; i < nindices; i++)
1702                                 {
1703                                         FormIndexDatum(indexInfo[i],
1704                                                                    &newtup,
1705                                                                    tupdesc,
1706                                                                    CurrentMemoryContext,
1707                                                                    idatum,
1708                                                                    inulls);
1709                                         iresult = index_insert(Irel[i],
1710                                                                                    idatum,
1711                                                                                    inulls,
1712                                                                                    &newtup.t_self,
1713                                                                                    onerel);
1714                                         if (iresult)
1715                                                 pfree(iresult);
1716                                 }
1717                         }
1718
1719                 }                                               /* walk along page */
1720
1721                 if (offnum < maxoff && keep_tuples > 0)
1722                 {
1723                         OffsetNumber off;
1724
1725                         for (off = OffsetNumberNext(offnum);
1726                                  off <= maxoff;
1727                                  off = OffsetNumberNext(off))
1728                         {
1729                                 itemid = PageGetItemId(page, off);
1730                                 if (!ItemIdIsUsed(itemid))
1731                                         continue;
1732                                 tuple.t_datamcxt = NULL;
1733                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1734                                 if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)
1735                                         continue;
1736                                 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1737                                         elog(ERROR, "Invalid XID in t_cmin (4)");
1738                                 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1739                                         elog(ERROR, "HEAP_MOVED_IN was not expected (2)");
1740                                 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1741                                 {
1742                                         /* some chains was moved while */
1743                                         if (chain_tuple_moved)
1744                                         {                       /* cleaning this page */
1745                                                 Assert(vacpage->offsets_free > 0);
1746                                                 for (i = 0; i < vacpage->offsets_free; i++)
1747                                                 {
1748                                                         if (vacpage->offsets[i] == off)
1749                                                                 break;
1750                                                 }
1751                                                 if (i >= vacpage->offsets_free) /* not found */
1752                                                 {
1753                                                         vacpage->offsets[vacpage->offsets_free++] = off;
1754                                                         Assert(keep_tuples > 0);
1755                                                         keep_tuples--;
1756                                                 }
1757                                         }
1758                                         else
1759                                         {
1760                                                 vacpage->offsets[vacpage->offsets_free++] = off;
1761                                                 Assert(keep_tuples > 0);
1762                                                 keep_tuples--;
1763                                         }
1764                                 }
1765                         }
1766                 }
1767
1768                 if (vacpage->offsets_free > 0)  /* some tuples were moved */
1769                 {
1770                         if (chain_tuple_moved)          /* else - they are ordered */
1771                         {
1772                                 qsort((char *) (vacpage->offsets), vacpage->offsets_free,
1773                                           sizeof(OffsetNumber), vac_cmp_offno);
1774                         }
1775                         reap_page(&Nvacpagelist, vacpage);
1776                         WriteBuffer(buf);
1777                 }
1778                 else if (dowrite)
1779                         WriteBuffer(buf);
1780                 else
1781                         ReleaseBuffer(buf);
1782
1783                 if (offnum <= maxoff)
1784                         break;                          /* some item(s) left */
1785
1786         }                                                       /* walk along relation */
1787
1788         blkno++;                                        /* new number of blocks */
1789
1790         if (cur_buffer != InvalidBuffer)
1791         {
1792                 Assert(num_moved > 0);
1793                 WriteBuffer(cur_buffer);
1794         }
1795
1796         if (num_moved > 0)
1797         {
1798                 /*
1799                  * We have to commit our tuple movings before we truncate the
1800                  * relation.  Ideally we should do Commit/StartTransactionCommand
1801                  * here, relying on the session-level table lock to protect our
1802                  * exclusive access to the relation.  However, that would require
1803                  * a lot of extra code to close and re-open the relation, indices,
1804                  * etc.  For now, a quick hack: record status of current transaction
1805                  * as committed, and continue.
1806                  */
1807                 RecordTransactionCommit();
1808         }
1809
1810         /*
1811          * Clean uncleaned reaped pages from vacuum_pages list list and set
1812          * xmin committed for inserted tuples
1813          */
1814         checked_moved = 0;
1815         for (i = 0, curpage = vacuum_pages->pagedesc; i < vacuumed_pages; i++, curpage++)
1816         {
1817                 Assert((*curpage)->blkno < (BlockNumber) blkno);
1818                 buf = ReadBuffer(onerel, (*curpage)->blkno);
1819                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1820                 page = BufferGetPage(buf);
1821                 if ((*curpage)->offsets_used == 0)              /* this page was not used */
1822                 {
1823                         if (!PageIsEmpty(page))
1824                                 vacuum_page(onerel, buf, *curpage);
1825                 }
1826                 else
1827 /* this page was used */
1828                 {
1829                         num_tuples = 0;
1830                         max_offset = PageGetMaxOffsetNumber(page);
1831                         for (newoff = FirstOffsetNumber;
1832                                  newoff <= max_offset;
1833                                  newoff = OffsetNumberNext(newoff))
1834                         {
1835                                 itemid = PageGetItemId(page, newoff);
1836                                 if (!ItemIdIsUsed(itemid))
1837                                         continue;
1838                                 tuple.t_datamcxt = NULL;
1839                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1840                                 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1841                                 {
1842                                         if ((TransactionId) tuple.t_data->t_cmin != myXID)
1843                                                 elog(ERROR, "Invalid XID in t_cmin (2)");
1844                                         if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1845                                         {
1846                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
1847                                                 num_tuples++;
1848                                         }
1849                                         else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1850                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
1851                                         else
1852                                                 elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
1853                                 }
1854                         }
1855                         Assert((*curpage)->offsets_used == num_tuples);
1856                         checked_moved += num_tuples;
1857                 }
1858                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1859                 WriteBuffer(buf);
1860         }
1861         Assert(num_moved == checked_moved);
1862
1863         elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
1864                  RelationGetRelationName(onerel),
1865                  nblocks, blkno, num_moved,
1866                  show_rusage(&ru0));
1867
1868         /* 
1869          * Reflect the motion of system tuples to catalog cache here.
1870          */
1871         CommandCounterIncrement();
1872
1873         if (Nvacpagelist.num_pages > 0)
1874         {
1875                 /* vacuum indices again if needed */
1876                 if (Irel != (Relation *) NULL)
1877                 {
1878                         VacPage    *vpleft,
1879                                            *vpright,
1880                                                 vpsave;
1881
1882                         /* re-sort Nvacpagelist.pagedesc */
1883                         for (vpleft = Nvacpagelist.pagedesc,
1884                                  vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1;
1885                                  vpleft < vpright; vpleft++, vpright--)
1886                         {
1887                                 vpsave = *vpleft;
1888                                 *vpleft = *vpright;
1889                                 *vpright = vpsave;
1890                         }
1891                         Assert(keep_tuples >= 0);
1892                         for (i = 0; i < nindices; i++)
1893                                 vacuum_index(&Nvacpagelist, Irel[i],
1894                                                          vacrelstats->num_tuples, keep_tuples);
1895                 }
1896
1897                 /* clean moved tuples from last page in Nvacpagelist list */
1898                 if (vacpage->blkno == (BlockNumber) (blkno - 1) &&
1899                         vacpage->offsets_free > 0)
1900                 {
1901                         char                    unbuf[BLCKSZ];
1902                         OffsetNumber   *unused = (OffsetNumber*)unbuf;
1903                         int                             uncnt;
1904
1905                         buf = ReadBuffer(onerel, vacpage->blkno);
1906                         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1907                         START_CRIT_SECTION();
1908                         page = BufferGetPage(buf);
1909                         num_tuples = 0;
1910                         for (offnum = FirstOffsetNumber;
1911                                  offnum <= maxoff;
1912                                  offnum = OffsetNumberNext(offnum))
1913                         {
1914                                 itemid = PageGetItemId(page, offnum);
1915                                 if (!ItemIdIsUsed(itemid))
1916                                         continue;
1917                                 tuple.t_datamcxt = NULL;
1918                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1919
1920                                 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1921                                 {
1922                                         if ((TransactionId) tuple.t_data->t_cmin != myXID)
1923                                                 elog(ERROR, "Invalid XID in t_cmin (3)");
1924                                         if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1925                                         {
1926                                                 itemid->lp_flags &= ~LP_USED;
1927                                                 num_tuples++;
1928                                         }
1929                                         else
1930                                                 elog(ERROR, "HEAP_MOVED_OFF was expected (2)");
1931                                 }
1932
1933                         }
1934                         Assert(vacpage->offsets_free == num_tuples);
1935                         uncnt = PageRepairFragmentation(page, unused);
1936                         {
1937                                 XLogRecPtr      recptr;
1938                                 recptr = log_heap_clean(onerel, buf, (char*)unused,
1939                                         (char*)(&(unused[uncnt])) - (char*)unused);
1940                                 PageSetLSN(page, recptr);
1941                                 PageSetSUI(page, ThisStartUpID);
1942                         }
1943                         END_CRIT_SECTION();
1944                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1945                         WriteBuffer(buf);
1946                 }
1947
1948                 /* now - free new list of reaped pages */
1949                 curpage = Nvacpagelist.pagedesc;
1950                 for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++)
1951                         pfree(*curpage);
1952                 pfree(Nvacpagelist.pagedesc);
1953         }
1954
1955         /*
1956          * Flush dirty pages out to disk.  We do this unconditionally, even if
1957          * we don't need to truncate, because we want to ensure that all tuples
1958          * have correct on-row commit status on disk (see bufmgr.c's comments
1959          * for FlushRelationBuffers()).
1960          */
1961         i = FlushRelationBuffers(onerel, blkno);
1962         if (i < 0)
1963                 elog(ERROR, "VACUUM (repair_frag): FlushRelationBuffers returned %d",
1964                          i);
1965
1966         /* truncate relation, if needed */
1967         if (blkno < nblocks)
1968         {
1969                 blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
1970                 Assert(blkno >= 0);
1971                 vacrelstats->num_pages = blkno; /* set new number of blocks */
1972         }
1973
1974         if (Irel != (Relation *) NULL)          /* pfree index' allocations */
1975         {
1976                 close_indices(nindices, Irel);
1977                 pfree(indexInfo);
1978         }
1979
1980         pfree(vacpage);
1981         if (vacrelstats->vtlinks != NULL)
1982                 pfree(vacrelstats->vtlinks);
1983 }
1984
1985 /*
1986  *      vacuum_heap() -- free dead tuples
1987  *
1988  *              This routine marks dead tuples as unused and truncates relation
1989  *              if there are "empty" end-blocks.
1990  */
1991 static void
1992 vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
1993 {
1994         Buffer          buf;
1995         VacPage    *vacpage;
1996         int                     nblocks;
1997         int                     i;
1998
1999         nblocks = vacuum_pages->num_pages;
2000         nblocks -= vacuum_pages->empty_end_pages;               /* nothing to do with
2001                                                                                                          * them */
2002
2003         for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++)
2004         {
2005                 if ((*vacpage)->offsets_free > 0)
2006                 {
2007                         buf = ReadBuffer(onerel, (*vacpage)->blkno);
2008                         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2009                         vacuum_page(onerel, buf, *vacpage);
2010                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
2011                         WriteBuffer(buf);
2012                 }
2013         }
2014
2015         /*
2016          * Flush dirty pages out to disk.  We do this unconditionally, even if
2017          * we don't need to truncate, because we want to ensure that all tuples
2018          * have correct on-row commit status on disk (see bufmgr.c's comments
2019          * for FlushRelationBuffers()).
2020          */
2021         Assert(vacrelstats->num_pages >= vacuum_pages->empty_end_pages);
2022         nblocks = vacrelstats->num_pages - vacuum_pages->empty_end_pages;
2023
2024         i = FlushRelationBuffers(onerel, nblocks);
2025         if (i < 0)
2026                 elog(ERROR, "VACUUM (vacuum_heap): FlushRelationBuffers returned %d",
2027                          i);
2028
2029         /* truncate relation if there are some empty end-pages */
2030         if (vacuum_pages->empty_end_pages > 0)
2031         {
2032                 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
2033                          RelationGetRelationName(onerel),
2034                          vacrelstats->num_pages, nblocks);
2035                 nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
2036                 Assert(nblocks >= 0);
2037                 vacrelstats->num_pages = nblocks; /* set new number of blocks */
2038         }
2039 }
2040
2041 /*
2042  *      vacuum_page() -- free dead tuples on a page
2043  *                                       and repair its fragmentation.
2044  */
2045 static void
2046 vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
2047 {
2048         char                    unbuf[BLCKSZ];
2049         OffsetNumber   *unused = (OffsetNumber*)unbuf;
2050         int                             uncnt;
2051         Page                    page = BufferGetPage(buffer);
2052         ItemId                  itemid;
2053         int                             i;
2054
2055         /* There shouldn't be any tuples moved onto the page yet! */
2056         Assert(vacpage->offsets_used == 0);
2057
2058         START_CRIT_SECTION();
2059         for (i = 0; i < vacpage->offsets_free; i++)
2060         {
2061                 itemid = &(((PageHeader) page)->pd_linp[vacpage->offsets[i] - 1]);
2062                 itemid->lp_flags &= ~LP_USED;
2063         }
2064         uncnt = PageRepairFragmentation(page, unused);
2065         {
2066                 XLogRecPtr      recptr;
2067                 recptr = log_heap_clean(onerel, buffer, (char*)unused,
2068                                         (char*)(&(unused[uncnt])) - (char*)unused);
2069                 PageSetLSN(page, recptr);
2070                 PageSetSUI(page, ThisStartUpID);
2071         }
2072         END_CRIT_SECTION();
2073
2074 }
2075
2076 /*
2077  *      _scan_index() -- scan one index relation to update statistic.
2078  *
2079  */
2080 static void
2081 scan_index(Relation indrel, int num_tuples)
2082 {
2083         RetrieveIndexResult res;
2084         IndexScanDesc iscan;
2085         int                     nitups;
2086         int                     nipages;
2087         struct rusage ru0;
2088
2089         getrusage(RUSAGE_SELF, &ru0);
2090
2091         /* walk through the entire index */
2092         iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
2093         nitups = 0;
2094
2095         while ((res = index_getnext(iscan, ForwardScanDirection))
2096                    != (RetrieveIndexResult) NULL)
2097         {
2098                 nitups++;
2099                 pfree(res);
2100         }
2101
2102         index_endscan(iscan);
2103
2104         /* now update statistics in pg_class */
2105         nipages = RelationGetNumberOfBlocks(indrel);
2106         update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
2107
2108         elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
2109                  RelationGetRelationName(indrel), nipages, nitups,
2110                  show_rusage(&ru0));
2111
2112         if (nitups != num_tuples)
2113                 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2114 \n\tRecreate the index.",
2115                          RelationGetRelationName(indrel), nitups, num_tuples);
2116
2117 }
2118
2119 /*
2120  *      vacuum_index() -- vacuum one index relation.
2121  *
2122  *              Vpl is the VacPageList of the heap we're currently vacuuming.
2123  *              It's locked. Indrel is an index relation on the vacuumed heap.
2124  *              We don't set locks on the index relation here, since the indexed
2125  *              access methods support locking at different granularities.
2126  *              We let them handle it.
2127  *
2128  *              Finally, we arrange to update the index relation's statistics in
2129  *              pg_class.
2130  */
2131 static void
2132 vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
2133 {
2134         RetrieveIndexResult res;
2135         IndexScanDesc iscan;
2136         ItemPointer heapptr;
2137         int                     tups_vacuumed;
2138         int                     num_index_tuples;
2139         int                     num_pages;
2140         VacPage         vp;
2141         struct rusage ru0;
2142
2143         getrusage(RUSAGE_SELF, &ru0);
2144
2145         /* walk through the entire index */
2146         iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
2147         tups_vacuumed = 0;
2148         num_index_tuples = 0;
2149
2150         while ((res = index_getnext(iscan, ForwardScanDirection))
2151                    != (RetrieveIndexResult) NULL)
2152         {
2153                 heapptr = &res->heap_iptr;
2154
2155                 if ((vp = tid_reaped(heapptr, vacpagelist)) != (VacPage) NULL)
2156                 {
2157 #ifdef NOT_USED
2158                         elog(DEBUG, "<%x,%x> -> <%x,%x>",
2159                                  ItemPointerGetBlockNumber(&(res->index_iptr)),
2160                                  ItemPointerGetOffsetNumber(&(res->index_iptr)),
2161                                  ItemPointerGetBlockNumber(&(res->heap_iptr)),
2162                                  ItemPointerGetOffsetNumber(&(res->heap_iptr)));
2163 #endif
2164                         if (vp->offsets_free == 0)
2165                         {
2166                                 elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
2167                                          RelationGetRelationName(indrel),
2168                                          vp->blkno, ItemPointerGetOffsetNumber(heapptr));
2169                         }
2170                         ++tups_vacuumed;
2171                         index_delete(indrel, &res->index_iptr);
2172                 }
2173                 else
2174                         num_index_tuples++;
2175
2176                 pfree(res);
2177         }
2178
2179         index_endscan(iscan);
2180
2181         /* now update statistics in pg_class */
2182         num_pages = RelationGetNumberOfBlocks(indrel);
2183         update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
2184
2185         elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
2186                  RelationGetRelationName(indrel), num_pages,
2187                  num_index_tuples - keep_tuples, tups_vacuumed,
2188                  show_rusage(&ru0));
2189
2190         if (num_index_tuples != num_tuples + keep_tuples)
2191                 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2192 \n\tRecreate the index.",
2193                   RelationGetRelationName(indrel), num_index_tuples, num_tuples);
2194
2195 }
2196
2197 /*
2198  *      tid_reaped() -- is a particular tid reaped?
2199  *
2200  *              vacpagelist->VacPage_array is sorted in right order.
2201  */
2202 static VacPage
2203 tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
2204 {
2205         OffsetNumber ioffno;
2206         OffsetNumber *voff;
2207         VacPage         vp,
2208                            *vpp;
2209         VacPageData vacpage;
2210
2211         vacpage.blkno = ItemPointerGetBlockNumber(itemptr);
2212         ioffno = ItemPointerGetOffsetNumber(itemptr);
2213
2214         vp = &vacpage;
2215         vpp = (VacPage *) vac_find_eq((void *) (vacpagelist->pagedesc),
2216                                         vacpagelist->num_pages, sizeof(VacPage), (void *) &vp,
2217                                                                         vac_cmp_blk);
2218
2219         if (vpp == (VacPage *) NULL)
2220                 return (VacPage) NULL;
2221         vp = *vpp;
2222
2223         /* ok - we are on true page */
2224
2225         if (vp->offsets_free == 0)
2226         {                                                       /* this is EmptyPage !!! */
2227                 return vp;
2228         }
2229
2230         voff = (OffsetNumber *) vac_find_eq((void *) (vp->offsets),
2231                         vp->offsets_free, sizeof(OffsetNumber), (void *) &ioffno,
2232                                                                            vac_cmp_offno);
2233
2234         if (voff == (OffsetNumber *) NULL)
2235                 return (VacPage) NULL;
2236
2237         return vp;
2238
2239 }
2240
2241 /*
2242  *      update_relstats() -- update statistics for one relation
2243  *
2244  *              Update the whole-relation statistics that are kept in its pg_class
2245  *              row.  There are additional stats that will be updated if we are
2246  *              doing VACUUM ANALYZE, but we always update these stats.
2247  *
2248  *              This routine works for both index and heap relation entries in
2249  *              pg_class.  We violate no-overwrite semantics here by storing new
2250  *              values for the statistics columns directly into the pg_class
2251  *              tuple that's already on the page.  The reason for this is that if
2252  *              we updated these tuples in the usual way, vacuuming pg_class itself
2253  *              wouldn't work very well --- by the time we got done with a vacuum
2254  *              cycle, most of the tuples in pg_class would've been obsoleted.
2255  *              Updating pg_class's own statistics would be especially tricky.
2256  *              Of course, this only works for fixed-size never-null columns, but
2257  *              these are.
2258  */
2259 static void
2260 update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
2261                         VRelStats *vacrelstats)
2262 {
2263         Relation        rd;
2264         HeapTupleData rtup;
2265         HeapTuple       ctup;
2266         Form_pg_class pgcform;
2267         Buffer          buffer;
2268
2269         /*
2270          * update number of tuples and number of pages in pg_class
2271          */
2272         rd = heap_openr(RelationRelationName, RowExclusiveLock);
2273
2274         ctup = SearchSysCache(RELOID,
2275                                                   ObjectIdGetDatum(relid),
2276                                                   0, 0, 0);
2277         if (!HeapTupleIsValid(ctup))
2278                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
2279                          relid);
2280
2281         /* get the buffer cache tuple */
2282         rtup.t_self = ctup->t_self;
2283         ReleaseSysCache(ctup);
2284         heap_fetch(rd, SnapshotNow, &rtup, &buffer);
2285
2286         /* overwrite the existing statistics in the tuple */
2287         pgcform = (Form_pg_class) GETSTRUCT(&rtup);
2288         pgcform->reltuples = num_tuples;
2289         pgcform->relpages = num_pages;
2290         pgcform->relhasindex = hasindex;
2291
2292         /* invalidate the tuple in the cache and write the buffer */
2293         RelationInvalidateHeapTuple(rd, &rtup);
2294         WriteBuffer(buffer);
2295
2296         heap_close(rd, RowExclusiveLock);
2297 }
2298
2299 /*
2300  *      reap_page() -- save a page on the array of reaped pages.
2301  *
2302  *              As a side effect of the way that the vacuuming loop for a given
2303  *              relation works, higher pages come after lower pages in the array
2304  *              (and highest tid on a page is last).
2305  */
2306 static void
2307 reap_page(VacPageList vacpagelist, VacPage vacpage)
2308 {
2309         VacPage newvacpage;
2310
2311         /* allocate a VacPageData entry */
2312         newvacpage = (VacPage) palloc(sizeof(VacPageData) + vacpage->offsets_free * sizeof(OffsetNumber));
2313
2314         /* fill it in */
2315         if (vacpage->offsets_free > 0)
2316                 memmove(newvacpage->offsets, vacpage->offsets, vacpage->offsets_free * sizeof(OffsetNumber));
2317         newvacpage->blkno = vacpage->blkno;
2318         newvacpage->free = vacpage->free;
2319         newvacpage->offsets_used = vacpage->offsets_used;
2320         newvacpage->offsets_free = vacpage->offsets_free;
2321
2322         /* insert this page into vacpagelist list */
2323         vpage_insert(vacpagelist, newvacpage);
2324
2325 }
2326
2327 static void
2328 vpage_insert(VacPageList vacpagelist, VacPage vpnew)
2329 {
2330 #define PG_NPAGEDESC 1024
2331
2332         /* allocate a VacPage entry if needed */
2333         if (vacpagelist->num_pages == 0)
2334         {
2335                 vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage));
2336                 vacpagelist->num_allocated_pages = PG_NPAGEDESC;
2337         }
2338         else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages)
2339         {
2340                 vacpagelist->num_allocated_pages *= 2;
2341                 vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage));
2342         }
2343         vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew;
2344         (vacpagelist->num_pages)++;
2345
2346 }
2347
2348 static void *
2349 vac_find_eq(void *bot, int nelem, int size, void *elm,
2350                    int (*compar) (const void *, const void *))
2351 {
2352         int                     res;
2353         int                     last = nelem - 1;
2354         int                     celm = nelem / 2;
2355         bool            last_move,
2356                                 first_move;
2357
2358         last_move = first_move = true;
2359         for (;;)
2360         {
2361                 if (first_move == true)
2362                 {
2363                         res = compar(bot, elm);
2364                         if (res > 0)
2365                                 return NULL;
2366                         if (res == 0)
2367                                 return bot;
2368                         first_move = false;
2369                 }
2370                 if (last_move == true)
2371                 {
2372                         res = compar(elm, (void *) ((char *) bot + last * size));
2373                         if (res > 0)
2374                                 return NULL;
2375                         if (res == 0)
2376                                 return (void *) ((char *) bot + last * size);
2377                         last_move = false;
2378                 }
2379                 res = compar(elm, (void *) ((char *) bot + celm * size));
2380                 if (res == 0)
2381                         return (void *) ((char *) bot + celm * size);
2382                 if (res < 0)
2383                 {
2384                         if (celm == 0)
2385                                 return NULL;
2386                         last = celm - 1;
2387                         celm = celm / 2;
2388                         last_move = true;
2389                         continue;
2390                 }
2391
2392                 if (celm == last)
2393                         return NULL;
2394
2395                 last = last - celm - 1;
2396                 bot = (void *) ((char *) bot + (celm + 1) * size);
2397                 celm = (last + 1) / 2;
2398                 first_move = true;
2399         }
2400
2401 }
2402
2403 static int
2404 vac_cmp_blk(const void *left, const void *right)
2405 {
2406         BlockNumber lblk,
2407                                 rblk;
2408
2409         lblk = (*((VacPage *) left))->blkno;
2410         rblk = (*((VacPage *) right))->blkno;
2411
2412         if (lblk < rblk)
2413                 return -1;
2414         if (lblk == rblk)
2415                 return 0;
2416         return 1;
2417
2418 }
2419
2420 static int
2421 vac_cmp_offno(const void *left, const void *right)
2422 {
2423
2424         if (*(OffsetNumber *) left < *(OffsetNumber *) right)
2425                 return -1;
2426         if (*(OffsetNumber *) left == *(OffsetNumber *) right)
2427                 return 0;
2428         return 1;
2429
2430 }
2431
2432 static int
2433 vac_cmp_vtlinks(const void *left, const void *right)
2434 {
2435
2436         if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi <
2437                 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2438                 return -1;
2439         if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi >
2440                 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2441                 return 1;
2442         /* bi_hi-es are equal */
2443         if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo <
2444                 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2445                 return -1;
2446         if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo >
2447                 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2448                 return 1;
2449         /* bi_lo-es are equal */
2450         if (((VTupleLink) left)->new_tid.ip_posid <
2451                 ((VTupleLink) right)->new_tid.ip_posid)
2452                 return -1;
2453         if (((VTupleLink) left)->new_tid.ip_posid >
2454                 ((VTupleLink) right)->new_tid.ip_posid)
2455                 return 1;
2456         return 0;
2457
2458 }
2459
2460
2461 static void
2462 get_indices(Relation relation, int *nindices, Relation **Irel)
2463 {
2464         List       *indexoidlist,
2465                            *indexoidscan;
2466         int                     i;
2467
2468         indexoidlist = RelationGetIndexList(relation);
2469
2470         *nindices = length(indexoidlist);
2471
2472         if (*nindices > 0)
2473                 *Irel = (Relation *) palloc(*nindices * sizeof(Relation));
2474         else
2475                 *Irel = NULL;
2476
2477         i = 0;
2478         foreach(indexoidscan, indexoidlist)
2479         {
2480                 Oid                     indexoid = lfirsti(indexoidscan);
2481
2482                 (*Irel)[i] = index_open(indexoid);
2483                 i++;
2484         }
2485
2486         freeList(indexoidlist);
2487 }
2488
2489
2490 static void
2491 close_indices(int nindices, Relation *Irel)
2492 {
2493
2494         if (Irel == (Relation *) NULL)
2495                 return;
2496
2497         while (nindices--)
2498                 index_close(Irel[nindices]);
2499         pfree(Irel);
2500
2501 }
2502
2503
2504 /*
2505  * Obtain IndexInfo data for each index on the rel
2506  */
2507 static IndexInfo **
2508 get_index_desc(Relation onerel, int nindices, Relation *Irel)
2509 {
2510         IndexInfo **indexInfo;
2511         int                     i;
2512         HeapTuple       cachetuple;
2513
2514         indexInfo = (IndexInfo **) palloc(nindices * sizeof(IndexInfo *));
2515
2516         for (i = 0; i < nindices; i++)
2517         {
2518                 cachetuple = SearchSysCache(INDEXRELID,
2519                                                          ObjectIdGetDatum(RelationGetRelid(Irel[i])),
2520                                                                         0, 0, 0);
2521                 if (!HeapTupleIsValid(cachetuple))
2522                         elog(ERROR, "get_index_desc: index %u not found",
2523                                  RelationGetRelid(Irel[i]));
2524                 indexInfo[i] = BuildIndexInfo(cachetuple);
2525                 ReleaseSysCache(cachetuple);
2526         }
2527
2528         return indexInfo;
2529 }
2530
2531
2532 static bool
2533 enough_space(VacPage vacpage, Size len)
2534 {
2535
2536         len = MAXALIGN(len);
2537
2538         if (len > vacpage->free)
2539                 return false;
2540
2541         if (vacpage->offsets_used < vacpage->offsets_free)      /* there are free
2542                                                                                                                  * itemid(s) */
2543                 return true;                    /* and len <= free_space */
2544
2545         /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
2546         if (len + MAXALIGN(sizeof(ItemIdData)) <= vacpage->free)
2547                 return true;
2548
2549         return false;
2550
2551 }
2552
2553
/*
 * Compute elapsed CPU time since the usage snapshot *ru0 and format it
 * into a displayable string of the form "CPU s.sss/u.uuu sec.".
 * The result is kept in a static buffer (overwritten on each call),
 * which is tacky, but no one ever claimed that the Postgres backend
 * is threadable...
 */
static char *
show_rusage(struct rusage * ru0)
{
	static char result[64];
	struct rusage now;
	int			sys_sec,
				sys_usec,
				user_sec,
				user_usec;

	getrusage(RUSAGE_SELF, &now);

	/*
	 * Subtract the snapshot field by field, borrowing one second
	 * whenever the microsecond part would go negative.
	 */
	sys_sec = (int) (now.ru_stime.tv_sec - ru0->ru_stime.tv_sec);
	sys_usec = (int) (now.ru_stime.tv_usec - ru0->ru_stime.tv_usec);
	if (sys_usec < 0)
	{
		sys_sec--;
		sys_usec += 1000000;
	}
	user_sec = (int) (now.ru_utime.tv_sec - ru0->ru_utime.tv_sec);
	user_usec = (int) (now.ru_utime.tv_usec - ru0->ru_utime.tv_usec);
	if (user_usec < 0)
	{
		user_sec--;
		user_usec += 1000000;
	}

	/* report hundredths of a second (usec / 10000) */
	snprintf(result, sizeof(result),
			 "CPU %d.%02ds/%d.%02du sec.",
			 sys_sec, sys_usec / 10000,
			 user_sec, user_usec / 10000);

	return result;
}