/* src/backend/commands/vacuum.c */
/*-------------------------------------------------------------------------
 *
 * vacuum.c
 *	  the postgres vacuum cleaner
 *
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.170 2000/10/24 09:56:15 vadim Exp $
 *
 *-------------------------------------------------------------------------
 */
16 #include <sys/types.h>
17 #include <sys/file.h>
18 #include <sys/stat.h>
19 #include <fcntl.h>
20 #include <unistd.h>
21
22 #include "postgres.h"
23
24 #include "access/genam.h"
25 #include "access/heapam.h"
26 #include "catalog/catalog.h"
27 #include "catalog/catname.h"
28 #include "catalog/index.h"
29 #include "commands/vacuum.h"
30 #include "miscadmin.h"
31 #include "nodes/execnodes.h"
32 #include "storage/sinval.h"
33 #include "storage/smgr.h"
34 #include "tcop/tcopprot.h"
35 #include "utils/acl.h"
36 #include "utils/builtins.h"
37 #include "utils/fmgroids.h"
38 #include "utils/inval.h"
39 #include "utils/relcache.h"
40 #include "utils/syscache.h"
41 #include "utils/temprel.h"
42
43 #ifndef HAVE_GETRUSAGE
44 #include "rusagestub.h"
45 #else
46 #include <sys/time.h>
47 #include <sys/resource.h>
48 #endif
49
50 #ifdef XLOG
51 #include "access/xlog.h"
52 XLogRecPtr      log_heap_move(Relation reln, 
53                                 ItemPointerData from, HeapTuple newtup);
54 #endif
55
56 static MemoryContext vac_context = NULL;
57
58 static int      MESSAGE_LEVEL;          /* message level */
59
60 static TransactionId XmaxRecent;
61
62 /* non-export function prototypes */
63 static void vacuum_init(void);
64 static void vacuum_shutdown(void);
65 static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
66 static VRelList getrels(NameData *VacRelP);
67 static void vacuum_rel(Oid relid, bool analyze, bool is_toastrel);
68 static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
69 static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
70 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
71 static void vacuum_page(Page page, VacPage vacpage);
72 static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
73 static void scan_index(Relation indrel, int num_tuples);
74 static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
75 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
76 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
77 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
78 static void get_indices(Relation relation, int *nindices, Relation **Irel);
79 static void close_indices(int nindices, Relation *Irel);
80 static IndexInfo **get_index_desc(Relation onerel, int nindices,
81                                                                   Relation *Irel);
82 static void *vac_find_eq(void *bot, int nelem, int size, void *elm,
83                          int (*compar) (const void *, const void *));
84 static int      vac_cmp_blk(const void *left, const void *right);
85 static int      vac_cmp_offno(const void *left, const void *right);
86 static int      vac_cmp_vtlinks(const void *left, const void *right);
87 static bool enough_space(VacPage vacpage, Size len);
88 static char *show_rusage(struct rusage * ru0);
89
90
91 void
92 vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
93 {
94         NameData        VacRel;
95         Name            VacRelName;
96         MemoryContext old;
97         List       *le;
98         List       *anal_cols2 = NIL;
99
100         if (anal_cols != NIL && !analyze)
101                 elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
102
103         /*
104          * We cannot run VACUUM inside a user transaction block; if we were
105          * inside a transaction, then our commit- and
106          * start-transaction-command calls would not have the intended effect!
107          * Furthermore, the forced commit that occurs before truncating the
108          * relation's file would have the effect of committing the rest of the
109          * user's transaction too, which would certainly not be the desired
110          * behavior.
111          */
112         if (IsTransactionBlock())
113                 elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
114
115         if (verbose)
116                 MESSAGE_LEVEL = NOTICE;
117         else
118                 MESSAGE_LEVEL = DEBUG;
119
120         /*
121          * Create special memory context for cross-transaction storage.
122          *
123          * Since it is a child of QueryContext, it will go away eventually
124          * even if we suffer an error; there's no need for special abort
125          * cleanup logic.
126          */
127         vac_context = AllocSetContextCreate(QueryContext,
128                                                                                 "Vacuum",
129                                                                                 ALLOCSET_DEFAULT_MINSIZE,
130                                                                                 ALLOCSET_DEFAULT_INITSIZE,
131                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
132
133         /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
134         if (vacrel)
135         {
136                 namestrcpy(&VacRel, vacrel);
137                 VacRelName = &VacRel;
138         }
139         else
140                 VacRelName = NULL;
141
142         /* must also copy the column list, if any, to safe storage */
143         old = MemoryContextSwitchTo(vac_context);
144         foreach(le, anal_cols)
145         {
146                 char       *col = (char *) lfirst(le);
147
148                 anal_cols2 = lappend(anal_cols2, pstrdup(col));
149         }
150         MemoryContextSwitchTo(old);
151
152         /*
153          * Start up the vacuum cleaner.
154          *
155          * NOTE: since this commits the current transaction, the memory holding
156          * any passed-in parameters gets freed here.  We must have already
157          * copied pass-by-reference parameters to safe storage.  Don't make me
158          * fix this again!
159          */
160         vacuum_init();
161
162         /* vacuum the database */
163         vac_vacuum(VacRelName, analyze, anal_cols2);
164
165         /* clean up */
166         vacuum_shutdown();
167 }
168
/*
 * vacuum_init(), vacuum_shutdown() -- start up and shut down the vacuum
 * cleaner.
 *
 * Formerly, code here prevented more than one VACUUM from running
 * concurrently in the same database.  There is no good reason to prevent
 * that, and manually removing lockfiles after a vacuum crash was a pain
 * for dbadmins, so we dropped the lockfiles and simply rely on the
 * exclusive lock taken on each target table to keep two VACUUMs off the
 * same table at the same time.
 *
 * The strangeness with committing and starting transactions in the init
 * and shutdown routines exists because the vacuum cleaner is invoked via
 * an SQL command and therefore is already executing inside a transaction.
 * We need a predictable state on entry and exit: vacuum_init() commits
 * the transaction started in PostgresMain(), and vacuum_shutdown()
 * starts one to match the commit waiting for us back in PostgresMain().
 */
static void
vacuum_init(void)
{
	/* matches the StartTransaction in PostgresMain() */
	CommitTransactionCommand();
}
195
196 static void
197 vacuum_shutdown()
198 {
199         /* on entry, we are not in a transaction */
200
201         /*
202          * Flush the init file that relcache.c uses to save startup time. The
203          * next backend startup will rebuild the init file with up-to-date
204          * information from pg_class.  This lets the optimizer see the stats
205          * that we've collected for certain critical system indexes.  See
206          * relcache.c for more details.
207          *
208          * Ignore any failure to unlink the file, since it might not be there if
209          * no backend has been started since the last vacuum...
210          */
211         unlink(RELCACHE_INIT_FILENAME);
212
213         /* matches the CommitTransaction in PostgresMain() */
214         StartTransactionCommand();
215
216         /*
217          * Clean up working storage --- note we must do this after
218          * StartTransactionCommand, else we might be trying to delete
219          * the active context!
220          */
221         MemoryContextDelete(vac_context);
222         vac_context = NULL;
223 }
224
225 /*
226  *      vac_vacuum() -- vacuum the database.
227  *
228  *              This routine builds a list of relations to vacuum, and then calls
229  *              code that vacuums them one at a time.  We are careful to vacuum each
230  *              relation in a separate transaction in order to avoid holding too many
231  *              locks at one time.
232  */
233 static void
234 vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
235 {
236         VRelList        vrl,
237                                 cur;
238
239         /* get list of relations */
240         vrl = getrels(VacRelP);
241
242         /* vacuum each heap relation */
243         for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
244         {
245                 vacuum_rel(cur->vrl_relid, analyze, false);
246                 /* analyze separately so locking is minimized */
247                 if (analyze)
248                         analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
249         }
250 }
251
252 static VRelList
253 getrels(NameData *VacRelP)
254 {
255         Relation        rel;
256         TupleDesc       tupdesc;
257         HeapScanDesc scan;
258         HeapTuple       tuple;
259         VRelList        vrl,
260                                 cur;
261         Datum           d;
262         char       *rname;
263         char            rkind;
264         bool            n;
265         bool            found = false;
266         ScanKeyData key;
267
268         StartTransactionCommand();
269
270         if (NameStr(*VacRelP))
271         {
272
273                 /*
274                  * we could use the cache here, but it is clearer to use scankeys
275                  * for both vacuum cases, bjm 2000/01/19
276                  */
277                 char       *nontemp_relname;
278
279                 /* We must re-map temp table names bjm 2000-04-06 */
280                 if ((nontemp_relname =
281                          get_temp_rel_by_username(NameStr(*VacRelP))) == NULL)
282                         nontemp_relname = NameStr(*VacRelP);
283
284                 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relname,
285                                                            F_NAMEEQ,
286                                                            PointerGetDatum(nontemp_relname));
287         }
288         else
289         {
290                 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
291                                                            F_CHAREQ, CharGetDatum('r'));
292         }
293
294         vrl = cur = (VRelList) NULL;
295
296         rel = heap_openr(RelationRelationName, AccessShareLock);
297         tupdesc = RelationGetDescr(rel);
298
299         scan = heap_beginscan(rel, false, SnapshotNow, 1, &key);
300
301         while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
302         {
303                 found = true;
304
305                 d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
306                 rname = (char *) d;
307
308                 d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
309
310                 rkind = DatumGetChar(d);
311
312                 if (rkind != RELKIND_RELATION)
313                 {
314                         elog(NOTICE, "Vacuum: can not process indecies, views and certain system tables");
315                         continue;
316                 }
317
318                 /* get a relation list entry for this guy */
319                 if (vrl == (VRelList) NULL)
320                         vrl = cur = (VRelList)
321                                 MemoryContextAlloc(vac_context, sizeof(VRelListData));
322                 else
323                 {
324                         cur->vrl_next = (VRelList)
325                                 MemoryContextAlloc(vac_context, sizeof(VRelListData));
326                         cur = cur->vrl_next;
327                 }
328
329                 cur->vrl_relid = tuple->t_data->t_oid;
330                 cur->vrl_next = (VRelList) NULL;
331         }
332
333         heap_endscan(scan);
334         heap_close(rel, AccessShareLock);
335
336         if (!found)
337                 elog(NOTICE, "Vacuum: table not found");
338
339         CommitTransactionCommand();
340
341         return vrl;
342 }
343
344 /*
345  *      vacuum_rel() -- vacuum one heap relation
346  *
347  *              This routine vacuums a single heap, cleans out its indices, and
348  *              updates its statistics num_pages and num_tuples statistics.
349  *
350  *              Doing one heap at a time incurs extra overhead, since we need to
351  *              check that the heap exists again just before we vacuum it.      The
352  *              reason that we do this is so that vacuuming can be spread across
353  *              many small transactions.  Otherwise, two-phase locking would require
354  *              us to lock the entire database during one pass of the vacuum cleaner.
355  */
356 static void
357 vacuum_rel(Oid relid, bool analyze, bool is_toastrel)
358 {
359         HeapTuple       tuple;
360         Relation        onerel;
361         VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean
362                                                                  * indices */
363         VacPageListData fraged_pages; /* List of pages with space enough for
364                                                                  * re-using */
365         VacPage    *vacpage;
366         Relation   *Irel;
367         int32           nindices,
368                                 i;
369         VRelStats  *vacrelstats;
370         bool            reindex = false;
371         Oid                     toast_relid;
372
373         if (!is_toastrel)
374                 StartTransactionCommand();
375
376         /*
377          * Check for user-requested abort.      Note we want this to be inside a
378          * transaction, so xact.c doesn't issue useless NOTICE.
379          */
380         if (QueryCancel)
381                 CancelQuery();
382
383         /*
384          * Race condition -- if the pg_class tuple has gone away since the
385          * last time we saw it, we don't need to vacuum it.
386          */
387         tuple = SearchSysCacheTuple(RELOID,
388                                                                 ObjectIdGetDatum(relid),
389                                                                 0, 0, 0);
390         if (!HeapTupleIsValid(tuple))
391         {
392                 if (!is_toastrel)
393                         CommitTransactionCommand();
394                 return;
395         }
396
397         /*
398          * Open the class, get an exclusive lock on it, and check permissions.
399          *
400          * Note we choose to treat permissions failure as a NOTICE and keep
401          * trying to vacuum the rest of the DB --- is this appropriate?
402          */
403         onerel = heap_open(relid, AccessExclusiveLock);
404
405         if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
406                                            RELNAME))
407         {
408                 elog(NOTICE, "Skipping \"%s\" --- only table owner can VACUUM it",
409                          RelationGetRelationName(onerel));
410                 heap_close(onerel, AccessExclusiveLock);
411                 if (!is_toastrel)
412                         CommitTransactionCommand();
413                 return;
414         }
415
416         /*
417          * Remember the relation'ss TOAST relation for later
418          */
419         toast_relid = onerel->rd_rel->reltoastrelid;
420
421         /*
422          * Set up statistics-gathering machinery.
423          */
424         vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
425         vacrelstats->relid = relid;
426         vacrelstats->num_pages = vacrelstats->num_tuples = 0;
427         vacrelstats->hasindex = false;
428
429         GetXmaxRecent(&XmaxRecent);
430
431         /* scan it */
432         reindex = false;
433         vacuum_pages.num_pages = fraged_pages.num_pages = 0;
434         scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
435         if (IsIgnoringSystemIndexes() &&
436                 IsSystemRelationName(RelationGetRelationName(onerel)))
437                 reindex = true;
438
439         /* Now open indices */
440         nindices = 0;
441         Irel = (Relation *) NULL;
442         get_indices(onerel, &nindices, &Irel);
443         if (!Irel)
444                 reindex = false;
445         else if (!RelationGetForm(onerel)->relhasindex)
446                 reindex = true;
447         if (nindices > 0)
448                 vacrelstats->hasindex = true;
449         else
450                 vacrelstats->hasindex = false;
451         if (reindex)
452         {
453                 for (i = 0; i < nindices; i++)
454                         index_close(Irel[i]);
455                 Irel = (Relation *) NULL;
456                 activate_indexes_of_a_table(relid, false);
457         }
458
459         /* Clean/scan index relation(s) */
460         if (Irel != (Relation *) NULL)
461         {
462                 if (vacuum_pages.num_pages > 0)
463                 {
464                         for (i = 0; i < nindices; i++)
465                                 vacuum_index(&vacuum_pages, Irel[i],
466                                                          vacrelstats->num_tuples, 0);
467                 }
468                 else
469                 {
470                         /* just scan indices to update statistic */
471                         for (i = 0; i < nindices; i++)
472                                 scan_index(Irel[i], vacrelstats->num_tuples);
473                 }
474         }
475
476         if (fraged_pages.num_pages > 0)
477         {
478                 /* Try to shrink heap */
479                 repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
480                                         nindices, Irel);
481         }
482         else
483         {
484                 if (Irel != (Relation *) NULL)
485                         close_indices(nindices, Irel);
486                 if (vacuum_pages.num_pages > 0)
487                 {
488                         /* Clean pages from vacuum_pages list */
489                         vacuum_heap(vacrelstats, onerel, &vacuum_pages);
490                 }
491                 else
492                 {
493                         /*
494                          * Flush dirty pages out to disk.  We must do this even if we
495                          * didn't do anything else, because we want to ensure that all
496                          * tuples have correct on-row commit status on disk (see
497                          * bufmgr.c's comments for FlushRelationBuffers()).
498                          */
499                         i = FlushRelationBuffers(onerel, vacrelstats->num_pages);
500                         if (i < 0)
501                                 elog(ERROR, "VACUUM (vacuum_rel): FlushRelationBuffers returned %d",
502                                          i);
503                 }
504         }
505         if (reindex)
506                 activate_indexes_of_a_table(relid, true);
507
508         /*
509          * ok - free vacuum_pages list of reaped pages
510          *
511          * Isn't this a waste of code?  Upcoming commit should free memory, no?
512          */
513         if (vacuum_pages.num_pages > 0)
514         {
515                 vacpage = vacuum_pages.pagedesc;
516                 for (i = 0; i < vacuum_pages.num_pages; i++, vacpage++)
517                         pfree(*vacpage);
518                 pfree(vacuum_pages.pagedesc);
519                 if (fraged_pages.num_pages > 0)
520                         pfree(fraged_pages.pagedesc);
521         }
522
523         /* all done with this class, but hold lock until commit */
524         heap_close(onerel, NoLock);
525
526         /* update statistics in pg_class */
527         update_relstats(vacrelstats->relid, vacrelstats->num_pages,
528                                         vacrelstats->num_tuples, vacrelstats->hasindex,
529                                         vacrelstats);
530
531         /*
532          * If the relation has a secondary toast one, vacuum that too
533          * while we still hold the lock on the master table. We don't
534          * need to propagate "analyze" to it, because the toaster
535          * always uses hardcoded index access and statistics are
536          * totally unimportant for toast relations
537          */
538         if (toast_relid != InvalidOid)
539                 vacuum_rel(toast_relid, false, true);
540
541         /* next command frees attribute stats */
542         if (!is_toastrel)
543                 CommitTransactionCommand();
544 }
545
546 /*
547  *      scan_heap() -- scan an open heap relation
548  *
549  *              This routine sets commit times, constructs vacuum_pages list of
550  *              empty/uninitialized pages and pages with dead tuples and
551  *              ~LP_USED line pointers, constructs fraged_pages list of pages
552  *              appropriate for purposes of shrinking and maintains statistics
553  *              on the number of live tuples in a heap.
554  */
555 static void
556 scan_heap(VRelStats *vacrelstats, Relation onerel,
557                         VacPageList vacuum_pages, VacPageList fraged_pages)
558 {
559         BlockNumber nblocks,
560                                 blkno;
561         ItemId          itemid;
562         Buffer          buf;
563         HeapTupleData tuple;
564         Page            page,
565                                 tempPage = NULL;
566         OffsetNumber offnum,
567                                 maxoff;
568         bool            pgchanged,
569                                 tupgone,
570                                 dobufrel,
571                                 notup;
572         char       *relname;
573         VacPage         vacpage,
574                                 vp;
575         uint32          tups_vacuumed,
576                                 num_tuples,
577                                 nkeep,
578                                 nunused,
579                                 ncrash,
580                                 empty_pages,
581                                 new_pages,
582                                 changed_pages,
583                                 empty_end_pages;
584         Size            free_size,
585                                 usable_free_size;
586         Size            min_tlen = MaxTupleSize;
587         Size            max_tlen = 0;
588         int32           i;
589         bool            do_shrinking = true;
590         VTupleLink      vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
591         int                     num_vtlinks = 0;
592         int                     free_vtlinks = 100;
593         struct rusage ru0;
594
595         getrusage(RUSAGE_SELF, &ru0);
596
597         relname = RelationGetRelationName(onerel);
598         elog(MESSAGE_LEVEL, "--Relation %s--", relname);
599
600         tups_vacuumed = num_tuples = nkeep = nunused = ncrash = empty_pages =
601                 new_pages = changed_pages = empty_end_pages = 0;
602         free_size = usable_free_size = 0;
603
604         nblocks = RelationGetNumberOfBlocks(onerel);
605
606         vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
607         vacpage->offsets_used = 0;
608
609         for (blkno = 0; blkno < nblocks; blkno++)
610         {
611                 buf = ReadBuffer(onerel, blkno);
612                 page = BufferGetPage(buf);
613                 vacpage->blkno = blkno;
614                 vacpage->offsets_free = 0;
615
616                 if (PageIsNew(page))
617                 {
618                         elog(NOTICE, "Rel %s: Uninitialized page %u - fixing",
619                                  relname, blkno);
620                         PageInit(page, BufferGetPageSize(buf), 0);
621                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
622                         free_size += (vacpage->free - sizeof(ItemIdData));
623                         new_pages++;
624                         empty_end_pages++;
625                         reap_page(vacuum_pages, vacpage);
626                         WriteBuffer(buf);
627                         continue;
628                 }
629
630                 if (PageIsEmpty(page))
631                 {
632                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
633                         free_size += (vacpage->free - sizeof(ItemIdData));
634                         empty_pages++;
635                         empty_end_pages++;
636                         reap_page(vacuum_pages, vacpage);
637                         ReleaseBuffer(buf);
638                         continue;
639                 }
640
641                 pgchanged = false;
642                 notup = true;
643                 maxoff = PageGetMaxOffsetNumber(page);
644                 for (offnum = FirstOffsetNumber;
645                          offnum <= maxoff;
646                          offnum = OffsetNumberNext(offnum))
647                 {
648                         itemid = PageGetItemId(page, offnum);
649
650                         /*
651                          * Collect un-used items too - it's possible to have indices
652                          * pointing here after crash.
653                          */
654                         if (!ItemIdIsUsed(itemid))
655                         {
656                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
657                                 nunused++;
658                                 continue;
659                         }
660
661                         tuple.t_datamcxt = NULL;
662                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
663                         tuple.t_len = ItemIdGetLength(itemid);
664                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
665                         tupgone = false;
666
667                         if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
668                         {
669                                 if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID)
670                                         tupgone = true;
671                                 else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
672                                 {
673                                         if (TransactionIdDidCommit((TransactionId)
674                                                                                            tuple.t_data->t_cmin))
675                                         {
676                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
677                                                 pgchanged = true;
678                                                 tupgone = true;
679                                         }
680                                         else
681                                         {
682                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
683                                                 pgchanged = true;
684                                         }
685                                 }
686                                 else if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
687                                 {
688                                         if (!TransactionIdDidCommit((TransactionId)
689                                                                                                 tuple.t_data->t_cmin))
690                                         {
691                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
692                                                 pgchanged = true;
693                                                 tupgone = true;
694                                         }
695                                         else
696                                         {
697                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
698                                                 pgchanged = true;
699                                         }
700                                 }
701                                 else
702                                 {
703                                         if (TransactionIdDidAbort(tuple.t_data->t_xmin))
704                                                 tupgone = true;
705                                         else if (TransactionIdDidCommit(tuple.t_data->t_xmin))
706                                         {
707                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
708                                                 pgchanged = true;
709                                         }
710                                         else if (!TransactionIdIsInProgress(tuple.t_data->t_xmin))
711                                         {
712
713                                                 /*
714                                                  * Not Aborted, Not Committed, Not in Progress -
715                                                  * so it's from crashed process. - vadim 11/26/96
716                                                  */
717                                                 ncrash++;
718                                                 tupgone = true;
719                                         }
720                                         else
721                                         {
722                                                 elog(NOTICE, "Rel %s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
723                                                    relname, blkno, offnum, tuple.t_data->t_xmin);
724                                                 do_shrinking = false;
725                                         }
726                                 }
727                         }
728
729                         /*
730                          * here we are concerned about tuples with xmin committed and
731                          * xmax unknown or committed
732                          */
733                         if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED &&
734                                 !(tuple.t_data->t_infomask & HEAP_XMAX_INVALID))
735                         {
736                                 if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED)
737                                 {
738                                         if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
739                                         {
740                                                 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
741                                                 tuple.t_data->t_infomask &=
742                                                         ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
743                                                 pgchanged = true;
744                                         }
745                                         else
746                                                 tupgone = true;
747                                 }
748                                 else if (TransactionIdDidAbort(tuple.t_data->t_xmax))
749                                 {
750                                         tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
751                                         pgchanged = true;
752                                 }
753                                 else if (TransactionIdDidCommit(tuple.t_data->t_xmax))
754                                 {
755                                         if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
756                                         {
757                                                 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
758                                                 tuple.t_data->t_infomask &=
759                                                         ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
760                                                 pgchanged = true;
761                                         }
762                                         else
763                                                 tupgone = true;
764                                 }
765                                 else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax))
766                                 {
767
768                                         /*
769                                          * Not Aborted, Not Committed, Not in Progress - so it
770                                          * is from a crashed process. - vadim 06/02/97
771                                          */
772                                         tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
773                                         tuple.t_data->t_infomask &=
774                                                 ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
775                                         pgchanged = true;
776                                 }
777                                 else
778                                 {
779                                         elog(NOTICE, "Rel %s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
780                                                  relname, blkno, offnum, tuple.t_data->t_xmax);
781                                         do_shrinking = false;
782                                 }
783
784                                 /*
785                                  * If the tuple was recently deleted then we must not remove
786                                  * it from the relation.
787                                  */
788                                 if (tupgone && (tuple.t_data->t_infomask & HEAP_XMIN_INVALID) == 0 && tuple.t_data->t_xmax >= XmaxRecent)
789                                 {
790                                         tupgone = false;
791                                         nkeep++;
792                                         if (!(tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED))
793                                         {
794                                                 tuple.t_data->t_infomask |= HEAP_XMAX_COMMITTED;
795                                                 pgchanged = true;
796                                         }
797
798                                         /*
799                                          * If we do shrinking and this tuple is updated one
800                                          * then remember it to construct updated tuple
801                                          * dependencies.
802                                          */
803                                         if (do_shrinking && !(ItemPointerEquals(&(tuple.t_self),
804                                                                                            &(tuple.t_data->t_ctid))))
805                                         {
806                                                 if (free_vtlinks == 0)
807                                                 {
808                                                         free_vtlinks = 1000;
809                                                         vtlinks = (VTupleLink) repalloc(vtlinks,
810                                                                                    (free_vtlinks + num_vtlinks) *
811                                                                                                  sizeof(VTupleLinkData));
812                                                 }
813                                                 vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
814                                                 vtlinks[num_vtlinks].this_tid = tuple.t_self;
815                                                 free_vtlinks--;
816                                                 num_vtlinks++;
817                                         }
818                                 }
819                         }
820
821                         /*
822                          * Other checks...
823                          */
824                         if (!OidIsValid(tuple.t_data->t_oid))
825                         {
826                                 elog(NOTICE, "Rel %s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
827                                          relname, blkno, offnum, tupgone);
828                         }
829
830                         if (tupgone)
831                         {
832                                 ItemId          lpp;
833
834                                 /*
835                                  * Here we are building a temporary copy of the page with
836                                  * dead tuples removed.  Below we will apply
837                                  * PageRepairFragmentation to the copy, so that we can
838                                  * determine how much space will be available after
839                                  * removal of dead tuples.  But note we are NOT changing
840                                  * the real page yet...
841                                  */
842                                 if (tempPage == (Page) NULL)
843                                 {
844                                         Size            pageSize;
845
846                                         pageSize = PageGetPageSize(page);
847                                         tempPage = (Page) palloc(pageSize);
848                                         memmove(tempPage, page, pageSize);
849                                 }
850
851                                 /* mark it unused on the temp page */
852                                 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
853                                 lpp->lp_flags &= ~LP_USED;
854
855                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
856                                 tups_vacuumed++;
857                         }
858                         else
859                         {
860                                 num_tuples++;
861                                 notup = false;
862                                 if (tuple.t_len < min_tlen)
863                                         min_tlen = tuple.t_len;
864                                 if (tuple.t_len > max_tlen)
865                                         max_tlen = tuple.t_len;
866                         }
867                 }
868
869                 if (pgchanged)
870                 {
871                         WriteBuffer(buf);
872                         dobufrel = false;
873                         changed_pages++;
874                 }
875                 else
876                         dobufrel = true;
877
878                 if (tempPage != (Page) NULL)
879                 {                                               /* Some tuples are gone */
880                         PageRepairFragmentation(tempPage);
881                         vacpage->free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
882                         free_size += vacpage->free;
883                         reap_page(vacuum_pages, vacpage);
884                         pfree(tempPage);
885                         tempPage = (Page) NULL;
886                 }
887                 else if (vacpage->offsets_free > 0)
888                 {                                               /* there are only ~LP_USED line pointers */
889                         vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
890                         free_size += vacpage->free;
891                         reap_page(vacuum_pages, vacpage);
892                 }
893                 if (dobufrel)
894                         ReleaseBuffer(buf);
895                 if (notup)
896                         empty_end_pages++;
897                 else
898                         empty_end_pages = 0;
899         }
900
901         pfree(vacpage);
902
903         /* save stats in the rel list for use later */
904         vacrelstats->num_tuples = num_tuples;
905         vacrelstats->num_pages = nblocks;
906 /*        vacrelstats->natts = attr_cnt;*/
907         if (num_tuples == 0)
908                 min_tlen = max_tlen = 0;
909         vacrelstats->min_tlen = min_tlen;
910         vacrelstats->max_tlen = max_tlen;
911
912         vacuum_pages->empty_end_pages = empty_end_pages;
913         fraged_pages->empty_end_pages = empty_end_pages;
914
915         /*
916          * Try to build fraged_pages, keeping in mind that we can't use the free
917          * space of "empty" end pages, nor of the last page if it is reaped.
918          */
919         if (do_shrinking && vacuum_pages->num_pages - empty_end_pages > 0)
920         {
921                 int                     nusf;           /* blocks usefull for re-using */
922
923                 nusf = vacuum_pages->num_pages - empty_end_pages;
924                 if ((vacuum_pages->pagedesc[nusf - 1])->blkno == nblocks - empty_end_pages - 1)
925                         nusf--;
926
927                 for (i = 0; i < nusf; i++)
928                 {
929                         vp = vacuum_pages->pagedesc[i];
930                         if (enough_space(vp, min_tlen))
931                         {
932                                 vpage_insert(fraged_pages, vp);
933                                 usable_free_size += vp->free;
934                         }
935                 }
936         }
937
938         if (usable_free_size > 0 && num_vtlinks > 0)
939         {
940                 qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
941                           vac_cmp_vtlinks);
942                 vacrelstats->vtlinks = vtlinks;
943                 vacrelstats->num_vtlinks = num_vtlinks;
944         }
945         else
946         {
947                 vacrelstats->vtlinks = NULL;
948                 vacrelstats->num_vtlinks = 0;
949                 pfree(vtlinks);
950         }
951
952         elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
953 Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; \
954 Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. %s",
955                  nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
956                  new_pages, num_tuples, tups_vacuumed,
957                  nkeep, vacrelstats->num_vtlinks, ncrash,
958                  nunused, min_tlen, max_tlen, free_size, usable_free_size,
959                  empty_end_pages, fraged_pages->num_pages,
960                  show_rusage(&ru0));
961
962 }
963
964
965 /*
966  *      repair_frag() -- try to repair relation's fragmentation
967  *
968  *              This routine marks dead tuples as unused and tries to re-use dead
969  *              space by moving tuples (and inserting index entries if needed). It
970  *              constructs an Nvacpagelist list of freed pages (whose tuples were
971  *              moved) and cleans the indexes for them after committing the current
972  *              transaction (in a hackish manner - without releasing locks or
973  *              freeing memory!). It truncates the relation if some end blocks
974  *              have been emptied.
975  */
975 static void
976 repair_frag(VRelStats *vacrelstats, Relation onerel,
977                            VacPageList vacuum_pages, VacPageList fraged_pages,
978                            int nindices, Relation *Irel)
979 {
980         TransactionId myXID;
981         CommandId       myCID;
982         Buffer          buf,
983                                 cur_buffer;
984         int                     nblocks,
985                                 blkno;
986         Page            page,
987                                 ToPage = NULL;
988         OffsetNumber offnum = 0,
989                                 maxoff = 0,
990                                 newoff,
991                                 max_offset;
992         ItemId          itemid,
993                                 newitemid;
994         HeapTupleData tuple,
995                                 newtup;
996         TupleDesc       tupdesc;
997         IndexInfo **indexInfo = NULL;
998         Datum           idatum[INDEX_MAX_KEYS];
999         char            inulls[INDEX_MAX_KEYS];
1000         InsertIndexResult iresult;
1001         VacPageListData Nvacpagelist;
1002         VacPage         cur_page = NULL,
1003                                 last_vacuum_page,
1004                                 vacpage,
1005                            *curpage;
1006         int                     cur_item = 0;
1007         int                     last_move_dest_block = -1,
1008                                 last_vacuum_block,
1009                                 i = 0;
1010         Size            tuple_len;
1011         int                     num_moved,
1012                                 num_fraged_pages,
1013                                 vacuumed_pages;
1014         int                     checked_moved,
1015                                 num_tuples,
1016                                 keep_tuples = 0;
1017         bool            isempty,
1018                                 dowrite,
1019                                 chain_tuple_moved;
1020         struct rusage ru0;
1021
1022         getrusage(RUSAGE_SELF, &ru0);
1023
1024         myXID = GetCurrentTransactionId();
1025         myCID = GetCurrentCommandId();
1026
1027         tupdesc = RelationGetDescr(onerel);
1028
1029         if (Irel != (Relation *) NULL)          /* preparation for index' inserts */
1030                 indexInfo = get_index_desc(onerel, nindices, Irel);
1031
1032         Nvacpagelist.num_pages = 0;
1033         num_fraged_pages = fraged_pages->num_pages;
1034         Assert(vacuum_pages->num_pages > vacuum_pages->empty_end_pages);
1035         vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
1036         last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1037         last_vacuum_block = last_vacuum_page->blkno;
1038         cur_buffer = InvalidBuffer;
1039         num_moved = 0;
1040
1041         vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
1042         vacpage->offsets_used = vacpage->offsets_free = 0;
1043
1044         /*
1045          * Scan pages backwards from the last nonempty page, trying to move
1046          * tuples down to lower pages.  Quit when we reach a page that we have
1047          * moved any tuples onto.  Note that if a page is still in the
1048          * fraged_pages list (list of candidate move-target pages) when we
1049          * reach it, we will remove it from the list.  This ensures we never
1050          * move a tuple up to a higher page number.
1051          *
1052          * NB: this code depends on the vacuum_pages and fraged_pages lists being
1053          * in order, and on fraged_pages being a subset of vacuum_pages.
1054          */
1055         nblocks = vacrelstats->num_pages;
1056         for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
1057                  blkno > last_move_dest_block;
1058                  blkno--)
1059         {
1060                 buf = ReadBuffer(onerel, blkno);
1061                 page = BufferGetPage(buf);
1062
1063                 vacpage->offsets_free = 0;
1064
1065                 isempty = PageIsEmpty(page);
1066
1067                 dowrite = false;
1068                 if (blkno == last_vacuum_block) /* it's reaped page */
1069                 {
1070                         if (last_vacuum_page->offsets_free > 0) /* there are dead tuples */
1071                         {                                       /* on this page - clean */
1072                                 Assert(!isempty);
1073                                 vacuum_page(page, last_vacuum_page);
1074                                 dowrite = true;
1075                         }
1076                         else
1077                                 Assert(isempty);
1078                         --vacuumed_pages;
1079                         if (vacuumed_pages > 0)
1080                         {
1081                                 /* get prev reaped page from vacuum_pages */
1082                                 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1083                                 last_vacuum_block = last_vacuum_page->blkno;
1084                         }
1085                         else
1086                         {
1087                                 last_vacuum_page = NULL;
1088                                 last_vacuum_block = -1;
1089                         }
1090                         if (num_fraged_pages > 0 &&
1091                         fraged_pages->pagedesc[num_fraged_pages - 1]->blkno ==
1092                                 (BlockNumber) blkno)
1093                         {
1094                                 /* page is in fraged_pages too; remove it */
1095                                 --num_fraged_pages;
1096                         }
1097                         if (isempty)
1098                         {
1099                                 ReleaseBuffer(buf);
1100                                 continue;
1101                         }
1102                 }
1103                 else
1104                         Assert(!isempty);
1105
1106                 chain_tuple_moved = false;              /* no one chain-tuple was moved
1107                                                                                  * off this page, yet */
1108                 vacpage->blkno = blkno;
1109                 maxoff = PageGetMaxOffsetNumber(page);
1110                 for (offnum = FirstOffsetNumber;
1111                          offnum <= maxoff;
1112                          offnum = OffsetNumberNext(offnum))
1113                 {
1114                         itemid = PageGetItemId(page, offnum);
1115
1116                         if (!ItemIdIsUsed(itemid))
1117                                 continue;
1118
1119                         tuple.t_datamcxt = NULL;
1120                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1121                         tuple_len = tuple.t_len = ItemIdGetLength(itemid);
1122                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
1123
1124                         if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1125                         {
1126                                 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1127                                         elog(ERROR, "Invalid XID in t_cmin");
1128                                 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1129                                         elog(ERROR, "HEAP_MOVED_IN was not expected");
1130
1131                                 /*
1132                                  * If this (chain) tuple was already moved by me then I
1133                                  * have to check whether it is in vacpage or not - i.e.
1134                                  * whether it was moved while cleaning this page or some
1135                                  * previous one.
1136                                  */
1136                                 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1137                                 {
1138                                         if (keep_tuples == 0)
1139                                                 continue;
1140                                         if (chain_tuple_moved)          /* some chains was moved
1141                                                                                                  * while */
1142                                         {                       /* cleaning this page */
1143                                                 Assert(vacpage->offsets_free > 0);
1144                                                 for (i = 0; i < vacpage->offsets_free; i++)
1145                                                 {
1146                                                         if (vacpage->offsets[i] == offnum)
1147                                                                 break;
1148                                                 }
1149                                                 if (i >= vacpage->offsets_free) /* not found */
1150                                                 {
1151                                                         vacpage->offsets[vacpage->offsets_free++] = offnum;
1152                                                         keep_tuples--;
1153                                                 }
1154                                         }
1155                                         else
1156                                         {
1157                                                 vacpage->offsets[vacpage->offsets_free++] = offnum;
1158                                                 keep_tuples--;
1159                                         }
1160                                         continue;
1161                                 }
1162                                 elog(ERROR, "HEAP_MOVED_OFF was expected");
1163                         }
1164
1165                         /*
1166                          * If this tuple is in a chain of tuples created by updates
1167                          * from "recent" transactions then we have to move the whole
1168                          * chain of tuples to other places.
1169                          */
1170                         if ((tuple.t_data->t_infomask & HEAP_UPDATED &&
1171                                  tuple.t_data->t_xmin >= XmaxRecent) ||
1172                                 (!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1173                                  !(ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))))
1174                         {
1175                                 Buffer          Cbuf = buf;
1176                                 Page            Cpage;
1177                                 ItemId          Citemid;
1178                                 ItemPointerData Ctid;
1179                                 HeapTupleData tp = tuple;
1180                                 Size            tlen = tuple_len;
1181                                 VTupleMove      vtmove = (VTupleMove)
1182                                 palloc(100 * sizeof(VTupleMoveData));
1183                                 int                     num_vtmove = 0;
1184                                 int                     free_vtmove = 100;
1185                                 VacPage         to_vacpage = NULL;
1186                                 int                     to_item = 0;
1187                                 bool            freeCbuf = false;
1188                                 int                     ti;
1189
1190                                 if (vacrelstats->vtlinks == NULL)
1191                                         elog(ERROR, "No one parent tuple was found");
1192                                 if (cur_buffer != InvalidBuffer)
1193                                 {
1194                                         WriteBuffer(cur_buffer);
1195                                         cur_buffer = InvalidBuffer;
1196                                 }
1197
1198                                 /*
1199                                  * If this tuple is at the beginning/middle of the chain
1200                                  * then we have to move to the end of the chain.
1201                                  */
1202                                 while (!(tp.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1203                                 !(ItemPointerEquals(&(tp.t_self), &(tp.t_data->t_ctid))))
1204                                 {
1205                                         Ctid = tp.t_data->t_ctid;
1206                                         if (freeCbuf)
1207                                                 ReleaseBuffer(Cbuf);
1208                                         freeCbuf = true;
1209                                         Cbuf = ReadBuffer(onerel,
1210                                                                           ItemPointerGetBlockNumber(&Ctid));
1211                                         Cpage = BufferGetPage(Cbuf);
1212                                         Citemid = PageGetItemId(Cpage,
1213                                                                           ItemPointerGetOffsetNumber(&Ctid));
1214                                         if (!ItemIdIsUsed(Citemid))
1215                                         {
1216
1217                                                 /*
1218                                                  * This means that in the middle of the chain there
1219                                                  * was a tuple updated by an older (than XmaxRecent)
1220                                                  * xaction, and that tuple has already been deleted
1221                                                  * by me. Actually, the upper part of the chain
1222                                                  * should be removed; it seems this should be
1223                                                  * handled in scan_heap(), but that's not
1224                                                  * implemented at the moment, so we just stop
1225                                                  * shrinking here.
1226                                                  */
1226                                                 ReleaseBuffer(Cbuf);
1227                                                 pfree(vtmove);
1228                                                 vtmove = NULL;
1229                                                 elog(NOTICE, "Child itemid in update-chain marked as unused - can't continue repair_frag");
1230                                                 break;
1231                                         }
1232                                         tp.t_datamcxt = NULL;
1233                                         tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1234                                         tp.t_self = Ctid;
1235                                         tlen = tp.t_len = ItemIdGetLength(Citemid);
1236                                 }
1237                                 if (vtmove == NULL)
1238                                         break;
1239                                 /* first, can chain be moved ? */
1240                                 for (;;)
1241                                 {
1242                                         if (to_vacpage == NULL ||
1243                                                 !enough_space(to_vacpage, tlen))
1244                                         {
1245
1246                                                 /*
1247                                                  * if to_vacpage no longer has enough free space to be
1248                                                  * useful, remove it from fraged_pages list
1249                                                  */
1250                                                 if (to_vacpage != NULL &&
1251                                                  !enough_space(to_vacpage, vacrelstats->min_tlen))
1252                                                 {
1253                                                         Assert(num_fraged_pages > to_item);
1254                                                         memmove(fraged_pages->pagedesc + to_item,
1255                                                                 fraged_pages->pagedesc + to_item + 1,
1256                                                                         sizeof(VacPage) * (num_fraged_pages - to_item - 1));
1257                                                         num_fraged_pages--;
1258                                                 }
                                                /*
                                                 * First-fit search: pick the first page in fraged_pages
                                                 * with enough free space for this chain member (tlen).
                                                 */
1259                                                 for (i = 0; i < num_fraged_pages; i++)
1260                                                 {
1261                                                         if (enough_space(fraged_pages->pagedesc[i], tlen))
1262                                                                 break;
1263                                                 }
1264
1265                                                 /* can't move item anywhere */
1266                                                 if (i == num_fraged_pages)
1267                                                 {
                                                        /*
                                                         * Roll back the offsets_used reservations made for
                                                         * earlier members of this chain, then abandon the
                                                         * whole chain (num_vtmove = 0 signals the caller).
                                                         */
1268                                                         for (i = 0; i < num_vtmove; i++)
1269                                                         {
1270                                                                 Assert(vtmove[i].vacpage->offsets_used > 0);
1271                                                                 (vtmove[i].vacpage->offsets_used)--;
1272                                                         }
1273                                                         num_vtmove = 0;
1274                                                         break;
1275                                                 }
1276                                                 to_item = i;
1277                                                 to_vacpage = fraged_pages->pagedesc[to_item];
1278                                         }
                                        /*
                                         * Reserve space on the destination page: the tuple itself,
                                         * plus a new line pointer if we have already consumed all
                                         * of the page's recyclable (free) item slots.
                                         */
1279                                         to_vacpage->free -= MAXALIGN(tlen);
1280                                         if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
1281                                                 to_vacpage->free -= MAXALIGN(sizeof(ItemIdData));
1282                                         (to_vacpage->offsets_used)++;
                                        /* Grow the vtmove array in chunks of 1000 entries as needed. */
1283                                         if (free_vtmove == 0)
1284                                         {
1285                                                 free_vtmove = 1000;
1286                                                 vtmove = (VTupleMove) repalloc(vtmove,
1287                                                                                          (free_vtmove + num_vtmove) *
1288                                                                                                  sizeof(VTupleMoveData));
1289                                         }
                                        /*
                                         * Record the planned move.  cleanVpd is set only for the
                                         * first tuple headed to this destination page, telling the
                                         * mover below to vacuum_page() it before the first insert.
                                         */
1290                                         vtmove[num_vtmove].tid = tp.t_self;
1291                                         vtmove[num_vtmove].vacpage = to_vacpage;
1292                                         if (to_vacpage->offsets_used == 1)
1293                                                 vtmove[num_vtmove].cleanVpd = true;
1294                                         else
1295                                                 vtmove[num_vtmove].cleanVpd = false;
1296                                         free_vtmove--;
1297                                         num_vtmove++;
1298
                                        /*
                                         * All done?  Stop walking backwards once the current tuple
                                         * is not itself an updated row version, or its xmin is old
                                         * enough that no earlier chain member can matter.
                                         */
1299                                         /* All done ? */
1300                                         if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
1301                                                 tp.t_data->t_xmin < XmaxRecent)
1302                                                 break;
1303
                                        /*
                                         * Otherwise locate the parent (older) row version via the
                                         * vtlinks array built earlier: it maps new_tid -> this_tid.
                                         */
1304                                         /* Well, try to find tuple with old row version */
1305                                         for (;;)
1306                                         {
1307                                                 Buffer          Pbuf;
1308                                                 Page            Ppage;
1309                                                 ItemId          Pitemid;
1310                                                 HeapTupleData Ptp;
1311                                                 VTupleLinkData vtld,
1312                                                                    *vtlp;
1313
1314                                                 vtld.new_tid = tp.t_self;
1315                                                 vtlp = (VTupleLink)
1316                                                         vac_find_eq((void *) (vacrelstats->vtlinks),
1317                                                                            vacrelstats->num_vtlinks,
1318                                                                            sizeof(VTupleLinkData),
1319                                                                            (void *) &vtld,
1320                                                                            vac_cmp_vtlinks);
1321                                                 if (vtlp == NULL)
1322                                                         elog(ERROR, "Parent tuple was not found");
1323                                                 tp.t_self = vtlp->this_tid;
1324                                                 Pbuf = ReadBuffer(onerel,
1325                                                                 ItemPointerGetBlockNumber(&(tp.t_self)));
1326                                                 Ppage = BufferGetPage(Pbuf);
1327                                                 Pitemid = PageGetItemId(Ppage,
1328                                                            ItemPointerGetOffsetNumber(&(tp.t_self)));
1329                                                 if (!ItemIdIsUsed(Pitemid))
1330                                                         elog(ERROR, "Parent itemid marked as unused");
1331                                                 Ptp.t_datamcxt = NULL;
1332                                                 Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
                                                /* Parent's t_ctid must point at the child we came from. */
1333                                                 Assert(ItemPointerEquals(&(vtld.new_tid),
1334                                                                                                  &(Ptp.t_data->t_ctid)));
1335
1336                                                 /*
1337                                                  * Read above about cases when
1338                                                  * !ItemIdIsUsed(Citemid) (child item is
1339                                                  * removed)... Due to the fact that at the moment
1340                                                  * we don't remove unuseful part of update-chain,
1341                                                  * it's possible to get too old parent row here.
1342                                                  * Like as in the case which caused this problem,
1343                                                  * we stop shrinking here. I could try to find
1344                                                  * real parent row but want not to do it because
1345                                                  * of real solution will be implemented anyway,
1346                                                  * latter, and we are too close to 6.5 release. -
1347                                                  * vadim 06/11/99
1348                                                  */
                                                /*
                                                 * Sanity cross-check of the chain link: the parent's
                                                 * xmax must equal the child's xmin.  If not, the parent
                                                 * is a stale row version; undo reservations and give
                                                 * up on this chain (see comment above).
                                                 */
1349                                                 if (Ptp.t_data->t_xmax != tp.t_data->t_xmin)
1350                                                 {
1351                                                         if (freeCbuf)
1352                                                                 ReleaseBuffer(Cbuf);
1353                                                         freeCbuf = false;
1354                                                         ReleaseBuffer(Pbuf);
1355                                                         for (i = 0; i < num_vtmove; i++)
1356                                                         {
1357                                                                 Assert(vtmove[i].vacpage->offsets_used > 0);
1358                                                                 (vtmove[i].vacpage->offsets_used)--;
1359                                                         }
1360                                                         num_vtmove = 0;
1361                                                         elog(NOTICE, "Too old parent tuple found - can't continue repair_frag");
1362                                                         break;
1363                                                 }
1364 #ifdef NOT_USED                                 /* I'm not sure that this will work
1365                                                                  * properly... */
1366
1367                                                 /*
1368                                                  * If this tuple is updated version of row and it
1369                                                  * was created by the same transaction then no one
1370                                                  * is interested in this tuple - mark it as
1371                                                  * removed.
1372                                                  */
1373                                                 if (Ptp.t_data->t_infomask & HEAP_UPDATED &&
1374                                                         Ptp.t_data->t_xmin == Ptp.t_data->t_xmax)
1375                                                 {
1376                                                         TransactionIdStore(myXID,
1377                                                                 (TransactionId *) &(Ptp.t_data->t_cmin));
1378                                                         Ptp.t_data->t_infomask &=
1379                                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1380                                                         Ptp.t_data->t_infomask |= HEAP_MOVED_OFF;
1381                                                         WriteBuffer(Pbuf);
1382                                                         continue;
1383                                                 }
1384 #endif
                                                /*
                                                 * Step to the parent: it becomes the current tuple for
                                                 * the next iteration of the enclosing chain loop, and
                                                 * its buffer becomes the one we must release (freeCbuf).
                                                 */
1385                                                 tp.t_datamcxt = Ptp.t_datamcxt;
1386                                                 tp.t_data = Ptp.t_data;
1387                                                 tlen = tp.t_len = ItemIdGetLength(Pitemid);
1388                                                 if (freeCbuf)
1389                                                         ReleaseBuffer(Cbuf);
1390                                                 Cbuf = Pbuf;
1391                                                 freeCbuf = true;
1392                                                 break;
1393                                         }
1394                                         if (num_vtmove == 0)
1395                                                 break;
1396                                 }
1397                                 if (freeCbuf)
1398                                         ReleaseBuffer(Cbuf);
1399                                 if (num_vtmove == 0)    /* chain can't be moved */
1400                                 {
1401                                         pfree(vtmove);
1402                                         break;
1403                                 }
                                /*
                                 * Execute the planned moves.  vtmove[] was filled walking the
                                 * chain from newest to oldest, so iterating forward here moves
                                 * the newest member first; Ctid always holds the new location
                                 * of the previously-moved (next-in-chain) tuple, letting each
                                 * moved tuple's t_ctid be re-linked below.
                                 */
1404                                 ItemPointerSetInvalid(&Ctid);
1405                                 for (ti = 0; ti < num_vtmove; ti++)
1406                                 {
1407                                         VacPage destvacpage = vtmove[ti].vacpage;
1408
1409                                         /* Get page to move from */
1410                                         tuple.t_self = vtmove[ti].tid;
1411                                         Cbuf = ReadBuffer(onerel,
1412                                                          ItemPointerGetBlockNumber(&(tuple.t_self)));
1413
1414                                         /* Get page to move to */
1415                                         cur_buffer = ReadBuffer(onerel, destvacpage->blkno);
1416
                                        /*
                                         * Lock destination first, then source (if different).
                                         * NOTE(review): this ordering is presumably safe because
                                         * vacuum holds an exclusive lock on the relation — confirm
                                         * against the function's entry conditions (not visible here).
                                         */
1417                                         LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1418                                         if (cur_buffer != Cbuf)
1419                                                 LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE);
1420
1421                                         ToPage = BufferGetPage(cur_buffer);
1422                                         Cpage = BufferGetPage(Cbuf);
1423
1424                                         /* NO ELOG(ERROR) TILL CHANGES ARE LOGGED */
1425
1426                                         Citemid = PageGetItemId(Cpage,
1427                                                         ItemPointerGetOffsetNumber(&(tuple.t_self)));
1428                                         tuple.t_datamcxt = NULL;
1429                                         tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1430                                         tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
1431
1432                                         /*
1433                                          * make a copy of the source tuple, and then mark the
1434                                          * source tuple MOVED_OFF.
1435                                          */
1436                                         heap_copytuple_with_tuple(&tuple, &newtup);
1437
                                        /* Flush cached copies of the old tuple from other backends. */
1438                                         RelationInvalidateHeapTuple(onerel, &tuple);
1439
                                        /*
                                         * Stamp vacuum's XID into t_cmin and set MOVED_OFF so that,
                                         * on crash, the move can be recognized and undone/redone.
                                         */
1440                                         TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1441                                         tuple.t_data->t_infomask &=
1442                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1443                                         tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1444
1445                                         /*
1446                                          * If this page was not used before - clean it.
1447                                          *
1448                                          * NOTE: a nasty bug used to lurk here.  It is possible
1449                                          * for the source and destination pages to be the same
1450                                          * (since this tuple-chain member can be on a page lower
1451                                          * than the one we're currently processing in the outer
1452                                          * loop).  If that's true, then after vacuum_page() the
1453                                          * source tuple will have been moved, and tuple.t_data
1454                                          * will be pointing at garbage.  Therefore we must do
1455                                          * everything that uses tuple.t_data BEFORE this step!!
1456                                          *
1457                                          * This path is different from the other callers of
1458                                          * vacuum_page, because we have already incremented the
1459                                          * vacpage's offsets_used field to account for the
1460                                          * tuple(s) we expect to move onto the page. Therefore
1461                                          * vacuum_page's check for offsets_used == 0 is
1462                                          * wrong. But since that's a good debugging check for
1463                                          * all other callers, we work around it here rather
1464                                          * than remove it.
1465                                          */
1466                                         if (!PageIsEmpty(ToPage) && vtmove[ti].cleanVpd)
1467                                         {
1468                                                 int                     sv_offsets_used = destvacpage->offsets_used;
1469
                                                /* Temporarily zero offsets_used to satisfy vacuum_page's check. */
1470                                                 destvacpage->offsets_used = 0;
1471                                                 vacuum_page(ToPage, destvacpage);
1472                                                 destvacpage->offsets_used = sv_offsets_used;
1473                                         }
1474
1475                                         /*
1476                                          * Update the state of the copied tuple, and store it
1477                                          * on the destination page.
1478                                          */
1479                                         TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1480                                         newtup.t_data->t_infomask &=
1481                                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1482                                         newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1483                                         newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1484                                                                                  InvalidOffsetNumber, LP_USED);
1485                                         if (newoff == InvalidOffsetNumber)
1486                                         {
                                                /* Space was pre-reserved above, so this "cannot happen"; STOP aborts hard. */
1487                                                 elog(STOP, "moving chain: failed to add item with len = %u to page %u",
1488                                                          tuple_len, destvacpage->blkno);
1489                                         }
                                        /* Point newtup at the on-page copy and record its new TID. */
1490                                         newitemid = PageGetItemId(ToPage, newoff);
1491                                         pfree(newtup.t_data);
1492                                         newtup.t_datamcxt = NULL;
1493                                         newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1494                                         ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff);
1495
1496 #ifdef XLOG
                                        /* WAL-log the move and stamp LSN/SUI on both dirtied pages. */
1497                                         {
1498                                                 XLogRecPtr      recptr = 
1499                                                         log_heap_move(onerel, tuple.t_self, &newtup);
1500
1501                                                 if (Cbuf != cur_buffer)
1502                                                 {
1503                                                         PageSetLSN(Cpage, recptr);
1504                                                         PageSetSUI(Cpage, ThisStartUpID);
1505                                                 }
1506                                                 PageSetLSN(ToPage, recptr);
1507                                                 PageSetSUI(ToPage, ThisStartUpID);
1508                                         }
1509 #endif
1510
1511                                         if (((int) destvacpage->blkno) > last_move_dest_block)
1512                                                 last_move_dest_block = destvacpage->blkno;
1513
1514                                         /*
1515                                          * Set new tuple's t_ctid pointing to itself for last
1516                                          * tuple in chain, and to next tuple in chain otherwise.
1517                                          */
1518                                         if (!ItemPointerIsValid(&Ctid))
1519                                                 newtup.t_data->t_ctid = newtup.t_self;
1520                                         else
1521                                                 newtup.t_data->t_ctid = Ctid;
1522                                         Ctid = newtup.t_self;
1523
1524                                         num_moved++;
1525
1526                                         /*
1527                                          * Remember that we moved tuple from the current page
1528                                          * (corresponding index tuple will be cleaned).
1529                                          */
1530                                         if (Cbuf == buf)
1531                                                 vacpage->offsets[vacpage->offsets_free++] =
1532                                                         ItemPointerGetOffsetNumber(&(tuple.t_self));
1533                                         else
                                                /* Moved from some other page: account for it in keep_tuples
                                                 * so the post-scan sweep below can find/record it. */
1534                                                 keep_tuples++;
1535
1536                                         LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1537                                         if (cur_buffer != Cbuf)
1538                                                 LockBuffer(Cbuf, BUFFER_LOCK_UNLOCK);
1539
                                        /* Insert index entries for the moved tuple's new TID. */
1540                                         if (Irel != (Relation *) NULL)
1541                                         {
1542                                                 /*
1543                                                  * XXX using CurrentMemoryContext here means
1544                                                  * intra-vacuum memory leak for functional indexes.
1545                                                  * Should fix someday.
1546                                                  *
1547                                                  * XXX This code fails to handle partial indexes!
1548                                                  * Probably should change it to use ExecOpenIndices.
1549                                                  */
1550                                                 for (i = 0; i < nindices; i++)
1551                                                 {
1552                                                         FormIndexDatum(indexInfo[i],
1553                                                                                    &newtup,
1554                                                                                    tupdesc,
1555                                                                                    CurrentMemoryContext,
1556                                                                                    idatum,
1557                                                                                    inulls);
1558                                                         iresult = index_insert(Irel[i],
1559                                                                                                    idatum,
1560                                                                                                    inulls,
1561                                                                                                    &newtup.t_self,
1562                                                                                                    onerel);
1563                                                         if (iresult)
1564                                                                 pfree(iresult);
1565                                                 }
1566                                         }
1567                                         WriteBuffer(cur_buffer);
1568                                         WriteBuffer(Cbuf);
1569                                 }
1570                                 cur_buffer = InvalidBuffer;
1571                                 pfree(vtmove);
1572                                 chain_tuple_moved = true;
1573                                 continue;
1574                         }
1575
                        /*
                         * Non-chain case: move a single tuple.  Reuse the current
                         * destination page if it still has room, else pick a new one
                         * from fraged_pages (first fit), dropping the old destination
                         * from the list if it can no longer hold even a minimal tuple.
                         */
1576                         /* try to find new page for this tuple */
1577                         if (cur_buffer == InvalidBuffer ||
1578                                 !enough_space(cur_page, tuple_len))
1579                         {
1580                                 if (cur_buffer != InvalidBuffer)
1581                                 {
1582                                         WriteBuffer(cur_buffer);
1583                                         cur_buffer = InvalidBuffer;
1584
1585                                         /*
1586                                          * If previous target page is now too full to add *any*
1587                                          * tuple to it, remove it from fraged_pages.
1588                                          */
1589                                         if (!enough_space(cur_page, vacrelstats->min_tlen))
1590                                         {
1591                                                 Assert(num_fraged_pages > cur_item);
1592                                                 memmove(fraged_pages->pagedesc + cur_item,
1593                                                                 fraged_pages->pagedesc + cur_item + 1,
1594                                                                 sizeof(VacPage) * (num_fraged_pages - cur_item - 1));
1595                                                 num_fraged_pages--;
1596                                         }
1597                                 }
1598                                 for (i = 0; i < num_fraged_pages; i++)
1599                                 {
1600                                         if (enough_space(fraged_pages->pagedesc[i], tuple_len))
1601                                                 break;
1602                                 }
1603                                 if (i == num_fraged_pages)
1604                                         break;          /* can't move item anywhere */
1605                                 cur_item = i;
1606                                 cur_page = fraged_pages->pagedesc[cur_item];
1607                                 cur_buffer = ReadBuffer(onerel, cur_page->blkno);
1608                                 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1609                                 ToPage = BufferGetPage(cur_buffer);
1610                                 /* if this page was not used before - clean it */
1611                                 if (!PageIsEmpty(ToPage) && cur_page->offsets_used == 0)
1612                                         vacuum_page(ToPage, cur_page);
1613                         }
1614                         else
1615                                 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1616
1617                         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1618
1619                         /* copy tuple */
1620                         heap_copytuple_with_tuple(&tuple, &newtup);
1621
1622                         RelationInvalidateHeapTuple(onerel, &tuple);
1623
1624                         /*
1625                          * Mark new tuple as moved_in by vacuum and store vacuum XID
1626                          * in t_cmin !!!
1627                          */
1628                         TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1629                         newtup.t_data->t_infomask &=
1630                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1631                         newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1632
1633                         /* add tuple to the page */
1634                         newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1635                                                                  InvalidOffsetNumber, LP_USED);
1636                         if (newoff == InvalidOffsetNumber)
1637                         {
1638                                 elog(ERROR, "\
1639 failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
1640                                          tuple_len, cur_page->blkno, cur_page->free,
1641                                  cur_page->offsets_used, cur_page->offsets_free);
1642                         }
                        /* Point newtup at the on-page copy; a lone tuple's ctid is its own TID. */
1643                         newitemid = PageGetItemId(ToPage, newoff);
1644                         pfree(newtup.t_data);
1645                         newtup.t_datamcxt = NULL;
1646                         newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1647                         ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->blkno, newoff);
1648                         newtup.t_self = newtup.t_data->t_ctid;
1649
1650                         /*
1651                          * Mark old tuple as moved_off by vacuum and store vacuum XID
1652                          * in t_cmin !!!
1653                          */
1654                         TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1655                         tuple.t_data->t_infomask &=
1656                                 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1657                         tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1658
1659 #ifdef XLOG
                        /* WAL-log the move and stamp LSN/SUI on source and destination pages. */
1660                         {
1661                                 XLogRecPtr      recptr = 
1662                                         log_heap_move(onerel, tuple.t_self, &newtup);
1663
1664                                 PageSetLSN(page, recptr);
1665                                 PageSetSUI(page, ThisStartUpID);
1666                                 PageSetLSN(ToPage, recptr);
1667                                 PageSetSUI(ToPage, ThisStartUpID);
1668                         }
1669 #endif
1670
                        /* Bookkeeping: space accounting on the destination page. */
1671                         cur_page->offsets_used++;
1672                         num_moved++;
1673                         cur_page->free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
1674                         if (((int) cur_page->blkno) > last_move_dest_block)
1675                                 last_move_dest_block = cur_page->blkno;
1676
                        /* Remember the vacated offset so its index entries get cleaned. */
1677                         vacpage->offsets[vacpage->offsets_free++] = offnum;
1678
1679                         LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1680                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1681
1682                         /* insert index' tuples if needed */
1683                         if (Irel != (Relation *) NULL)
1684                         {
1685                                 /*
1686                                  * XXX using CurrentMemoryContext here means
1687                                  * intra-vacuum memory leak for functional indexes.
1688                                  * Should fix someday.
1689                                  *
1690                                  * XXX This code fails to handle partial indexes!
1691                                  * Probably should change it to use ExecOpenIndices.
1692                                  */
1693                                 for (i = 0; i < nindices; i++)
1694                                 {
1695                                         FormIndexDatum(indexInfo[i],
1696                                                                    &newtup,
1697                                                                    tupdesc,
1698                                                                    CurrentMemoryContext,
1699                                                                    idatum,
1700                                                                    inulls);
1701                                         iresult = index_insert(Irel[i],
1702                                                                                    idatum,
1703                                                                                    inulls,
1704                                                                                    &newtup.t_self,
1705                                                                                    onerel);
1706                                         if (iresult)
1707                                                 pfree(iresult);
1708                                 }
1709                         }
1710
1711                 }                                               /* walk along page */
1712
                /*
                 * If the page scan ended early but some tuples were moved off
                 * OTHER pages during chain moving (keep_tuples), sweep the rest
                 * of this page for our own MOVED_OFF tuples and record their
                 * offsets so their index entries are cleaned too.
                 */
1713                 if (offnum < maxoff && keep_tuples > 0)
1714                 {
1715                         OffsetNumber off;
1716
1717                         for (off = OffsetNumberNext(offnum);
1718                                  off <= maxoff;
1719                                  off = OffsetNumberNext(off))
1720                         {
1721                                 itemid = PageGetItemId(page, off);
1722                                 if (!ItemIdIsUsed(itemid))
1723                                         continue;
1724                                 tuple.t_datamcxt = NULL;
1725                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1726                                 if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)
1727                                         continue;
                                /* Uncommitted tuples here must be ones this vacuum marked itself. */
1728                                 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1729                                         elog(ERROR, "Invalid XID in t_cmin (4)");
1730                                 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1731                                         elog(ERROR, "HEAP_MOVED_IN was not expected (2)");
1732                                 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1733                                 {
1734                                         /* some chains were moved while cleaning this page: */
1735                                         if (chain_tuple_moved)
1736                                         {                       /* the offset may already be recorded; check first */
1737                                                 Assert(vacpage->offsets_free > 0);
1738                                                 for (i = 0; i < vacpage->offsets_free; i++)
1739                                                 {
1740                                                         if (vacpage->offsets[i] == off)
1741                                                                 break;
1742                                                 }
1743                                                 if (i >= vacpage->offsets_free) /* not found */
1744                                                 {
1745                                                         vacpage->offsets[vacpage->offsets_free++] = off;
1746                                                         Assert(keep_tuples > 0);
1747                                                         keep_tuples--;
1748                                                 }
1749                                         }
1750                                         else
1751                                         {
1752                                                 vacpage->offsets[vacpage->offsets_free++] = off;
1753                                                 Assert(keep_tuples > 0);
1754                                                 keep_tuples--;
1755                                         }
1756                                 }
1757                         }
1758                 }
1759
1760                 if (vacpage->offsets_free > 0)  /* some tuples were moved */
1761                 {
                        /* Chain moving can record offsets out of order; sort before reaping. */
1762                         if (chain_tuple_moved)          /* else - they are ordered */
1763                         {
1764                                 qsort((char *) (vacpage->offsets), vacpage->offsets_free,
1765                                           sizeof(OffsetNumber), vac_cmp_offno);
1766                         }
1767                         reap_page(&Nvacpagelist, vacpage);
1768                         WriteBuffer(buf);
1769                 }
1770                 else if (dowrite)
1771                         WriteBuffer(buf);
1772                 else
1773                         ReleaseBuffer(buf);
1774
1775                 if (offnum <= maxoff)
1776                         break;                          /* some item(s) left */
1777
1778         }                                                       /* walk along relation */
1779
1780         blkno++;                                        /* new number of blocks */
1781
1782         if (cur_buffer != InvalidBuffer)
1783         {
1784                 Assert(num_moved > 0);
1785                 WriteBuffer(cur_buffer);
1786         }
1787
1788         if (num_moved > 0)
1789         {
1790
1791                 /*
1792                  * We have to commit our tuple' movings before we'll truncate
1793                  * relation, but we shouldn't lose our locks. And so - quick hack:
1794                  * flush buffers and record status of current transaction as
1795                  * committed, and continue. - vadim 11/13/96
1796                  */
1797                 FlushBufferPool();
1798                 TransactionIdCommit(myXID);
1799                 FlushBufferPool();
1800         }
1801
1802         /*
1803          * Clean uncleaned reaped pages from vacuum_pages list list and set
1804          * xmin committed for inserted tuples
1805          */
1806         checked_moved = 0;
1807         for (i = 0, curpage = vacuum_pages->pagedesc; i < vacuumed_pages; i++, curpage++)
1808         {
1809                 Assert((*curpage)->blkno < (BlockNumber) blkno);
1810                 buf = ReadBuffer(onerel, (*curpage)->blkno);
1811                 page = BufferGetPage(buf);
1812                 if ((*curpage)->offsets_used == 0)              /* this page was not used */
1813                 {
1814                         if (!PageIsEmpty(page))
1815                                 vacuum_page(page, *curpage);
1816                 }
1817                 else
1818 /* this page was used */
1819                 {
1820                         num_tuples = 0;
1821                         max_offset = PageGetMaxOffsetNumber(page);
1822                         for (newoff = FirstOffsetNumber;
1823                                  newoff <= max_offset;
1824                                  newoff = OffsetNumberNext(newoff))
1825                         {
1826                                 itemid = PageGetItemId(page, newoff);
1827                                 if (!ItemIdIsUsed(itemid))
1828                                         continue;
1829                                 tuple.t_datamcxt = NULL;
1830                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1831                                 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1832                                 {
1833                                         if ((TransactionId) tuple.t_data->t_cmin != myXID)
1834                                                 elog(ERROR, "Invalid XID in t_cmin (2)");
1835                                         if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1836                                         {
1837                                                 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
1838                                                 num_tuples++;
1839                                         }
1840                                         else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1841                                                 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
1842                                         else
1843                                                 elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
1844                                 }
1845                         }
1846                         Assert((*curpage)->offsets_used == num_tuples);
1847                         checked_moved += num_tuples;
1848                 }
1849                 WriteBuffer(buf);
1850         }
1851         Assert(num_moved == checked_moved);
1852
1853         elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
1854                  RelationGetRelationName(onerel),
1855                  nblocks, blkno, num_moved,
1856                  show_rusage(&ru0));
1857
1858         if (Nvacpagelist.num_pages > 0)
1859         {
1860                 /* vacuum indices again if needed */
1861                 if (Irel != (Relation *) NULL)
1862                 {
1863                         VacPage    *vpleft,
1864                                            *vpright,
1865                                                 vpsave;
1866
1867                         /* re-sort Nvacpagelist.pagedesc */
1868                         for (vpleft = Nvacpagelist.pagedesc,
1869                                  vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1;
1870                                  vpleft < vpright; vpleft++, vpright--)
1871                         {
1872                                 vpsave = *vpleft;
1873                                 *vpleft = *vpright;
1874                                 *vpright = vpsave;
1875                         }
1876                         Assert(keep_tuples >= 0);
1877                         for (i = 0; i < nindices; i++)
1878                                 vacuum_index(&Nvacpagelist, Irel[i],
1879                                                          vacrelstats->num_tuples, keep_tuples);
1880                 }
1881
1882                 /* clean moved tuples from last page in Nvacpagelist list */
1883                 if (vacpage->blkno == (BlockNumber) (blkno - 1) &&
1884                         vacpage->offsets_free > 0)
1885                 {
1886                         buf = ReadBuffer(onerel, vacpage->blkno);
1887                         page = BufferGetPage(buf);
1888                         num_tuples = 0;
1889                         for (offnum = FirstOffsetNumber;
1890                                  offnum <= maxoff;
1891                                  offnum = OffsetNumberNext(offnum))
1892                         {
1893                                 itemid = PageGetItemId(page, offnum);
1894                                 if (!ItemIdIsUsed(itemid))
1895                                         continue;
1896                                 tuple.t_datamcxt = NULL;
1897                                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1898
1899                                 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1900                                 {
1901                                         if ((TransactionId) tuple.t_data->t_cmin != myXID)
1902                                                 elog(ERROR, "Invalid XID in t_cmin (3)");
1903                                         if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1904                                         {
1905                                                 itemid->lp_flags &= ~LP_USED;
1906                                                 num_tuples++;
1907                                         }
1908                                         else
1909                                                 elog(ERROR, "HEAP_MOVED_OFF was expected (2)");
1910                                 }
1911
1912                         }
1913                         Assert(vacpage->offsets_free == num_tuples);
1914                         PageRepairFragmentation(page);
1915                         WriteBuffer(buf);
1916                 }
1917
1918                 /* now - free new list of reaped pages */
1919                 curpage = Nvacpagelist.pagedesc;
1920                 for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++)
1921                         pfree(*curpage);
1922                 pfree(Nvacpagelist.pagedesc);
1923         }
1924
1925         /*
1926          * Flush dirty pages out to disk.  We do this unconditionally, even if
1927          * we don't need to truncate, because we want to ensure that all tuples
1928          * have correct on-row commit status on disk (see bufmgr.c's comments
1929          * for FlushRelationBuffers()).
1930          */
1931         i = FlushRelationBuffers(onerel, blkno);
1932         if (i < 0)
1933                 elog(ERROR, "VACUUM (repair_frag): FlushRelationBuffers returned %d",
1934                          i);
1935
1936         /* truncate relation, if needed */
1937         if (blkno < nblocks)
1938         {
1939                 blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
1940                 Assert(blkno >= 0);
1941                 vacrelstats->num_pages = blkno; /* set new number of blocks */
1942         }
1943
1944         if (Irel != (Relation *) NULL)          /* pfree index' allocations */
1945         {
1946                 close_indices(nindices, Irel);
1947                 pfree(indexInfo);
1948         }
1949
1950         pfree(vacpage);
1951         if (vacrelstats->vtlinks != NULL)
1952                 pfree(vacrelstats->vtlinks);
1953 }
1954
1955 /*
1956  *      vacuum_heap() -- free dead tuples
1957  *
1958  *              This routine marks dead tuples as unused and truncates relation
1959  *              if there are "empty" end-blocks.
1960  */
1961 static void
1962 vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
1963 {
1964         Buffer          buf;
1965         Page            page;
1966         VacPage    *vacpage;
1967         int                     nblocks;
1968         int                     i;
1969
1970         nblocks = vacuum_pages->num_pages;
1971         nblocks -= vacuum_pages->empty_end_pages;               /* nothing to do with
1972                                                                                                          * them */
1973
1974         for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++)
1975         {
1976                 if ((*vacpage)->offsets_free > 0)
1977                 {
1978                         buf = ReadBuffer(onerel, (*vacpage)->blkno);
1979                         page = BufferGetPage(buf);
1980                         vacuum_page(page, *vacpage);
1981                         WriteBuffer(buf);
1982                 }
1983         }
1984
1985         /*
1986          * Flush dirty pages out to disk.  We do this unconditionally, even if
1987          * we don't need to truncate, because we want to ensure that all tuples
1988          * have correct on-row commit status on disk (see bufmgr.c's comments
1989          * for FlushRelationBuffers()).
1990          */
1991         Assert(vacrelstats->num_pages >= vacuum_pages->empty_end_pages);
1992         nblocks = vacrelstats->num_pages - vacuum_pages->empty_end_pages;
1993
1994         i = FlushRelationBuffers(onerel, nblocks);
1995         if (i < 0)
1996                 elog(ERROR, "VACUUM (vacuum_heap): FlushRelationBuffers returned %d",
1997                          i);
1998
1999         /* truncate relation if there are some empty end-pages */
2000         if (vacuum_pages->empty_end_pages > 0)
2001         {
2002                 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
2003                          RelationGetRelationName(onerel),
2004                          vacrelstats->num_pages, nblocks);
2005                 nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
2006                 Assert(nblocks >= 0);
2007                 vacrelstats->num_pages = nblocks; /* set new number of blocks */
2008         }
2009 }
2010
2011 /*
2012  *      vacuum_page() -- free dead tuples on a page
2013  *                                       and repair its fragmentation.
2014  */
2015 static void
2016 vacuum_page(Page page, VacPage vacpage)
2017 {
2018         ItemId          itemid;
2019         int                     i;
2020
2021         /* There shouldn't be any tuples moved onto the page yet! */
2022         Assert(vacpage->offsets_used == 0);
2023
2024         for (i = 0; i < vacpage->offsets_free; i++)
2025         {
2026                 itemid = &(((PageHeader) page)->pd_linp[vacpage->offsets[i] - 1]);
2027                 itemid->lp_flags &= ~LP_USED;
2028         }
2029         PageRepairFragmentation(page);
2030
2031 }
2032
2033 /*
2034  *      _scan_index() -- scan one index relation to update statistic.
2035  *
2036  */
2037 static void
2038 scan_index(Relation indrel, int num_tuples)
2039 {
2040         RetrieveIndexResult res;
2041         IndexScanDesc iscan;
2042         int                     nitups;
2043         int                     nipages;
2044         struct rusage ru0;
2045
2046         getrusage(RUSAGE_SELF, &ru0);
2047
2048         /* walk through the entire index */
2049         iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
2050         nitups = 0;
2051
2052         while ((res = index_getnext(iscan, ForwardScanDirection))
2053                    != (RetrieveIndexResult) NULL)
2054         {
2055                 nitups++;
2056                 pfree(res);
2057         }
2058
2059         index_endscan(iscan);
2060
2061         /* now update statistics in pg_class */
2062         nipages = RelationGetNumberOfBlocks(indrel);
2063         update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
2064
2065         elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
2066                  RelationGetRelationName(indrel), nipages, nitups,
2067                  show_rusage(&ru0));
2068
2069         if (nitups != num_tuples)
2070                 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2071 \n\tRecreate the index.",
2072                          RelationGetRelationName(indrel), nitups, num_tuples);
2073
2074 }
2075
2076 /*
2077  *      vacuum_index() -- vacuum one index relation.
2078  *
2079  *              Vpl is the VacPageList of the heap we're currently vacuuming.
2080  *              It's locked. Indrel is an index relation on the vacuumed heap.
2081  *              We don't set locks on the index relation here, since the indexed
2082  *              access methods support locking at different granularities.
2083  *              We let them handle it.
2084  *
2085  *              Finally, we arrange to update the index relation's statistics in
2086  *              pg_class.
2087  */
2088 static void
2089 vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
2090 {
2091         RetrieveIndexResult res;
2092         IndexScanDesc iscan;
2093         ItemPointer heapptr;
2094         int                     tups_vacuumed;
2095         int                     num_index_tuples;
2096         int                     num_pages;
2097         VacPage         vp;
2098         struct rusage ru0;
2099
2100         getrusage(RUSAGE_SELF, &ru0);
2101
2102         /* walk through the entire index */
2103         iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
2104         tups_vacuumed = 0;
2105         num_index_tuples = 0;
2106
2107         while ((res = index_getnext(iscan, ForwardScanDirection))
2108                    != (RetrieveIndexResult) NULL)
2109         {
2110                 heapptr = &res->heap_iptr;
2111
2112                 if ((vp = tid_reaped(heapptr, vacpagelist)) != (VacPage) NULL)
2113                 {
2114 #ifdef NOT_USED
2115                         elog(DEBUG, "<%x,%x> -> <%x,%x>",
2116                                  ItemPointerGetBlockNumber(&(res->index_iptr)),
2117                                  ItemPointerGetOffsetNumber(&(res->index_iptr)),
2118                                  ItemPointerGetBlockNumber(&(res->heap_iptr)),
2119                                  ItemPointerGetOffsetNumber(&(res->heap_iptr)));
2120 #endif
2121                         if (vp->offsets_free == 0)
2122                         {
2123                                 elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
2124                                          RelationGetRelationName(indrel),
2125                                          vp->blkno, ItemPointerGetOffsetNumber(heapptr));
2126                         }
2127                         ++tups_vacuumed;
2128                         index_delete(indrel, &res->index_iptr);
2129                 }
2130                 else
2131                         num_index_tuples++;
2132
2133                 pfree(res);
2134         }
2135
2136         index_endscan(iscan);
2137
2138         /* now update statistics in pg_class */
2139         num_pages = RelationGetNumberOfBlocks(indrel);
2140         update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
2141
2142         elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
2143                  RelationGetRelationName(indrel), num_pages,
2144                  num_index_tuples - keep_tuples, tups_vacuumed,
2145                  show_rusage(&ru0));
2146
2147         if (num_index_tuples != num_tuples + keep_tuples)
2148                 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2149 \n\tRecreate the index.",
2150                   RelationGetRelationName(indrel), num_index_tuples, num_tuples);
2151
2152 }
2153
2154 /*
2155  *      tid_reaped() -- is a particular tid reaped?
2156  *
2157  *              vacpagelist->VacPage_array is sorted in right order.
2158  */
2159 static VacPage
2160 tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
2161 {
2162         OffsetNumber ioffno;
2163         OffsetNumber *voff;
2164         VacPage         vp,
2165                            *vpp;
2166         VacPageData vacpage;
2167
2168         vacpage.blkno = ItemPointerGetBlockNumber(itemptr);
2169         ioffno = ItemPointerGetOffsetNumber(itemptr);
2170
2171         vp = &vacpage;
2172         vpp = (VacPage *) vac_find_eq((void *) (vacpagelist->pagedesc),
2173                                         vacpagelist->num_pages, sizeof(VacPage), (void *) &vp,
2174                                                                         vac_cmp_blk);
2175
2176         if (vpp == (VacPage *) NULL)
2177                 return (VacPage) NULL;
2178         vp = *vpp;
2179
2180         /* ok - we are on true page */
2181
2182         if (vp->offsets_free == 0)
2183         {                                                       /* this is EmptyPage !!! */
2184                 return vp;
2185         }
2186
2187         voff = (OffsetNumber *) vac_find_eq((void *) (vp->offsets),
2188                         vp->offsets_free, sizeof(OffsetNumber), (void *) &ioffno,
2189                                                                            vac_cmp_offno);
2190
2191         if (voff == (OffsetNumber *) NULL)
2192                 return (VacPage) NULL;
2193
2194         return vp;
2195
2196 }
2197
2198 /*
2199  *      update_relstats() -- update statistics for one relation
2200  *
2201  *              Statistics are stored in several places: the pg_class row for the
2202  *              relation has stats about the whole relation, the pg_attribute rows
2203  *              for each attribute store "dispersion", and there is a pg_statistic
2204  *              row for each (non-system) attribute.  (Dispersion probably ought to
2205  *              be moved to pg_statistic, but it's not worth doing unless there's
2206  *              another reason to have to change pg_attribute.)  Dispersion and
2207  *              pg_statistic values are only updated by VACUUM ANALYZE, but we
2208  *              always update the stats in pg_class.
2209  *
2210  *              This routine works for both index and heap relation entries in
2211  *              pg_class.  We violate no-overwrite semantics here by storing new
2212  *              values for the statistics columns directly into the pg_class
2213  *              tuple that's already on the page.  The reason for this is that if
2214  *              we updated these tuples in the usual way, vacuuming pg_class itself
2215  *              wouldn't work very well --- by the time we got done with a vacuum
2216  *              cycle, most of the tuples in pg_class would've been obsoleted.
2217  *              Updating pg_class's own statistics would be especially tricky.
2218  *              Of course, this only works for fixed-size never-null columns, but
2219  *              these are.
2220  */
2221 static void
2222 update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
2223                         VRelStats *vacrelstats)
2224 {
2225         Relation        rd;
2226         HeapTupleData rtup;
2227         HeapTuple       ctup;
2228         Form_pg_class pgcform;
2229         Buffer          buffer;
2230
2231         /*
2232          * update number of tuples and number of pages in pg_class
2233          */
2234         rd = heap_openr(RelationRelationName, RowExclusiveLock);
2235
2236         ctup = SearchSysCacheTupleCopy(RELOID,
2237                                                                    ObjectIdGetDatum(relid),
2238                                                                    0, 0, 0);
2239         if (!HeapTupleIsValid(ctup))
2240                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
2241                          relid);
2242
2243         /* get the buffer cache tuple */
2244         rtup.t_self = ctup->t_self;
2245         heap_fetch(rd, SnapshotNow, &rtup, &buffer);
2246         heap_freetuple(ctup);
2247
2248         /* overwrite the existing statistics in the tuple */
2249         pgcform = (Form_pg_class) GETSTRUCT(&rtup);
2250         pgcform->reltuples = num_tuples;
2251         pgcform->relpages = num_pages;
2252         pgcform->relhasindex = hasindex;
2253
2254         /* invalidate the tuple in the cache and write the buffer */
2255         RelationInvalidateHeapTuple(rd, &rtup);
2256         WriteBuffer(buffer);
2257
2258         heap_close(rd, RowExclusiveLock);
2259 }
2260
2261 /*
2262  *      reap_page() -- save a page on the array of reaped pages.
2263  *
2264  *              As a side effect of the way that the vacuuming loop for a given
2265  *              relation works, higher pages come after lower pages in the array
2266  *              (and highest tid on a page is last).
2267  */
2268 static void
2269 reap_page(VacPageList vacpagelist, VacPage vacpage)
2270 {
2271         VacPage newvacpage;
2272
2273         /* allocate a VacPageData entry */
2274         newvacpage = (VacPage) palloc(sizeof(VacPageData) + vacpage->offsets_free * sizeof(OffsetNumber));
2275
2276         /* fill it in */
2277         if (vacpage->offsets_free > 0)
2278                 memmove(newvacpage->offsets, vacpage->offsets, vacpage->offsets_free * sizeof(OffsetNumber));
2279         newvacpage->blkno = vacpage->blkno;
2280         newvacpage->free = vacpage->free;
2281         newvacpage->offsets_used = vacpage->offsets_used;
2282         newvacpage->offsets_free = vacpage->offsets_free;
2283
2284         /* insert this page into vacpagelist list */
2285         vpage_insert(vacpagelist, newvacpage);
2286
2287 }
2288
2289 static void
2290 vpage_insert(VacPageList vacpagelist, VacPage vpnew)
2291 {
2292 #define PG_NPAGEDESC 1024
2293
2294         /* allocate a VacPage entry if needed */
2295         if (vacpagelist->num_pages == 0)
2296         {
2297                 vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage));
2298                 vacpagelist->num_allocated_pages = PG_NPAGEDESC;
2299         }
2300         else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages)
2301         {
2302                 vacpagelist->num_allocated_pages *= 2;
2303                 vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage));
2304         }
2305         vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew;
2306         (vacpagelist->num_pages)++;
2307
2308 }
2309
/*
 *	vac_find_eq() -- binary-search a sorted array for an element equal
 *					 to *elm according to compar().
 *
 *		bot points to the first of nelem elements, each size bytes wide,
 *		sorted ascending under compar().  Returns a pointer to a matching
 *		element, or NULL if none matches.
 */
static void *
vac_find_eq(void *bot, int nelem, int size, void *elm,
		   int (*compar) (const void *, const void *))
{
	char	   *base = (char *) bot;
	int			lo = 0;
	int			hi = nelem - 1;

	while (lo <= hi)
	{
		int			mid = lo + (hi - lo) / 2;
		void	   *candidate = (void *) (base + mid * size);
		int			cmp = compar(elm, candidate);

		if (cmp == 0)
			return candidate;
		if (cmp < 0)
			hi = mid - 1;		/* target is in the lower half */
		else
			lo = mid + 1;		/* target is in the upper half */
	}

	return NULL;
}
2364
2365 static int
2366 vac_cmp_blk(const void *left, const void *right)
2367 {
2368         BlockNumber lblk,
2369                                 rblk;
2370
2371         lblk = (*((VacPage *) left))->blkno;
2372         rblk = (*((VacPage *) right))->blkno;
2373
2374         if (lblk < rblk)
2375                 return -1;
2376         if (lblk == rblk)
2377                 return 0;
2378         return 1;
2379
2380 }
2381
2382 static int
2383 vac_cmp_offno(const void *left, const void *right)
2384 {
2385
2386         if (*(OffsetNumber *) left < *(OffsetNumber *) right)
2387                 return -1;
2388         if (*(OffsetNumber *) left == *(OffsetNumber *) right)
2389                 return 0;
2390         return 1;
2391
2392 }
2393
2394 static int
2395 vac_cmp_vtlinks(const void *left, const void *right)
2396 {
2397
2398         if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi <
2399                 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2400                 return -1;
2401         if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi >
2402                 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2403                 return 1;
2404         /* bi_hi-es are equal */
2405         if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo <
2406                 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2407                 return -1;
2408         if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo >
2409                 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2410                 return 1;
2411         /* bi_lo-es are equal */
2412         if (((VTupleLink) left)->new_tid.ip_posid <
2413                 ((VTupleLink) right)->new_tid.ip_posid)
2414                 return -1;
2415         if (((VTupleLink) left)->new_tid.ip_posid >
2416                 ((VTupleLink) right)->new_tid.ip_posid)
2417                 return 1;
2418         return 0;
2419
2420 }
2421
2422
2423 static void
2424 get_indices(Relation relation, int *nindices, Relation **Irel)
2425 {
2426         List       *indexoidlist,
2427                            *indexoidscan;
2428         int                     i;
2429
2430         indexoidlist = RelationGetIndexList(relation);
2431
2432         *nindices = length(indexoidlist);
2433
2434         if (*nindices > 0)
2435                 *Irel = (Relation *) palloc(*nindices * sizeof(Relation));
2436         else
2437                 *Irel = NULL;
2438
2439         i = 0;
2440         foreach(indexoidscan, indexoidlist)
2441         {
2442                 Oid                     indexoid = lfirsti(indexoidscan);
2443
2444                 (*Irel)[i] = index_open(indexoid);
2445                 i++;
2446         }
2447
2448         freeList(indexoidlist);
2449 }
2450
2451
2452 static void
2453 close_indices(int nindices, Relation *Irel)
2454 {
2455
2456         if (Irel == (Relation *) NULL)
2457                 return;
2458
2459         while (nindices--)
2460                 index_close(Irel[nindices]);
2461         pfree(Irel);
2462
2463 }
2464
2465
2466 /*
2467  * Obtain IndexInfo data for each index on the rel
2468  */
2469 static IndexInfo **
2470 get_index_desc(Relation onerel, int nindices, Relation *Irel)
2471 {
2472         IndexInfo **indexInfo;
2473         int                     i;
2474         HeapTuple       cachetuple;
2475
2476         indexInfo = (IndexInfo **) palloc(nindices * sizeof(IndexInfo *));
2477
2478         for (i = 0; i < nindices; i++)
2479         {
2480                 cachetuple = SearchSysCacheTuple(INDEXRELID,
2481                                                          ObjectIdGetDatum(RelationGetRelid(Irel[i])),
2482                                                                                  0, 0, 0);
2483                 if (!HeapTupleIsValid(cachetuple))
2484                         elog(ERROR, "get_index_desc: index %u not found",
2485                                  RelationGetRelid(Irel[i]));
2486                 indexInfo[i] = BuildIndexInfo(cachetuple);
2487         }
2488
2489         return indexInfo;
2490 }
2491
2492
2493 static bool
2494 enough_space(VacPage vacpage, Size len)
2495 {
2496
2497         len = MAXALIGN(len);
2498
2499         if (len > vacpage->free)
2500                 return false;
2501
2502         if (vacpage->offsets_used < vacpage->offsets_free)      /* there are free
2503                                                                                                                  * itemid(s) */
2504                 return true;                    /* and len <= free_space */
2505
2506         /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
2507         if (len + MAXALIGN(sizeof(ItemIdData)) <= vacpage->free)
2508                 return true;
2509
2510         return false;
2511
2512 }
2513
2514
2515 /*
2516  * Compute elapsed time since ru0 usage snapshot, and format into
2517  * a displayable string.  Result is in a static string, which is
2518  * tacky, but no one ever claimed that the Postgres backend is
2519  * threadable...
2520  */
2521 static char *
2522 show_rusage(struct rusage * ru0)
2523 {
2524         static char result[64];
2525         struct rusage ru1;
2526
2527         getrusage(RUSAGE_SELF, &ru1);
2528
2529         if (ru1.ru_stime.tv_usec < ru0->ru_stime.tv_usec)
2530         {
2531                 ru1.ru_stime.tv_sec--;
2532                 ru1.ru_stime.tv_usec += 1000000;
2533         }
2534         if (ru1.ru_utime.tv_usec < ru0->ru_utime.tv_usec)
2535         {
2536                 ru1.ru_utime.tv_sec--;
2537                 ru1.ru_utime.tv_usec += 1000000;
2538         }
2539
2540         snprintf(result, sizeof(result),
2541                          "CPU %d.%02ds/%d.%02du sec.",
2542                          (int) (ru1.ru_stime.tv_sec - ru0->ru_stime.tv_sec),
2543                          (int) (ru1.ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 10000,
2544                          (int) (ru1.ru_utime.tv_sec - ru0->ru_utime.tv_sec),
2545                    (int) (ru1.ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 10000);
2546
2547         return result;
2548 }