1 /*-------------------------------------------------------------------------
4 * the postgres vacuum cleaner
6 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.172 2000/11/16 05:50:59 momjian Exp $
14 *-------------------------------------------------------------------------
16 #include <sys/types.h>
24 #include "access/genam.h"
25 #include "access/heapam.h"
26 #include "catalog/catalog.h"
27 #include "catalog/catname.h"
28 #include "catalog/index.h"
29 #include "commands/vacuum.h"
30 #include "miscadmin.h"
31 #include "nodes/execnodes.h"
32 #include "storage/sinval.h"
33 #include "storage/smgr.h"
34 #include "tcop/tcopprot.h"
35 #include "utils/acl.h"
36 #include "utils/builtins.h"
37 #include "utils/fmgroids.h"
38 #include "utils/inval.h"
39 #include "utils/relcache.h"
40 #include "utils/syscache.h"
41 #include "utils/temprel.h"
43 #ifndef HAVE_GETRUSAGE
44 #include "rusagestub.h"
47 #include <sys/resource.h>
51 #include "access/xlog.h"
52 XLogRecPtr log_heap_move(Relation reln,
53 ItemPointerData from, HeapTuple newtup);
/*
 * Module-level state.  All of this survives across the per-relation
 * transactions that VACUUM commits internally.
 */
56 static MemoryContext vac_context = NULL; /* cross-transaction storage; created in vacuum(), deleted in vacuum_shutdown() */
58 static int MESSAGE_LEVEL; /* elog level for progress output: NOTICE when verbose, else DEBUG */
60 static TransactionId XmaxRecent; /* cutoff set via GetXmaxRecent(); recently-deleted tuples (xmax >= XmaxRecent) are kept */
62 /* non-export function prototypes */
63 static void vacuum_init(void);
64 static void vacuum_shutdown(void);
65 static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
66 static VRelList getrels(NameData *VacRelP);
67 static void vacuum_rel(Oid relid, bool analyze, bool is_toastrel);
68 static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
69 static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
70 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
71 static void vacuum_page(Page page, VacPage vacpage);
72 static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
73 static void scan_index(Relation indrel, int num_tuples);
74 static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
75 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
76 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
77 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
78 static void get_indices(Relation relation, int *nindices, Relation **Irel);
79 static void close_indices(int nindices, Relation *Irel);
80 static IndexInfo **get_index_desc(Relation onerel, int nindices,
82 static void *vac_find_eq(void *bot, int nelem, int size, void *elm,
83 int (*compar) (const void *, const void *));
84 static int vac_cmp_blk(const void *left, const void *right);
85 static int vac_cmp_offno(const void *left, const void *right);
86 static int vac_cmp_vtlinks(const void *left, const void *right);
87 static bool enough_space(VacPage vacpage, Size len);
88 static char *show_rusage(struct rusage * ru0);
/*
 * vacuum() -- top-level entry point for the VACUUM [ANALYZE] command.
 *
 * vacrel names the relation to vacuum (presumably NULL means the whole
 * database -- TODO confirm against the grammar/caller).  anal_cols is a
 * column list for ANALYZE and is only accepted together with analyze.
 * Parameters are copied into vac_context because the transaction commit
 * performed during startup frees the memory they were passed in.
 */
92 vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
98 List *anal_cols2 = NIL; /* copy of anal_cols, allocated in vac_context */
100 if (anal_cols != NIL && !analyze)
101 elog(ERROR, "Can't vacuum columns, only tables. You can 'vacuum analyze' columns.");
104 * We cannot run VACUUM inside a user transaction block; if we were
105 * inside a transaction, then our commit- and
106 * start-transaction-command calls would not have the intended effect!
107 * Furthermore, the forced commit that occurs before truncating the
108 * relation's file would have the effect of committing the rest of the
109 * user's transaction too, which would certainly not be the desired
112 if (IsTransactionBlock())
113 elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
/* verbose selects how chatty the per-relation progress elog()s are */
116 MESSAGE_LEVEL = NOTICE;
118 MESSAGE_LEVEL = DEBUG;
121 * Create special memory context for cross-transaction storage.
123 * Since it is a child of QueryContext, it will go away eventually
124 * even if we suffer an error; there's no need for special abort
127 vac_context = AllocSetContextCreate(QueryContext,
129 ALLOCSET_DEFAULT_MINSIZE,
130 ALLOCSET_DEFAULT_INITSIZE,
131 ALLOCSET_DEFAULT_MAXSIZE);
133 /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
136 namestrcpy(&VacRel, vacrel);
137 VacRelName = &VacRel;
142 /* must also copy the column list, if any, to safe storage */
143 old = MemoryContextSwitchTo(vac_context);
144 foreach(le, anal_cols)
146 char *col = (char *) lfirst(le);
148 anal_cols2 = lappend(anal_cols2, pstrdup(col));
150 MemoryContextSwitchTo(old);
153 * Start up the vacuum cleaner.
155 * NOTE: since this commits the current transaction, the memory holding
156 * any passed-in parameters gets freed here. We must have already
157 * copied pass-by-reference parameters to safe storage. Don't make me
162 /* vacuum the database */
163 vac_vacuum(VacRelName, analyze, anal_cols2);
170 * vacuum_init(), vacuum_shutdown() -- start up and shut down the vacuum cleaner.
172 * Formerly, there was code here to prevent more than one VACUUM from
173 * executing concurrently in the same database. However, there's no
174 * good reason to prevent that, and manually removing lockfiles after
175 * a vacuum crash was a pain for dbadmins. So, forget about lockfiles,
176 * and just rely on the exclusive lock we grab on each target table
177 * to ensure that there aren't two VACUUMs running on the same table
180 * The strangeness with committing and starting transactions in the
181 * init and shutdown routines is due to the fact that the vacuum cleaner
182 * is invoked via an SQL command, and so is already executing inside
183 * a transaction. We need to leave ourselves in a predictable state
184 * on entry and exit to the vacuum cleaner. We commit the transaction
185 * started in PostgresMain() inside vacuum_init(), and start one in
186 * vacuum_shutdown() to match the commit waiting for us back in
192 /* matches the StartTransaction in PostgresMain() */
193 CommitTransactionCommand();
199 /* on entry, we are not in a transaction */
202 * Flush the init file that relcache.c uses to save startup time. The
203 * next backend startup will rebuild the init file with up-to-date
204 * information from pg_class. This lets the optimizer see the stats
205 * that we've collected for certain critical system indexes. See
206 * relcache.c for more details.
208 * Ignore any failure to unlink the file, since it might not be there if
209 * no backend has been started since the last vacuum...
211 unlink(RELCACHE_INIT_FILENAME);
213 /* matches the CommitTransaction in PostgresMain() */
214 StartTransactionCommand();
217 * Clean up working storage --- note we must do this after
218 * StartTransactionCommand, else we might be trying to delete
219 * the active context!
221 MemoryContextDelete(vac_context); /* releases everything allocated in vac_context, incl. the relation list */
226 * vac_vacuum() -- vacuum the database.
228 * This routine builds a list of relations to vacuum, and then calls
229 * code that vacuums them one at a time. We are careful to vacuum each
230 * relation in a separate transaction in order to avoid holding too many
234 vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
239 /* get list of relations */
240 vrl = getrels(VacRelP);
242 /* vacuum each heap relation */
243 for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
245 vacuum_rel(cur->vrl_relid, analyze, false);
246 /* analyze separately so locking is minimized */
/* NOTE(review): presumably guarded by an if (analyze) test -- confirm in full source */
248 analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
/*
 * getrels() -- build the list of relations to vacuum.
 *
 * If VacRelP names a relation, scan pg_class by relname (after re-mapping
 * temp-table names); otherwise scan by relkind = 'r' to collect every
 * ordinary relation.  Entries are allocated in vac_context so the list
 * survives the per-relation transaction commits.
 */
253 getrels(NameData *VacRelP)
268 StartTransactionCommand();
270 if (NameStr(*VacRelP))
274 * we could use the cache here, but it is clearer to use scankeys
275 * for both vacuum cases, bjm 2000/01/19
277 char *nontemp_relname;
279 /* We must re-map temp table names bjm 2000-04-06 */
280 if ((nontemp_relname =
281 get_temp_rel_by_username(NameStr(*VacRelP))) == NULL)
282 nontemp_relname = NameStr(*VacRelP);
284 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relname,
286 PointerGetDatum(nontemp_relname));
290 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
291 F_CHAREQ, CharGetDatum('r'));
294 vrl = cur = (VRelList) NULL;
296 rel = heap_openr(RelationRelationName, AccessShareLock);
297 tupdesc = RelationGetDescr(rel);
299 scan = heap_beginscan(rel, false, SnapshotNow, 1, &key);
301 while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
305 d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
308 d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
310 rkind = DatumGetChar(d);
/* only plain relations can be vacuumed */
312 if (rkind != RELKIND_RELATION)
/* NOTE(review): "indecies" is misspelled in this user-visible message; should read "indices" */
314 elog(NOTICE, "Vacuum: can not process indecies, views and certain system tables");
318 /* get a relation list entry for this guy */
319 if (vrl == (VRelList) NULL)
320 vrl = cur = (VRelList)
321 MemoryContextAlloc(vac_context, sizeof(VRelListData));
324 cur->vrl_next = (VRelList)
325 MemoryContextAlloc(vac_context, sizeof(VRelListData));
329 cur->vrl_relid = tuple->t_data->t_oid;
330 cur->vrl_next = (VRelList) NULL;
334 heap_close(rel, AccessShareLock);
337 elog(NOTICE, "Vacuum: table not found");
339 CommitTransactionCommand();
345 * vacuum_rel() -- vacuum one heap relation
347 * This routine vacuums a single heap, cleans out its indices, and
348 * updates its num_pages and num_tuples statistics.
350 * Doing one heap at a time incurs extra overhead, since we need to
351 * check that the heap exists again just before we vacuum it. The
352 * reason that we do this is so that vacuuming can be spread across
353 * many small transactions. Otherwise, two-phase locking would require
354 * us to lock the entire database during one pass of the vacuum cleaner.
357 vacuum_rel(Oid relid, bool analyze, bool is_toastrel)
361 VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean
363 VacPageListData fraged_pages; /* List of pages with space enough for
369 VRelStats *vacrelstats;
370 bool reindex = false;
/* each relation is vacuumed in its own transaction */
374 StartTransactionCommand();
377 * Check for user-requested abort. Note we want this to be inside a
378 * transaction, so xact.c doesn't issue useless NOTICE.
384 * Race condition -- if the pg_class tuple has gone away since the
385 * last time we saw it, we don't need to vacuum it.
387 tuple = SearchSysCacheTuple(RELOID,
388 ObjectIdGetDatum(relid),
390 if (!HeapTupleIsValid(tuple))
393 CommitTransactionCommand();
398 * Open the class, get an exclusive lock on it, and check permissions.
400 * Note we choose to treat permissions failure as a NOTICE and keep
401 * trying to vacuum the rest of the DB --- is this appropriate?
403 onerel = heap_open(relid, AccessExclusiveLock);
405 if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
408 elog(NOTICE, "Skipping \"%s\" --- only table owner can VACUUM it",
409 RelationGetRelationName(onerel));
410 heap_close(onerel, AccessExclusiveLock);
412 CommitTransactionCommand();
417 * Remember the relation's TOAST relation for later
419 toast_relid = onerel->rd_rel->reltoastrelid;
422 * Set up statistics-gathering machinery.
424 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
425 vacrelstats->relid = relid;
426 vacrelstats->num_pages = vacrelstats->num_tuples = 0;
427 vacrelstats->hasindex = false;
/* establish the xmax cutoff used by scan_heap() to decide tuple removal */
429 GetXmaxRecent(&XmaxRecent);
433 vacuum_pages.num_pages = fraged_pages.num_pages = 0;
434 scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
435 if (IsIgnoringSystemIndexes() &&
436 IsSystemRelationName(RelationGetRelationName(onerel)))
439 /* Now open indices */
441 Irel = (Relation *) NULL;
442 get_indices(onerel, &nindices, &Irel);
445 else if (!RelationGetForm(onerel)->relhasindex)
448 vacrelstats->hasindex = true;
450 vacrelstats->hasindex = false;
/* reindex path: close indices and deactivate them for rebuild */
453 for (i = 0; i < nindices; i++)
454 index_close(Irel[i]);
455 Irel = (Relation *) NULL;
456 activate_indexes_of_a_table(relid, false);
459 /* Clean/scan index relation(s) */
460 if (Irel != (Relation *) NULL)
462 if (vacuum_pages.num_pages > 0)
464 for (i = 0; i < nindices; i++)
465 vacuum_index(&vacuum_pages, Irel[i],
466 vacrelstats->num_tuples, 0);
470 /* just scan indices to update statistic */
471 for (i = 0; i < nindices; i++)
472 scan_index(Irel[i], vacrelstats->num_tuples);
476 if (fraged_pages.num_pages > 0)
478 /* Try to shrink heap */
479 repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
484 if (Irel != (Relation *) NULL)
485 close_indices(nindices, Irel);
486 if (vacuum_pages.num_pages > 0)
488 /* Clean pages from vacuum_pages list */
489 vacuum_heap(vacrelstats, onerel, &vacuum_pages);
494 * Flush dirty pages out to disk. We must do this even if we
495 * didn't do anything else, because we want to ensure that all
496 * tuples have correct on-row commit status on disk (see
497 * bufmgr.c's comments for FlushRelationBuffers()).
499 i = FlushRelationBuffers(onerel, vacrelstats->num_pages);
501 elog(ERROR, "VACUUM (vacuum_rel): FlushRelationBuffers returned %d",
/* reindex path: re-activate (rebuild) the table's indexes */
506 activate_indexes_of_a_table(relid, true);
509 * ok - free vacuum_pages list of reaped pages
511 * Isn't this a waste of code? Upcoming commit should free memory, no?
513 if (vacuum_pages.num_pages > 0)
515 vacpage = vacuum_pages.pagedesc;
516 for (i = 0; i < vacuum_pages.num_pages; i++, vacpage++)
518 pfree(vacuum_pages.pagedesc);
519 if (fraged_pages.num_pages > 0)
520 pfree(fraged_pages.pagedesc)
523 /* all done with this class, but hold lock until commit */
524 heap_close(onerel, NoLock);
526 /* update statistics in pg_class */
527 update_relstats(vacrelstats->relid, vacrelstats->num_pages,
528 vacrelstats->num_tuples, vacrelstats->hasindex,
532 * If the relation has a secondary toast one, vacuum that too
533 * while we still hold the lock on the master table. We don't
534 * need to propagate "analyze" to it, because the toaster
535 * always uses hardcoded index access and statistics are
536 * totally unimportant for toast relations
538 if (toast_relid != InvalidOid)
539 vacuum_rel(toast_relid, false, true);
541 /* next command frees attribute stats */
543 CommitTransactionCommand();
547 * scan_heap() -- scan an open heap relation
549 * This routine sets commit times, constructs vacuum_pages list of
550 * empty/uninitialized pages and pages with dead tuples and
551 * ~LP_USED line pointers, constructs fraged_pages list of pages
552 * appropriate for purposes of shrinking and maintains statistics
553 * on the number of live tuples in a heap.
556 scan_heap(VRelStats *vacrelstats, Relation onerel,
557 VacPageList vacuum_pages, VacPageList fraged_pages)
575 uint32 tups_vacuumed,
586 Size min_tlen = MaxTupleSize;
589 bool do_shrinking = true; /* cleared if any tuple's fate is still in doubt */
590 VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
592 int free_vtlinks = 100;
595 getrusage(RUSAGE_SELF, &ru0);
597 relname = RelationGetRelationName(onerel);
598 elog(MESSAGE_LEVEL, "--Relation %s--", relname);
/* zero the per-relation counters */
600 tups_vacuumed = num_tuples = nkeep = nunused = ncrash = empty_pages =
601 new_pages = changed_pages = empty_end_pages = 0;
602 free_size = usable_free_size = 0;
604 nblocks = RelationGetNumberOfBlocks(onerel);
606 vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
607 vacpage->offsets_used = 0;
/* examine every block of the relation */
609 for (blkno = 0; blkno < nblocks; blkno++)
611 buf = ReadBuffer(onerel, blkno);
612 page = BufferGetPage(buf);
613 vacpage->blkno = blkno;
614 vacpage->offsets_free = 0;
618 elog(NOTICE, "Rel %s: Uninitialized page %u - fixing",
620 PageInit(page, BufferGetPageSize(buf), 0);
621 vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
622 free_size += (vacpage->free - sizeof(ItemIdData));
625 reap_page(vacuum_pages, vacpage);
630 if (PageIsEmpty(page))
632 vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
633 free_size += (vacpage->free - sizeof(ItemIdData));
636 reap_page(vacuum_pages, vacpage);
643 maxoff = PageGetMaxOffsetNumber(page);
644 for (offnum = FirstOffsetNumber;
646 offnum = OffsetNumberNext(offnum))
648 itemid = PageGetItemId(page, offnum);
651 * Collect un-used items too - it's possible to have indices
652 * pointing here after crash.
654 if (!ItemIdIsUsed(itemid))
656 vacpage->offsets[vacpage->offsets_free++] = offnum;
661 tuple.t_datamcxt = NULL;
662 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
663 tuple.t_len = ItemIdGetLength(itemid);
664 ItemPointerSet(&(tuple.t_self), blkno, offnum);
/* resolve xmin status for tuples whose commit state isn't hinted yet */
667 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
669 if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID)
671 else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
673 if (TransactionIdDidCommit((TransactionId)
674 tuple.t_data->t_cmin))
676 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
682 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
686 else if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
688 if (!TransactionIdDidCommit((TransactionId)
689 tuple.t_data->t_cmin))
691 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
697 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
703 if (TransactionIdDidAbort(tuple.t_data->t_xmin))
705 else if (TransactionIdDidCommit(tuple.t_data->t_xmin))
707 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
710 else if (!TransactionIdIsInProgress(tuple.t_data->t_xmin))
714 * Not Aborted, Not Committed, Not in Progress -
715 * so it's from crashed process. - vadim 11/26/96
722 elog(NOTICE, "Rel %s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
723 relname, blkno, offnum, tuple.t_data->t_xmin);
724 do_shrinking = false;
730 * here we are concerned about tuples with xmin committed and
731 * xmax unknown or committed
733 if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED &&
734 !(tuple.t_data->t_infomask & HEAP_XMAX_INVALID))
736 if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED)
738 if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
740 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
741 tuple.t_data->t_infomask &=
742 ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
748 else if (TransactionIdDidAbort(tuple.t_data->t_xmax))
750 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
753 else if (TransactionIdDidCommit(tuple.t_data->t_xmax))
755 if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
757 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
758 tuple.t_data->t_infomask &=
759 ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
765 else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax))
769 * Not Aborted, Not Committed, Not in Progress - so it
770 * from crashed process. - vadim 06/02/97
772 tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
773 tuple.t_data->t_infomask &=
774 ~(HEAP_XMAX_COMMITTED | HEAP_MARKED_FOR_UPDATE);
779 elog(NOTICE, "Rel %s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
780 relname, blkno, offnum, tuple.t_data->t_xmax);
781 do_shrinking = false;
785 * If tuple is recently deleted then we must not remove it
788 if (tupgone && (tuple.t_data->t_infomask & HEAP_XMIN_INVALID) == 0 && tuple.t_data->t_xmax >= XmaxRecent)
792 if (!(tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED))
794 tuple.t_data->t_infomask |= HEAP_XMAX_COMMITTED;
799 * If we do shrinking and this tuple is updated one
800 * then remember it to construct updated tuple
803 if (do_shrinking && !(ItemPointerEquals(&(tuple.t_self),
804 &(tuple.t_data->t_ctid))))
806 if (free_vtlinks == 0)
/* grow the update-chain link array when it fills up */
809 vtlinks = (VTupleLink) repalloc(vtlinks,
810 (free_vtlinks + num_vtlinks) *
811 sizeof(VTupleLinkData));
813 vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
814 vtlinks[num_vtlinks].this_tid = tuple.t_self;
824 if (!OidIsValid(tuple.t_data->t_oid))
826 elog(NOTICE, "Rel %s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
827 relname, blkno, offnum, tupgone);
835 * Here we are building a temporary copy of the page with
836 * dead tuples removed. Below we will apply
837 * PageRepairFragmentation to the copy, so that we can
838 * determine how much space will be available after
839 * removal of dead tuples. But note we are NOT changing
840 * the real page yet...
842 if (tempPage == (Page) NULL)
846 pageSize = PageGetPageSize(page);
847 tempPage = (Page) palloc(pageSize);
848 memmove(tempPage, page, pageSize);
851 /* mark it unused on the temp page */
852 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
853 lpp->lp_flags &= ~LP_USED;
855 vacpage->offsets[vacpage->offsets_free++] = offnum;
/* track min/max live tuple length for later free-space estimates */
862 if (tuple.t_len < min_tlen)
863 min_tlen = tuple.t_len;
864 if (tuple.t_len > max_tlen)
865 max_tlen = tuple.t_len;
878 if (tempPage != (Page) NULL)
879 { /* Some tuples are gone */
880 PageRepairFragmentation(tempPage);
881 vacpage->free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
882 free_size += vacpage->free;
883 reap_page(vacuum_pages, vacpage);
885 tempPage = (Page) NULL;
887 else if (vacpage->offsets_free > 0)
888 { /* there are only ~LP_USED line pointers */
889 vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
890 free_size += vacpage->free;
891 reap_page(vacuum_pages, vacpage);
903 /* save stats in the rel list for use later */
904 vacrelstats->num_tuples = num_tuples;
905 vacrelstats->num_pages = nblocks;
906 /* vacrelstats->natts = attr_cnt;*/
908 min_tlen = max_tlen = 0;
909 vacrelstats->min_tlen = min_tlen;
910 vacrelstats->max_tlen = max_tlen;
912 vacuum_pages->empty_end_pages = empty_end_pages;
913 fraged_pages->empty_end_pages = empty_end_pages;
916 * Try to make fraged_pages keeping in mind that we can't use free
917 * space of "empty" end-pages and last page if it reaped.
919 if (do_shrinking && vacuum_pages->num_pages - empty_end_pages > 0)
921 int nusf; /* blocks useful for re-using */
923 nusf = vacuum_pages->num_pages - empty_end_pages;
924 if ((vacuum_pages->pagedesc[nusf - 1])->blkno == nblocks - empty_end_pages - 1)
927 for (i = 0; i < nusf; i++)
929 vp = vacuum_pages->pagedesc[i];
930 if (enough_space(vp, min_tlen))
932 vpage_insert(fraged_pages, vp);
933 usable_free_size += vp->free;
/* keep the update-chain links only when shrinking is actually possible */
938 if (usable_free_size > 0 && num_vtlinks > 0)
940 qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
942 vacrelstats->vtlinks = vtlinks;
943 vacrelstats->num_vtlinks = num_vtlinks;
947 vacrelstats->vtlinks = NULL;
948 vacrelstats->num_vtlinks = 0;
952 elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
953 Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
954 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
955 nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
956 new_pages, num_tuples, tups_vacuumed,
957 nkeep, vacrelstats->num_vtlinks, ncrash,
958 nunused, (unsigned long)min_tlen, (unsigned long)max_tlen,
959 (unsigned long)free_size, (unsigned long)usable_free_size,
960 empty_end_pages, fraged_pages->num_pages,
967 * repair_frag() -- try to repair relation's fragmentation
969 * This routine marks dead tuples as unused and tries re-use dead space
970 * by moving tuples (and inserting indices if needed). It constructs
971 * Nvacpagelist list of free-ed pages (moved tuples) and clean indices
972 * for them after committing (in hack-manner - without losing locks
973 * and freeing memory!) current transaction. It truncates relation
974 * if some end-blocks are gone away.
977 repair_frag(VRelStats *vacrelstats, Relation onerel,
978 VacPageList vacuum_pages, VacPageList fraged_pages,
979 int nindices, Relation *Irel)
989 OffsetNumber offnum = 0,
998 IndexInfo **indexInfo = NULL;
999 Datum idatum[INDEX_MAX_KEYS];
1000 char inulls[INDEX_MAX_KEYS];
1001 InsertIndexResult iresult;
1002 VacPageListData Nvacpagelist;
1003 VacPage cur_page = NULL,
1008 int last_move_dest_block = -1,
1023 getrusage(RUSAGE_SELF, &ru0);
1025 myXID = GetCurrentTransactionId();
1026 myCID = GetCurrentCommandId();
1028 tupdesc = RelationGetDescr(onerel);
1030 if (Irel != (Relation *) NULL) /* preparation for index' inserts */
1031 indexInfo = get_index_desc(onerel, nindices, Irel);
1033 Nvacpagelist.num_pages = 0;
1034 num_fraged_pages = fraged_pages->num_pages;
1035 Assert(vacuum_pages->num_pages > vacuum_pages->empty_end_pages);
1036 vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
1037 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1038 last_vacuum_block = last_vacuum_page->blkno;
1039 cur_buffer = InvalidBuffer;
1042 vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
1043 vacpage->offsets_used = vacpage->offsets_free = 0;
1046 * Scan pages backwards from the last nonempty page, trying to move
1047 * tuples down to lower pages. Quit when we reach a page that we have
1048 * moved any tuples onto. Note that if a page is still in the
1049 * fraged_pages list (list of candidate move-target pages) when we
1050 * reach it, we will remove it from the list. This ensures we never
1051 * move a tuple up to a higher page number.
1053 * NB: this code depends on the vacuum_pages and fraged_pages lists being
1054 * in order, and on fraged_pages being a subset of vacuum_pages.
1056 nblocks = vacrelstats->num_pages;
1057 for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
1058 blkno > last_move_dest_block;
1061 buf = ReadBuffer(onerel, blkno);
1062 page = BufferGetPage(buf);
1064 vacpage->offsets_free = 0;
1066 isempty = PageIsEmpty(page);
1069 if (blkno == last_vacuum_block) /* it's reaped page */
1071 if (last_vacuum_page->offsets_free > 0) /* there are dead tuples */
1072 { /* on this page - clean */
1074 vacuum_page(page, last_vacuum_page);
1080 if (vacuumed_pages > 0)
1082 /* get prev reaped page from vacuum_pages */
1083 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1084 last_vacuum_block = last_vacuum_page->blkno;
1088 last_vacuum_page = NULL;
1089 last_vacuum_block = -1;
1091 if (num_fraged_pages > 0 &&
1092 fraged_pages->pagedesc[num_fraged_pages - 1]->blkno ==
1093 (BlockNumber) blkno)
1095 /* page is in fraged_pages too; remove it */
1107 chain_tuple_moved = false; /* no one chain-tuple was moved
1108 * off this page, yet */
1109 vacpage->blkno = blkno;
1110 maxoff = PageGetMaxOffsetNumber(page);
1111 for (offnum = FirstOffsetNumber;
1113 offnum = OffsetNumberNext(offnum))
1115 itemid = PageGetItemId(page, offnum);
1117 if (!ItemIdIsUsed(itemid))
1120 tuple.t_datamcxt = NULL;
1121 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1122 tuple_len = tuple.t_len = ItemIdGetLength(itemid);
1123 ItemPointerSet(&(tuple.t_self), blkno, offnum);
1125 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1127 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1128 elog(ERROR, "Invalid XID in t_cmin");
1129 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1130 elog(ERROR, "HEAP_MOVED_IN was not expected");
1133 * If this (chain) tuple is moved by me already then I
1134 * have to check is it in vacpage or not - i.e. is it moved
1135 * while cleaning this page or some previous one.
1137 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1139 if (keep_tuples == 0)
1141 if (chain_tuple_moved) /* some chains was moved
1143 { /* cleaning this page */
1144 Assert(vacpage->offsets_free > 0);
1145 for (i = 0; i < vacpage->offsets_free; i++)
1147 if (vacpage->offsets[i] == offnum)
1150 if (i >= vacpage->offsets_free) /* not found */
1152 vacpage->offsets[vacpage->offsets_free++] = offnum;
1158 vacpage->offsets[vacpage->offsets_free++] = offnum;
1163 elog(ERROR, "HEAP_MOVED_OFF was expected");
1167 * If this tuple is in the chain of tuples created in updates
1168 * by "recent" transactions then we have to move all chain of
1169 * tuples to another places.
1171 if ((tuple.t_data->t_infomask & HEAP_UPDATED &&
1172 tuple.t_data->t_xmin >= XmaxRecent) ||
1173 (!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1174 !(ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))))
1179 ItemPointerData Ctid;
1180 HeapTupleData tp = tuple;
1181 Size tlen = tuple_len;
1182 VTupleMove vtmove = (VTupleMove)
1183 palloc(100 * sizeof(VTupleMoveData));
1185 int free_vtmove = 100;
1186 VacPage to_vacpage = NULL;
1188 bool freeCbuf = false;
1191 if (vacrelstats->vtlinks == NULL)
1192 elog(ERROR, "No one parent tuple was found");
1193 if (cur_buffer != InvalidBuffer)
1195 WriteBuffer(cur_buffer);
1196 cur_buffer = InvalidBuffer;
1200 * If this tuple is in the begin/middle of the chain then
1201 * we have to move to the end of chain.
1203 while (!(tp.t_data->t_infomask & HEAP_XMAX_INVALID) &&
1204 !(ItemPointerEquals(&(tp.t_self), &(tp.t_data->t_ctid))))
1206 Ctid = tp.t_data->t_ctid;
1208 ReleaseBuffer(Cbuf);
1210 Cbuf = ReadBuffer(onerel,
1211 ItemPointerGetBlockNumber(&Ctid));
1212 Cpage = BufferGetPage(Cbuf);
1213 Citemid = PageGetItemId(Cpage,
1214 ItemPointerGetOffsetNumber(&Ctid));
1215 if (!ItemIdIsUsed(Citemid))
1219 * This means that in the middle of chain there
1220 * was tuple updated by older (than XmaxRecent)
1221 * xaction and this tuple is already deleted by
1222 * me. Actually, upper part of chain should be
1223 * removed and seems that this should be handled
1224 * in scan_heap(), but it's not implemented at
1225 * the moment and so we just stop shrinking here.
1227 ReleaseBuffer(Cbuf);
1230 elog(NOTICE, "Child itemid in update-chain marked as unused - can't continue repair_frag");
1233 tp.t_datamcxt = NULL;
1234 tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1236 tlen = tp.t_len = ItemIdGetLength(Citemid);
1240 /* first, can chain be moved ? */
1243 if (to_vacpage == NULL ||
1244 !enough_space(to_vacpage, tlen))
1248 * if to_vacpage no longer has enough free space to be
1249 * useful, remove it from fraged_pages list
1251 if (to_vacpage != NULL &&
1252 !enough_space(to_vacpage, vacrelstats->min_tlen))
1254 Assert(num_fraged_pages > to_item);
1255 memmove(fraged_pages->pagedesc + to_item,
1256 fraged_pages->pagedesc + to_item + 1,
1257 sizeof(VacPage) * (num_fraged_pages - to_item - 1));
1260 for (i = 0; i < num_fraged_pages; i++)
1262 if (enough_space(fraged_pages->pagedesc[i], tlen))
1266 /* can't move item anywhere */
1267 if (i == num_fraged_pages)
1269 for (i = 0; i < num_vtmove; i++)
1271 Assert(vtmove[i].vacpage->offsets_used > 0);
1272 (vtmove[i].vacpage->offsets_used)--;
1278 to_vacpage = fraged_pages->pagedesc[to_item];
1280 to_vacpage->free -= MAXALIGN(tlen);
1281 if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
1282 to_vacpage->free -= MAXALIGN(sizeof(ItemIdData));
1283 (to_vacpage->offsets_used)++;
1284 if (free_vtmove == 0)
1287 vtmove = (VTupleMove) repalloc(vtmove,
1288 (free_vtmove + num_vtmove) *
1289 sizeof(VTupleMoveData));
1291 vtmove[num_vtmove].tid = tp.t_self;
1292 vtmove[num_vtmove].vacpage = to_vacpage;
1293 if (to_vacpage->offsets_used == 1)
1294 vtmove[num_vtmove].cleanVpd = true;
1296 vtmove[num_vtmove].cleanVpd = false;
1301 if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
1302 tp.t_data->t_xmin < XmaxRecent)
1305 /* Well, try to find tuple with old row version */
1312 VTupleLinkData vtld,
1315 vtld.new_tid = tp.t_self;
1317 vac_find_eq((void *) (vacrelstats->vtlinks),
1318 vacrelstats->num_vtlinks,
1319 sizeof(VTupleLinkData),
1323 elog(ERROR, "Parent tuple was not found");
1324 tp.t_self = vtlp->this_tid;
1325 Pbuf = ReadBuffer(onerel,
1326 ItemPointerGetBlockNumber(&(tp.t_self)));
1327 Ppage = BufferGetPage(Pbuf);
1328 Pitemid = PageGetItemId(Ppage,
1329 ItemPointerGetOffsetNumber(&(tp.t_self)));
1330 if (!ItemIdIsUsed(Pitemid))
1331 elog(ERROR, "Parent itemid marked as unused");
1332 Ptp.t_datamcxt = NULL;
1333 Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
1334 Assert(ItemPointerEquals(&(vtld.new_tid),
1335 &(Ptp.t_data->t_ctid)));
1338 * Read above about cases when
1339 * !ItemIdIsUsed(Citemid) (child item is
1340 * removed)... Due to the fact that at the moment
1341 * we don't remove unuseful part of update-chain,
1342 * it's possible to get too old parent row here.
1343 * Like as in the case which caused this problem,
1344 * we stop shrinking here. I could try to find
1345 * real parent row but want not to do it because
1346 * of real solution will be implemented anyway,
1347 * latter, and we are too close to 6.5 release. -
1350 if (Ptp.t_data->t_xmax != tp.t_data->t_xmin)
1353 ReleaseBuffer(Cbuf);
1355 ReleaseBuffer(Pbuf);
1356 for (i = 0; i < num_vtmove; i++)
1358 Assert(vtmove[i].vacpage->offsets_used > 0);
1359 (vtmove[i].vacpage->offsets_used)--;
1362 elog(NOTICE, "Too old parent tuple found - can't continue repair_frag");
1365 #ifdef NOT_USED /* I'm not sure that this will work
1369 * If this tuple is updated version of row and it
1370 * was created by the same transaction then no one
1371 * is interested in this tuple - mark it as
1374 if (Ptp.t_data->t_infomask & HEAP_UPDATED &&
1375 Ptp.t_data->t_xmin == Ptp.t_data->t_xmax)
1377 TransactionIdStore(myXID,
1378 (TransactionId *) &(Ptp.t_data->t_cmin));
1379 Ptp.t_data->t_infomask &=
1380 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1381 Ptp.t_data->t_infomask |= HEAP_MOVED_OFF;
1386 tp.t_datamcxt = Ptp.t_datamcxt;
1387 tp.t_data = Ptp.t_data;
1388 tlen = tp.t_len = ItemIdGetLength(Pitemid);
1390 ReleaseBuffer(Cbuf);
1395 if (num_vtmove == 0)
1399 ReleaseBuffer(Cbuf);
1400 if (num_vtmove == 0) /* chain can't be moved */
1405 ItemPointerSetInvalid(&Ctid);
1406 for (ti = 0; ti < num_vtmove; ti++)
1408 VacPage destvacpage = vtmove[ti].vacpage;
1410 /* Get page to move from */
1411 tuple.t_self = vtmove[ti].tid;
1412 Cbuf = ReadBuffer(onerel,
1413 ItemPointerGetBlockNumber(&(tuple.t_self)));
1415 /* Get page to move to */
1416 cur_buffer = ReadBuffer(onerel, destvacpage->blkno);
1418 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1419 if (cur_buffer != Cbuf)
1420 LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE);
1422 ToPage = BufferGetPage(cur_buffer);
1423 Cpage = BufferGetPage(Cbuf);
1425 /* NO ELOG(ERROR) TILL CHANGES ARE LOGGED */
1427 Citemid = PageGetItemId(Cpage,
1428 ItemPointerGetOffsetNumber(&(tuple.t_self)));
1429 tuple.t_datamcxt = NULL;
1430 tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
1431 tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
1434 * make a copy of the source tuple, and then mark the
1435 * source tuple MOVED_OFF.
1437 heap_copytuple_with_tuple(&tuple, &newtup);
1439 RelationInvalidateHeapTuple(onerel, &tuple);
1441 TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1442 tuple.t_data->t_infomask &=
1443 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1444 tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1447 * If this page was not used before - clean it.
1449 * NOTE: a nasty bug used to lurk here. It is possible
1450 * for the source and destination pages to be the same
1451 * (since this tuple-chain member can be on a page lower
1452 * than the one we're currently processing in the outer
1453 * loop). If that's true, then after vacuum_page() the
1454 * source tuple will have been moved, and tuple.t_data
1455 * will be pointing at garbage. Therefore we must do
1456 * everything that uses tuple.t_data BEFORE this step!!
1458 * This path is different from the other callers of
1459 * vacuum_page, because we have already incremented the
1460 * vacpage's offsets_used field to account for the
1461 * tuple(s) we expect to move onto the page. Therefore
1462 * vacuum_page's check for offsets_used == 0 is
1463 * wrong. But since that's a good debugging check for
1464 * all other callers, we work around it here rather
1467 if (!PageIsEmpty(ToPage) && vtmove[ti].cleanVpd)
1469 int sv_offsets_used = destvacpage->offsets_used;
1471 destvacpage->offsets_used = 0;
1472 vacuum_page(ToPage, destvacpage);
1473 destvacpage->offsets_used = sv_offsets_used;
1477 * Update the state of the copied tuple, and store it
1478 * on the destination page.
1480 TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1481 newtup.t_data->t_infomask &=
1482 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1483 newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1484 newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1485 InvalidOffsetNumber, LP_USED);
1486 if (newoff == InvalidOffsetNumber)
1488 elog(STOP, "moving chain: failed to add item with len = %lu to page %u",
1489 (unsigned long)tuple_len, destvacpage->blkno);
1491 newitemid = PageGetItemId(ToPage, newoff);
1492 pfree(newtup.t_data);
1493 newtup.t_datamcxt = NULL;
1494 newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1495 ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff);
1500 log_heap_move(onerel, tuple.t_self, &newtup);
1502 if (Cbuf != cur_buffer)
1504 PageSetLSN(Cpage, recptr);
1505 PageSetSUI(Cpage, ThisStartUpID);
1507 PageSetLSN(ToPage, recptr);
1508 PageSetSUI(ToPage, ThisStartUpID);
1512 if (((int) destvacpage->blkno) > last_move_dest_block)
1513 last_move_dest_block = destvacpage->blkno;
1516 * Set new tuple's t_ctid pointing to itself for last
1517 * tuple in chain, and to next tuple in chain otherwise.
1519 if (!ItemPointerIsValid(&Ctid))
1520 newtup.t_data->t_ctid = newtup.t_self;
1522 newtup.t_data->t_ctid = Ctid;
1523 Ctid = newtup.t_self;
1528 * Remember that we moved tuple from the current page
1529 * (corresponding index tuple will be cleaned).
1532 vacpage->offsets[vacpage->offsets_free++] =
1533 ItemPointerGetOffsetNumber(&(tuple.t_self));
1537 LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1538 if (cur_buffer != Cbuf)
1539 LockBuffer(Cbuf, BUFFER_LOCK_UNLOCK);
1541 if (Irel != (Relation *) NULL)
1544 * XXX using CurrentMemoryContext here means
1545 * intra-vacuum memory leak for functional indexes.
1546 * Should fix someday.
1548 * XXX This code fails to handle partial indexes!
1549 * Probably should change it to use ExecOpenIndices.
1551 for (i = 0; i < nindices; i++)
1553 FormIndexDatum(indexInfo[i],
1556 CurrentMemoryContext,
1559 iresult = index_insert(Irel[i],
1568 WriteBuffer(cur_buffer);
1571 cur_buffer = InvalidBuffer;
1573 chain_tuple_moved = true;
1577 /* try to find new page for this tuple */
1578 if (cur_buffer == InvalidBuffer ||
1579 !enough_space(cur_page, tuple_len))
1581 if (cur_buffer != InvalidBuffer)
1583 WriteBuffer(cur_buffer);
1584 cur_buffer = InvalidBuffer;
1587 * If previous target page is now too full to add *any*
1588 * tuple to it, remove it from fraged_pages.
1590 if (!enough_space(cur_page, vacrelstats->min_tlen))
1592 Assert(num_fraged_pages > cur_item);
1593 memmove(fraged_pages->pagedesc + cur_item,
1594 fraged_pages->pagedesc + cur_item + 1,
1595 sizeof(VacPage) * (num_fraged_pages - cur_item - 1));
1599 for (i = 0; i < num_fraged_pages; i++)
1601 if (enough_space(fraged_pages->pagedesc[i], tuple_len))
1604 if (i == num_fraged_pages)
1605 break; /* can't move item anywhere */
1607 cur_page = fraged_pages->pagedesc[cur_item];
1608 cur_buffer = ReadBuffer(onerel, cur_page->blkno);
1609 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1610 ToPage = BufferGetPage(cur_buffer);
1611 /* if this page was not used before - clean it */
1612 if (!PageIsEmpty(ToPage) && cur_page->offsets_used == 0)
1613 vacuum_page(ToPage, cur_page);
1616 LockBuffer(cur_buffer, BUFFER_LOCK_EXCLUSIVE);
1618 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1621 heap_copytuple_with_tuple(&tuple, &newtup);
1623 RelationInvalidateHeapTuple(onerel, &tuple);
1626 * Mark new tuple as moved_in by vacuum and store vacuum XID
1629 TransactionIdStore(myXID, (TransactionId *) &(newtup.t_data->t_cmin));
1630 newtup.t_data->t_infomask &=
1631 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_OFF);
1632 newtup.t_data->t_infomask |= HEAP_MOVED_IN;
1634 /* add tuple to the page */
1635 newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
1636 InvalidOffsetNumber, LP_USED);
1637 if (newoff == InvalidOffsetNumber)
1640 failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
1641 (unsigned long)tuple_len, cur_page->blkno, (unsigned long)cur_page->free,
1642 cur_page->offsets_used, cur_page->offsets_free);
1644 newitemid = PageGetItemId(ToPage, newoff);
1645 pfree(newtup.t_data);
1646 newtup.t_datamcxt = NULL;
1647 newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
1648 ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->blkno, newoff);
1649 newtup.t_self = newtup.t_data->t_ctid;
1652 * Mark old tuple as moved_off by vacuum and store vacuum XID
1655 TransactionIdStore(myXID, (TransactionId *) &(tuple.t_data->t_cmin));
1656 tuple.t_data->t_infomask &=
1657 ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN);
1658 tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
1663 log_heap_move(onerel, tuple.t_self, &newtup);
1665 PageSetLSN(page, recptr);
1666 PageSetSUI(page, ThisStartUpID);
1667 PageSetLSN(ToPage, recptr);
1668 PageSetSUI(ToPage, ThisStartUpID);
1672 cur_page->offsets_used++;
1674 cur_page->free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
1675 if (((int) cur_page->blkno) > last_move_dest_block)
1676 last_move_dest_block = cur_page->blkno;
1678 vacpage->offsets[vacpage->offsets_free++] = offnum;
1680 LockBuffer(cur_buffer, BUFFER_LOCK_UNLOCK);
1681 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1683 /* insert index' tuples if needed */
1684 if (Irel != (Relation *) NULL)
1687 * XXX using CurrentMemoryContext here means
1688 * intra-vacuum memory leak for functional indexes.
1689 * Should fix someday.
1691 * XXX This code fails to handle partial indexes!
1692 * Probably should change it to use ExecOpenIndices.
1694 for (i = 0; i < nindices; i++)
1696 FormIndexDatum(indexInfo[i],
1699 CurrentMemoryContext,
1702 iresult = index_insert(Irel[i],
1712 } /* walk along page */
1714 if (offnum < maxoff && keep_tuples > 0)
1718 for (off = OffsetNumberNext(offnum);
1720 off = OffsetNumberNext(off))
1722 itemid = PageGetItemId(page, off);
1723 if (!ItemIdIsUsed(itemid))
1725 tuple.t_datamcxt = NULL;
1726 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1727 if (tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)
1729 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1730 elog(ERROR, "Invalid XID in t_cmin (4)");
1731 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1732 elog(ERROR, "HEAP_MOVED_IN was not expected (2)");
1733 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1735 /* some chains were moved while */
1736 if (chain_tuple_moved)
1737 { /* cleaning this page */
1738 Assert(vacpage->offsets_free > 0);
1739 for (i = 0; i < vacpage->offsets_free; i++)
1741 if (vacpage->offsets[i] == off)
1744 if (i >= vacpage->offsets_free) /* not found */
1746 vacpage->offsets[vacpage->offsets_free++] = off;
1747 Assert(keep_tuples > 0);
1753 vacpage->offsets[vacpage->offsets_free++] = off;
1754 Assert(keep_tuples > 0);
1761 if (vacpage->offsets_free > 0) /* some tuples were moved */
1763 if (chain_tuple_moved) /* else - they are ordered */
1765 qsort((char *) (vacpage->offsets), vacpage->offsets_free,
1766 sizeof(OffsetNumber), vac_cmp_offno);
1768 reap_page(&Nvacpagelist, vacpage);
1776 if (offnum <= maxoff)
1777 break; /* some item(s) left */
1779 } /* walk along relation */
1781 blkno++; /* new number of blocks */
1783 if (cur_buffer != InvalidBuffer)
1785 Assert(num_moved > 0);
1786 WriteBuffer(cur_buffer);
1792 RecordTransactionCommit();
1795 * We have to commit our tuple movings before we truncate the
1796 * relation, but we shouldn't lose our locks. And so - quick hack:
1797 * flush buffers and record the status of the current transaction as
1798 * committed, and continue. - vadim 11/13/96
1801 TransactionIdCommit(myXID);
1807 * Clean uncleaned reaped pages from the vacuum_pages list and set
1808 * xmin committed for inserted tuples
1811 for (i = 0, curpage = vacuum_pages->pagedesc; i < vacuumed_pages; i++, curpage++)
1813 Assert((*curpage)->blkno < (BlockNumber) blkno);
1814 buf = ReadBuffer(onerel, (*curpage)->blkno);
1815 page = BufferGetPage(buf);
1816 if ((*curpage)->offsets_used == 0) /* this page was not used */
1818 if (!PageIsEmpty(page))
1819 vacuum_page(page, *curpage);
1822 /* this page was used */
1825 max_offset = PageGetMaxOffsetNumber(page);
1826 for (newoff = FirstOffsetNumber;
1827 newoff <= max_offset;
1828 newoff = OffsetNumberNext(newoff))
1830 itemid = PageGetItemId(page, newoff);
1831 if (!ItemIdIsUsed(itemid))
1833 tuple.t_datamcxt = NULL;
1834 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1835 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1837 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1838 elog(ERROR, "Invalid XID in t_cmin (2)");
1839 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
1841 tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
1844 else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1845 tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
1847 elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
1850 Assert((*curpage)->offsets_used == num_tuples);
1851 checked_moved += num_tuples;
1855 Assert(num_moved == checked_moved);
1857 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s",
1858 RelationGetRelationName(onerel),
1859 nblocks, blkno, num_moved,
1862 if (Nvacpagelist.num_pages > 0)
1864 /* vacuum indices again if needed */
1865 if (Irel != (Relation *) NULL)
1871 /* re-sort Nvacpagelist.pagedesc */
1872 for (vpleft = Nvacpagelist.pagedesc,
1873 vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1;
1874 vpleft < vpright; vpleft++, vpright--)
1880 Assert(keep_tuples >= 0);
1881 for (i = 0; i < nindices; i++)
1882 vacuum_index(&Nvacpagelist, Irel[i],
1883 vacrelstats->num_tuples, keep_tuples);
1886 /* clean moved tuples from last page in Nvacpagelist list */
1887 if (vacpage->blkno == (BlockNumber) (blkno - 1) &&
1888 vacpage->offsets_free > 0)
1890 buf = ReadBuffer(onerel, vacpage->blkno);
1891 page = BufferGetPage(buf);
1893 for (offnum = FirstOffsetNumber;
1895 offnum = OffsetNumberNext(offnum))
1897 itemid = PageGetItemId(page, offnum);
1898 if (!ItemIdIsUsed(itemid))
1900 tuple.t_datamcxt = NULL;
1901 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1903 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1905 if ((TransactionId) tuple.t_data->t_cmin != myXID)
1906 elog(ERROR, "Invalid XID in t_cmin (3)");
1907 if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
1909 itemid->lp_flags &= ~LP_USED;
1913 elog(ERROR, "HEAP_MOVED_OFF was expected (2)");
1917 Assert(vacpage->offsets_free == num_tuples);
1918 PageRepairFragmentation(page);
1922 /* now - free new list of reaped pages */
1923 curpage = Nvacpagelist.pagedesc;
1924 for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++)
1926 pfree(Nvacpagelist.pagedesc);
1930 * Flush dirty pages out to disk. We do this unconditionally, even if
1931 * we don't need to truncate, because we want to ensure that all tuples
1932 * have correct on-row commit status on disk (see bufmgr.c's comments
1933 * for FlushRelationBuffers()).
1935 i = FlushRelationBuffers(onerel, blkno);
1937 elog(ERROR, "VACUUM (repair_frag): FlushRelationBuffers returned %d",
1940 /* truncate relation, if needed */
1941 if (blkno < nblocks)
1943 blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
1945 vacrelstats->num_pages = blkno; /* set new number of blocks */
1948 if (Irel != (Relation *) NULL) /* pfree index' allocations */
1950 close_indices(nindices, Irel);
1955 if (vacrelstats->vtlinks != NULL)
1956 pfree(vacrelstats->vtlinks);
1960 * vacuum_heap() -- free dead tuples
1962 * This routine marks dead tuples as unused and truncates relation
1963 * if there are "empty" end-blocks.
1966 vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
/*
 * First pass: rewrite each reaped page that still carries dead tuples.
 * Wholly-empty pages at the end of the relation are NOT rewritten here;
 * they are removed by the truncation step at the bottom.
 */
1974 nblocks = vacuum_pages->num_pages;
1975 nblocks -= vacuum_pages->empty_end_pages; /* nothing to do with
1978 for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++)
1980 if ((*vacpage)->offsets_free > 0)
/* page holds dead tuples: clear their line pointers and defragment it */
1982 buf = ReadBuffer(onerel, (*vacpage)->blkno);
1983 page = BufferGetPage(buf);
1984 vacuum_page(page, *vacpage);
1990 * Flush dirty pages out to disk. We do this unconditionally, even if
1991 * we don't need to truncate, because we want to ensure that all tuples
1992 * have correct on-row commit status on disk (see bufmgr.c's comments
1993 * for FlushRelationBuffers()).
1995 Assert(vacrelstats->num_pages >= vacuum_pages->empty_end_pages);
1996 nblocks = vacrelstats->num_pages - vacuum_pages->empty_end_pages;
/* a FlushRelationBuffers failure is fatal (elog(ERROR) below) */
1998 i = FlushRelationBuffers(onerel, nblocks);
2000 elog(ERROR, "VACUUM (vacuum_heap): FlushRelationBuffers returned %d",
2003 /* truncate relation if there are some empty end-pages */
2004 if (vacuum_pages->empty_end_pages > 0)
2006 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
2007 RelationGetRelationName(onerel),
2008 vacrelstats->num_pages, nblocks);
2009 nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
2010 Assert(nblocks >= 0);
2011 vacrelstats->num_pages = nblocks; /* set new number of blocks */
2016 * vacuum_page() -- free dead tuples on a page
2017 * and repair its fragmentation.
2020 vacuum_page(Page page, VacPage vacpage)
2025 /* There shouldn't be any tuples moved onto the page yet! */
2026 Assert(vacpage->offsets_used == 0);
/*
 * offsets[] holds 1-based OffsetNumbers of the dead tuples, hence the
 * "- 1" when indexing the zero-based pd_linp line-pointer array.
 */
2028 for (i = 0; i < vacpage->offsets_free; i++)
2030 itemid = &(((PageHeader) page)->pd_linp[vacpage->offsets[i] - 1]);
2031 itemid->lp_flags &= ~LP_USED;
/* compact the page now that the dead item slots are marked unused */
2033 PageRepairFragmentation(page);
2038 * _scan_index() -- scan one index relation to update statistic.
2042 scan_index(Relation indrel, int num_tuples)
2044 RetrieveIndexResult res;
2045 IndexScanDesc iscan;
/* snapshot resource usage so the report below can show CPU time spent */
2050 getrusage(RUSAGE_SELF, &ru0);
2052 /* walk through the entire index */
2053 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
/* keyless full scan: just count the index entries (nitups) */
2056 while ((res = index_getnext(iscan, ForwardScanDirection))
2057 != (RetrieveIndexResult) NULL)
2063 index_endscan(iscan);
2065 /* now update statistics in pg_class */
2066 nipages = RelationGetNumberOfBlocks(indrel);
2067 update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
2069 elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
2070 RelationGetRelationName(indrel), nipages, nitups,
/* warn if the index entry count disagrees with the heap tuple count */
2073 if (nitups != num_tuples)
2074 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2075 \n\tRecreate the index.",
2076 RelationGetRelationName(indrel), nitups, num_tuples);
2081 * vacuum_index() -- vacuum one index relation.
2083 * Vpl is the VacPageList of the heap we're currently vacuuming.
2084 * It's locked. Indrel is an index relation on the vacuumed heap.
2085 * We don't set locks on the index relation here, since the indexed
2086 * access methods support locking at different granularities.
2087 * We let them handle it.
2089 * Finally, we arrange to update the index relation's statistics in
2093 vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
2095 RetrieveIndexResult res;
2096 IndexScanDesc iscan;
2097 ItemPointer heapptr;
2099 int num_index_tuples;
2104 getrusage(RUSAGE_SELF, &ru0);
2106 /* walk through the entire index */
2107 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
2109 num_index_tuples = 0;
2111 while ((res = index_getnext(iscan, ForwardScanDirection))
2112 != (RetrieveIndexResult) NULL)
2114 heapptr = &res->heap_iptr;
/* delete the index entry iff its heap tuple was reaped by this vacuum */
2116 if ((vp = tid_reaped(heapptr, vacpagelist)) != (VacPage) NULL)
2119 elog(DEBUG, "<%x,%x> -> <%x,%x>",
2120 ItemPointerGetBlockNumber(&(res->index_iptr)),
2121 ItemPointerGetOffsetNumber(&(res->index_iptr)),
2122 ItemPointerGetBlockNumber(&(res->heap_iptr)),
2123 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
/* offsets_free == 0 marks a wholly-empty page: every TID on it is dead */
2125 if (vp->offsets_free == 0)
2127 elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
2128 RelationGetRelationName(indrel),
2129 vp->blkno, ItemPointerGetOffsetNumber(heapptr));
2132 index_delete(indrel, &res->index_iptr);
2140 index_endscan(iscan);
2142 /* now update statistics in pg_class */
2143 num_pages = RelationGetNumberOfBlocks(indrel);
2144 update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
2146 elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
2147 RelationGetRelationName(indrel), num_pages,
2148 num_index_tuples - keep_tuples, tups_vacuumed,
/* keep_tuples = moved tuples whose index entries were deliberately kept */
2151 if (num_index_tuples != num_tuples + keep_tuples)
2152 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
2153 \n\tRecreate the index.",
2154 RelationGetRelationName(indrel), num_index_tuples, num_tuples);
2159 * tid_reaped() -- is a particular tid reaped?
2161 * vacpagelist->VacPage_array is sorted in right order.
2164 tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
2166 OffsetNumber ioffno;
2170 VacPageData vacpage;
/* split the TID into block number (page-level key) and tuple offset */
2172 vacpage.blkno = ItemPointerGetBlockNumber(itemptr);
2173 ioffno = ItemPointerGetOffsetNumber(itemptr);
/*
 * Binary-search the sorted page array for this block.
 * NOTE(review): assumes vp points at the local `vacpage` search key —
 * the assignment is not visible here; confirm against the full source.
 */
2176 vpp = (VacPage *) vac_find_eq((void *) (vacpagelist->pagedesc),
2177 vacpagelist->num_pages, sizeof(VacPage), (void *) &vp,
2180 if (vpp == (VacPage *) NULL)
2181 return (VacPage) NULL;
2184 /* ok - we are on true page */
2186 if (vp->offsets_free == 0)
2187 { /* this is EmptyPage !!! */
/* non-empty page: search its sorted offsets[] for this tuple's slot */
2191 voff = (OffsetNumber *) vac_find_eq((void *) (vp->offsets),
2192 vp->offsets_free, sizeof(OffsetNumber), (void *) &ioffno,
2195 if (voff == (OffsetNumber *) NULL)
2196 return (VacPage) NULL;
2203 * update_relstats() -- update statistics for one relation
2205 * Statistics are stored in several places: the pg_class row for the
2206 * relation has stats about the whole relation, the pg_attribute rows
2207 * for each attribute store "dispersion", and there is a pg_statistic
2208 * row for each (non-system) attribute. (Dispersion probably ought to
2209 * be moved to pg_statistic, but it's not worth doing unless there's
2210 * another reason to have to change pg_attribute.) Dispersion and
2211 * pg_statistic values are only updated by VACUUM ANALYZE, but we
2212 * always update the stats in pg_class.
2214 * This routine works for both index and heap relation entries in
2215 * pg_class. We violate no-overwrite semantics here by storing new
2216 * values for the statistics columns directly into the pg_class
2217 * tuple that's already on the page. The reason for this is that if
2218 * we updated these tuples in the usual way, vacuuming pg_class itself
2219 * wouldn't work very well --- by the time we got done with a vacuum
2220 * cycle, most of the tuples in pg_class would've been obsoleted.
2221 * Updating pg_class's own statistics would be especially tricky.
2222 * Of course, this only works for fixed-size never-null columns, but
2226 update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
2227 VRelStats *vacrelstats)
2232 Form_pg_class pgcform;
2236 * update number of tuples and number of pages in pg_class
2238 rd = heap_openr(RelationRelationName, RowExclusiveLock);
/* syscache copy is used only to learn the tuple's TID (t_self)... */
2240 ctup = SearchSysCacheTupleCopy(RELOID,
2241 ObjectIdGetDatum(relid),
2243 if (!HeapTupleIsValid(ctup))
2244 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
2247 /* get the buffer cache tuple */
2248 rtup.t_self = ctup->t_self;
/* ...then re-fetch the live tuple in its buffer so we can scribble on it */
2249 heap_fetch(rd, SnapshotNow, &rtup, &buffer);
2250 heap_freetuple(ctup);
2252 /* overwrite the existing statistics in the tuple */
2253 pgcform = (Form_pg_class) GETSTRUCT(&rtup);
2254 pgcform->reltuples = num_tuples;
2255 pgcform->relpages = num_pages;
2256 pgcform->relhasindex = hasindex;
2258 /* invalidate the tuple in the cache and write the buffer */
2259 RelationInvalidateHeapTuple(rd, &rtup);
2260 WriteBuffer(buffer);
2262 heap_close(rd, RowExclusiveLock);
2266 * reap_page() -- save a page on the array of reaped pages.
2268 * As a side effect of the way that the vacuuming loop for a given
2269 * relation works, higher pages come after lower pages in the array
2270 * (and highest tid on a page is last).
2273 reap_page(VacPageList vacpagelist, VacPage vacpage)
2277 /* allocate a VacPageData entry */
/* sized to hold exactly offsets_free trailing OffsetNumber entries */
2278 newvacpage = (VacPage) palloc(sizeof(VacPageData) + vacpage->offsets_free * sizeof(OffsetNumber));
2281 if (vacpage->offsets_free > 0)
2282 memmove(newvacpage->offsets, vacpage->offsets, vacpage->offsets_free * sizeof(OffsetNumber));
2283 newvacpage->blkno = vacpage->blkno;
2284 newvacpage->free = vacpage->free;
2285 newvacpage->offsets_used = vacpage->offsets_used;
2286 newvacpage->offsets_free = vacpage->offsets_free;
2288 /* insert this page into vacpagelist list */
2289 vpage_insert(vacpagelist, newvacpage);
/* Append vpnew to vacpagelist's pagedesc array, growing it as needed. */
2294 vpage_insert(VacPageList vacpagelist, VacPage vpnew)
2296 #define PG_NPAGEDESC 1024
2298 /* allocate a VacPage entry if needed */
2299 if (vacpagelist->num_pages == 0)
/* first insertion: start with a fixed chunk of PG_NPAGEDESC slots */
2301 vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage));
2302 vacpagelist->num_allocated_pages = PG_NPAGEDESC;
2304 else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages)
/* full: double the capacity to keep appends amortized O(1) */
2306 vacpagelist->num_allocated_pages *= 2;
2307 vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage));
2309 vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew;
2310 (vacpagelist->num_pages)++;
/*
 * vac_find_eq() -- binary search for `elm` in the sorted array `bot` of
 * `nelem` items of `size` bytes each, using `compar`; returns a pointer
 * to the matching element, or NULL if not present.
 */
2315 vac_find_eq(void *bot, int nelem, int size, void *elm,
2316 int (*compar) (const void *, const void *))
2319 int last = nelem - 1;
2320 int celm = nelem / 2;
/* first_move/last_move lazily re-test a boundary element only after the
 * corresponding bound has moved */
2324 last_move = first_move = true;
2327 if (first_move == true)
2329 res = compar(bot, elm);
2336 if (last_move == true)
2338 res = compar(elm, (void *) ((char *) bot + last * size));
2342 return (void *) ((char *) bot + last * size);
/* probe the midpoint element */
2345 res = compar(elm, (void *) ((char *) bot + celm * size));
2347 return (void *) ((char *) bot + celm * size);
/* element sorts above the midpoint: shift the search window past celm */
2361 last = last - celm - 1;
2362 bot = (void *) ((char *) bot + (celm + 1) * size);
2363 celm = (last + 1) / 2;
/* qsort/vac_find_eq comparator: orders VacPage pointers by block number */
2370 vac_cmp_blk(const void *left, const void *right)
2375 lblk = (*((VacPage *) left))->blkno;
2376 rblk = (*((VacPage *) right))->blkno;
/* comparator for OffsetNumber arrays (used to sort vacpage->offsets) */
2387 vac_cmp_offno(const void *left, const void *right)
2390 if (*(OffsetNumber *) left < *(OffsetNumber *) right)
2392 if (*(OffsetNumber *) left == *(OffsetNumber *) right)
/*
 * Comparator ordering VTupleLinks by new_tid: lexicographic on
 * (bi_hi, bi_lo, ip_posid), i.e. plain item-pointer (TID) order.
 */
2399 vac_cmp_vtlinks(const void *left, const void *right)
2402 if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi <
2403 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2405 if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi >
2406 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
2408 /* bi_hi-es are equal */
2409 if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo <
2410 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2412 if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo >
2413 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
2415 /* bi_lo-es are equal */
2416 if (((VTupleLink) left)->new_tid.ip_posid <
2417 ((VTupleLink) right)->new_tid.ip_posid)
2419 if (((VTupleLink) left)->new_tid.ip_posid >
2420 ((VTupleLink) right)->new_tid.ip_posid)
/*
 * get_indices() -- open every index on `relation`; returns the count and
 * an array of opened index Relations through the out-parameters.
 * Caller releases them via close_indices().
 */
2428 get_indices(Relation relation, int *nindices, Relation **Irel)
2434 indexoidlist = RelationGetIndexList(relation);
2436 *nindices = length(indexoidlist);
2439 *Irel = (Relation *) palloc(*nindices * sizeof(Relation));
2444 foreach(indexoidscan, indexoidlist)
2446 Oid indexoid = lfirsti(indexoidscan);
2448 (*Irel)[i] = index_open(indexoid);
2452 freeList(indexoidlist);
/*
 * close_indices() -- close each index opened by get_indices().
 * No-op when Irel is NULL (relation had no indices).
 * NOTE(review): looks like nindices is decremented before each use as an
 * array index — the loop header is not visible here; confirm.
 */
2457 close_indices(int nindices, Relation *Irel)
2460 if (Irel == (Relation *) NULL)
2464 index_close(Irel[nindices]);
2471 * Obtain IndexInfo data for each index on the rel
2474 get_index_desc(Relation onerel, int nindices, Relation *Irel)
2476 IndexInfo **indexInfo;
2478 HeapTuple cachetuple;
2480 indexInfo = (IndexInfo **) palloc(nindices * sizeof(IndexInfo *));
2482 for (i = 0; i < nindices; i++)
/* look up each index's pg_index tuple in the syscache by its OID */
2484 cachetuple = SearchSysCacheTuple(INDEXRELID,
2485 ObjectIdGetDatum(RelationGetRelid(Irel[i])),
/* a vanished pg_index row means the catalogs are inconsistent: bail out */
2487 if (!HeapTupleIsValid(cachetuple))
2488 elog(ERROR, "get_index_desc: index %u not found",
2489 RelationGetRelid(Irel[i]));
2490 indexInfo[i] = BuildIndexInfo(cachetuple);
/*
 * enough_space() -- will a tuple of size `len` fit on `vacpage`?
 * Accounts for line-pointer overhead when no freed slot can be recycled.
 */
2498 enough_space(VacPage vacpage, Size len)
2501 len = MAXALIGN(len);
/* not even the aligned tuple data fits */
2503 if (len > vacpage->free)
/* a freed line pointer can be reused, so only data space is required */
2506 if (vacpage->offsets_used < vacpage->offsets_free) /* there are free
2508 return true; /* and len <= free_space */
2510 /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
2511 if (len + MAXALIGN(sizeof(ItemIdData)) <= vacpage->free)
2520 * Compute elapsed time since ru0 usage snapshot, and format into
2521 * a displayable string. Result is in a static string, which is
2522 * tacky, but no one ever claimed that the Postgres backend is
2526 show_rusage(struct rusage * ru0)
2528 static char result[64];
2531 getrusage(RUSAGE_SELF, &ru1);
/* normalize: borrow one second when the microsecond delta is negative */
2533 if (ru1.ru_stime.tv_usec < ru0->ru_stime.tv_usec)
2535 ru1.ru_stime.tv_sec--;
2536 ru1.ru_stime.tv_usec += 1000000;
2538 if (ru1.ru_utime.tv_usec < ru0->ru_utime.tv_usec)
2540 ru1.ru_utime.tv_sec--;
2541 ru1.ru_utime.tv_usec += 1000000;
/* "/ 10000" converts the microsecond remainder to hundredths of a second */
2544 snprintf(result, sizeof(result),
2545 "CPU %d.%02ds/%d.%02du sec.",
2546 (int) (ru1.ru_stime.tv_sec - ru0->ru_stime.tv_sec),
2547 (int) (ru1.ru_stime.tv_usec - ru0->ru_stime.tv_usec) / 10000,
2548 (int) (ru1.ru_utime.tv_sec - ru0->ru_utime.tv_sec),
2549 (int) (ru1.ru_utime.tv_usec - ru0->ru_utime.tv_usec) / 10000);