1 /*-------------------------------------------------------------------------
4 * the postgres vacuum cleaner
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.35 1997/06/07 05:18:47 momjian Exp $
12 *-------------------------------------------------------------------------
16 #include <sys/types.h>
24 #include <utils/portal.h>
25 #include <access/genam.h>
26 #include <access/heapam.h>
27 #include <access/xact.h>
28 #include <storage/bufmgr.h>
29 #include <access/transam.h>
30 #include <catalog/pg_index.h>
31 #include <catalog/index.h>
32 #include <catalog/catname.h>
33 #include <catalog/catalog.h>
34 #include <catalog/pg_class.h>
35 #include <catalog/pg_proc.h>
36 #include <catalog/pg_statistic.h>
37 #include <catalog/pg_type.h>
38 #include <catalog/pg_operator.h>
39 #include <storage/smgr.h>
40 #include <storage/lmgr.h>
41 #include <utils/inval.h>
42 #include <utils/mcxt.h>
43 #include <utils/inval.h>
44 #include <utils/syscache.h>
45 #include <utils/builtins.h>
46 #include <commands/vacuum.h>
47 #include <parser/catalog_utils.h>
48 #include <storage/bufpage.h>
49 #include "storage/shmem.h"
50 #ifndef HAVE_GETRUSAGE
51 # include <rusagestub.h>
53 # include <sys/time.h>
54 # include <sys/resource.h>
57 #include <port-protos.h>
/*
 * File-scope state, helper macros and forward declarations of the vacuum
 * cleaner.
 */
/* set to true once the lock file has been created; reset on shutdown/abort */
59 bool VacuumRunning = false;
/* portal created in vacuum(); its variable memory holds the long-lived lists */
61 static Portal vc_portal;
63 static int MESSAGE_LEVEL; /* elog level for progress reports: NOTICE or DEBUG, set in vacuum() */
/*
 * Swap helpers.  Each expands to a brace-enclosed block with a local "tmp".
 * The arguments are not parenthesized and are expanded more than once, so
 * pass plain lvalues only.
 */
65 #define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
66 #define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
67 #define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
/* true when an equality function was found for the attribute type */
68 #define VacAttrStatsEqValid(stats) ( stats->f_cmpeq != NULL )
/* true when both ordering functions and a valid output function are known */
69 #define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt != NULL && \
70 stats->f_cmpgt != NULL && \
71 RegProcedureIsValid(stats->outfunc) )
74 /* non-export function prototypes */
75 static void vc_init(void);
76 static void vc_shutdown(void);
77 static void vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols);
78 static VRelList vc_getrels(NameData *VacRelP);
79 static void vc_vacone (Oid relid, bool analyze, List *va_cols);
80 static void vc_scanheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl);
81 static void vc_rpfheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel);
82 static void vc_vacheap (VRelStats *vacrelstats, Relation onerel, VPageList vpl);
83 static void vc_vacpage (Page page, VPageDescr vpd, Relation archrel);
84 static void vc_vaconeind (VPageList vpl, Relation indrel, int nhtups);
85 static void vc_scanoneind (Relation indrel, int nhtups);
86 static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple htup);
87 static void vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len);
88 static void vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VRelStats *vacrelstats);
89 static void vc_delhilowstats (Oid relid, int attcnt, int *attnums);
90 static void vc_setpagelock(Relation rel, BlockNumber blkno);
91 static VPageDescr vc_tidreapped (ItemPointer itemptr, VPageList vpl);
92 static void vc_reappage (VPageList vpl, VPageDescr vpc);
93 static void vc_vpinsert (VPageList vpl, VPageDescr vpnew);
94 static void vc_free(VRelList vrl);
95 static void vc_getindices (Oid relid, int *nindices, Relation **Irel);
96 static void vc_clsindices (int nindices, Relation *Irel);
97 static Relation vc_getarchrel(Relation heaprel);
98 static void vc_archive(Relation archrel, HeapTuple htup);
99 static bool vc_isarchrel(char *rname);
100 static void vc_mkindesc (Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
101 static char * vc_find_eq (char *bot, int nelem, int size, char *elm, int (*compar)(char *, char *));
102 static int vc_cmp_blk (char *left, char *right);
103 static int vc_cmp_offno (char *left, char *right);
104 static bool vc_enough_space (VPageDescr vpd, Size len);
/*
 * vacuum() -- entry point of the vacuum cleaner.
 *
 * Visible steps: create the portal named VACPNAME and keep it in vc_portal;
 * set MESSAGE_LEVEL to NOTICE or DEBUG; for each column name in va_spec
 * allocate a buffer while the portal variable memory context is current and
 * append it to va_cols; call vc_vacuum() either for the named relation with
 * that column list or with (NULL, NIL); finally destroy the portal.
 *
 * NOTE(review): this listing skips some original lines (see the gaps in the
 * embedded line numbers).  The return type, braces, local declarations, the
 * statement that fills "dest", and the conditions choosing between the
 * paired statements below are therefore not shown here.
 */
107 vacuum(char *vacrel, bool verbose, bool analyze, List *va_spec)
111 PortalVariableMemory pmem;
117 * Create a portal for safe memory across transctions. We need to
118 * palloc the name space for it because our hash function expects
119 * the name to be on a longword boundary. CreatePortal copies the
120 * name to safe storage for us.
122 pname = (char *) palloc(strlen(VACPNAME) + 1);
123 strcpy(pname, VACPNAME);
124 vc_portal = CreatePortal(pname);
128 MESSAGE_LEVEL = NOTICE;
130 MESSAGE_LEVEL = DEBUG;
132 /* vacrel gets de-allocated on transaction commit */
/* NOTE(review): unbounded strcpy into VacRel.data -- confirm the caller limits the length of vacrel */
134 strcpy(VacRel.data,vacrel);
/* allocations made from here until the context is switched back use the portal variable memory */
136 pmem = PortalGetVariableMemory(vc_portal);
137 old = MemoryContextSwitchTo((MemoryContext)pmem);
/* a column list is accepted only together with analyze */
139 Assert ( va_spec == NIL || analyze );
140 foreach (le, va_spec)
142 char *col = (char*)lfirst(le);
145 dest = (char*) palloc (strlen (col) + 1);
147 va_cols = lappend (va_cols, dest);
149 (void) MemoryContextSwitchTo(old);
151 /* initialize vacuum cleaner */
154 /* vacuum the database */
156 vc_vacuum (&VacRel, analyze, va_cols);
158 vc_vacuum (NULL, analyze, NIL);
160 PortalDestroy (&vc_portal);
167 * vc_init(), vc_shutdown() -- start up and shut down the vacuum cleaner.
169 * We run exactly one vacuum cleaner at a time. We use the file system
170 * to guarantee an exclusive lock on vacuuming, since a single vacuum
171 * cleaner instantiation crosses transaction boundaries, and we'd lose
172 * postgres-style locks at the end of every transaction.
174 * The strangeness with committing and starting transactions in the
175 * init and shutdown routines is due to the fact that the vacuum cleaner
176 * is invoked via a sql command, and so is already executing inside
177 * a transaction. We need to leave ourselves in a predictable state
178 * on entry and exit to the vacuum cleaner. We commit the transaction
179 * started in PostgresMain() inside vc_init(), and start one in
180 * vc_shutdown() to match the commit waiting for us back in
/*
 * Bodies of the start-up, shut-down and abort routines of the vacuum
 * cleaner.  The signature lines are not part of this listing; the routine
 * names used in the notes below are taken from the prototypes and from the
 * surrounding comments and should be confirmed against the full file.
 *
 * Start-up part (presumably vc_init): create the lock file "pg_vlock" with
 * O_CREAT|O_EXCL so that the open fails when the file already exists, raise
 * an error through elog(WARN, ...) in that case, otherwise mark the cleaner
 * as running and commit the current transaction.
 */
188 if ((fd = open("pg_vlock", O_CREAT|O_EXCL, 0600)) < 0)
189 elog(WARN, "can't create lock file -- another vacuum cleaner running?");
194 * By here, exclusive open on the lock file succeeded. If we abort
195 * for any reason during vacuuming, we need to remove the lock file.
196 * This global variable is checked in the transaction manager on xact
197 * abort, and the routine vc_abort() is called if necessary.
200 VacuumRunning = true;
202 /* matches the StartTransaction in PostgresMain() */
203 CommitTransactionCommand();
/*
 * Shut-down part (presumably vc_shutdown): remove the lock file, reporting a
 * failure through elog(WARN, ...), clear VacuumRunning and start a new
 * transaction.
 */
209 /* on entry, not in a transaction */
210 if (unlink("pg_vlock") < 0)
211 elog(WARN, "vacuum: can't destroy lock file!");
213 /* okay, we're done */
214 VacuumRunning = false;
216 /* matches the CommitTransaction in PostgresMain() */
217 StartTransactionCommand();
/*
 * Abort part (presumably vc_abort): best-effort removal of the lock file --
 * the result of unlink() is deliberately ignored -- and reset of the flag.
 */
224 /* on abort, remove the vacuum cleaner lock file */
225 (void) unlink("pg_vlock");
227 VacuumRunning = false;
231 * vc_vacuum() -- vacuum the database.
233 * This routine builds a list of relations to vacuum, and then calls
234 * code that vacuums them one at a time. We are careful to vacuum each
235 * relation in a separate transaction in order to avoid holding too many
/*
 * vc_vacuum: obtain the list of relations from vc_getrels() and call
 * vc_vacone() on every list entry, passing analyze and va_cols through.
 *
 * VacRelP  relation name to restrict the run to, or NULL
 * analyze  forwarded to vc_vacone()
 * va_cols  column-name list forwarded to vc_vacone()
 *
 * NOTE(review): return type, braces and local declarations are not part of
 * this listing.
 */
239 vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols)
243 /* get list of relations */
244 vrl = vc_getrels(VacRelP);
/*
 * Analyze run without a relation name and with a non-empty list: call
 * vc_delhilowstats() with InvalidOid and no attribute list (presumably this
 * drops all previously stored statistics -- confirm in vc_delhilowstats).
 */
246 if ( analyze && VacRelP == NULL && vrl != NULL )
247 vc_delhilowstats (InvalidOid, 0, NULL);
249 /* vacuum each heap relation */
250 for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
251 vc_vacone (cur->vrl_relid, analyze, va_cols);
/*
 * vc_getrels: scan pg_class inside its own transaction and build a singly
 * linked list (VRelList) with one entry per relation to vacuum.  Each entry
 * stores the OID of the pg_class tuple in vrl_relid.  List cells are
 * allocated while the variable memory context of vc_portal is current.
 *
 * The scan key is initialized either on relname (compared with
 * VacRelP->data) or on relkind == 'r'; the condition selecting between the
 * two is not part of this listing, and neither are the statements executed
 * inside the "skip" branches, the return statement, or the braces.
 */
257 vc_getrels(NameData *VacRelP)
261 HeapScanDesc pgcscan;
264 PortalVariableMemory portalmem;
275 StartTransactionCommand();
/* key variant 1: match the relation name */
278 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relname,
279 NameEqualRegProcedure,
280 PointerGetDatum(VacRelP->data));
/* key variant 2: match every tuple whose relkind is 'r' */
282 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relkind,
283 CharacterEqualRegProcedure, CharGetDatum('r'));
286 portalmem = PortalGetVariableMemory(vc_portal);
287 vrl = cur = (VRelList) NULL;
289 pgclass = heap_openr(RelationRelationName);
290 pgcdesc = RelationGetTupleDescriptor(pgclass);
292 pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
294 while (HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &buf))) {
299 * We have to be careful not to vacuum the archive (since it
300 * already contains vacuumed tuples), and not to vacuum
301 * relations on write-once storage managers like the Sony
302 * jukebox at Berkeley.
305 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relname,
309 /* skip archive relations */
310 if (vc_isarchrel(rname)) {
315 /* don't vacuum large objects for now - something breaks when we do */
/* name test: longer than 5 characters, starts with "xinv" or "xinx", followed by a digit */
316 if ( (strlen(rname) > 5) && rname[0] == 'x' &&
317 rname[1] == 'i' && rname[2] == 'n' &&
318 (rname[3] == 'v' || rname[3] == 'x') &&
319 rname[4] >= '0' && rname[4] <= '9')
321 elog (NOTICE, "Rel %.*s: can't vacuum LargeObjects now",
327 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relsmgr,
329 smgrno = DatumGetInt16(d);
331 /* skip write-once storage managers */
332 if (smgriswo(smgrno)) {
337 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relkind,
340 rkind = DatumGetChar(d);
342 /* skip system relations */
345 elog(NOTICE, "Vacuum: can not process index and certain system tables" );
349 /* get a relation list entry for this guy */
/* allocate the list cell in the portal memory, then switch back */
350 old = MemoryContextSwitchTo((MemoryContext)portalmem);
351 if (vrl == (VRelList) NULL) {
352 vrl = cur = (VRelList) palloc(sizeof(VRelListData));
354 cur->vrl_next = (VRelList) palloc(sizeof(VRelListData));
357 (void) MemoryContextSwitchTo(old);
359 cur->vrl_relid = pgctup->t_oid;
360 cur->vrl_next = (VRelList) NULL;
362 /* wei hates it if you forget to do this */
366 elog(NOTICE, "Vacuum: table not found" );
369 heap_endscan(pgcscan);
372 CommitTransactionCommand();
378 * vc_vacone() -- vacuum one heap relation
380 * This routine vacuums a single heap, cleans out its indices, and
381 * updates its npages and ntups statistics.
383 * Doing one heap at a time incurs extra overhead, since we need to
384 * check that the heap exists again just before we vacuum it. The
385 * reason that we do this is so that vacuuming can be spread across
386 * many small transactions. Otherwise, two-phase locking would require
387 * us to lock the entire database during one pass of the vacuum cleaner.
/*
 * vc_vacone: vacuum the relation with OID relid inside one transaction
 * (StartTransactionCommand ... CommitTransactionCommand).
 *
 * Visible flow:
 *   1. Look the relation up in pg_class by OID; when no tuple is found the
 *      scan is ended and the transaction committed.
 *   2. Open the relation and allocate a VRelStats record for it.
 *   3. When analyze is set and the relation is not a system relation,
 *      allocate one VacAttrStats per attribute and resolve, for the type of
 *      each attribute, the "=", "<" and ">" operators and the type output
 *      function.  The slot is set to NULL / InvalidOid on the alternative
 *      path.  With a column list (va_cols) the names are matched against the
 *      attribute names; an unknown name or too many names raise
 *      elog(WARN, ...).
 *   4. Lock the relation for write, scan the heap (vc_scanheap), open the
 *      indices and either clean them against Vvpl or just scan them.
 *   5. With a non-empty Fvpl call vc_rpfheap(); the statements that follow
 *      (close indices, vc_vacheap) are presumably the else branch -- the
 *      else line itself is not part of this listing.
 *   6. Free the page lists, end the pg_class scan, store the statistics
 *      through vc_updstats() and commit.
 *
 * NOTE(review): many original lines (braces, else, declarations, the code
 * that fills attnums and adjusts attr_cnt) are missing from this listing.
 */
390 vc_vacone (Oid relid, bool analyze, List *va_cols)
394 HeapTuple pgctup, pgttup;
396 HeapScanDesc pgcscan;
399 VPageListData Vvpl; /* List of pages to vacuum and/or clean indices */
400 VPageListData Fvpl; /* List of pages with space enough for re-using */
404 VRelStats *vacrelstats;
406 StartTransactionCommand();
/* scan key: pg_class tuple whose OID equals relid */
408 ScanKeyEntryInitialize(&pgckey, 0x0, ObjectIdAttributeNumber,
409 ObjectIdEqualRegProcedure,
410 ObjectIdGetDatum(relid));
412 pgclass = heap_openr(RelationRelationName);
413 pgcdesc = RelationGetTupleDescriptor(pgclass);
414 pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
417 * Race condition -- if the pg_class tuple has gone away since the
418 * last time we saw it, we don't need to vacuum it.
421 if (!HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &pgcbuf))) {
422 heap_endscan(pgcscan);
424 CommitTransactionCommand();
428 /* now open the class and vacuum it */
429 onerel = heap_open(relid);
431 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
432 vacrelstats->relid = relid;
433 vacrelstats->npages = vacrelstats->ntups = 0;
434 vacrelstats->hasindex = false;
435 if ( analyze && !IsSystemRelationName ((RelationGetRelationName (onerel))->data) )
437 int attr_cnt, *attnums = NULL;
438 AttributeTupleForm *attr;
440 attr_cnt = onerel->rd_att->natts;
441 attr = onerel->rd_att->attrs;
443 if ( va_cols != NIL )
448 if ( length (va_cols) > attr_cnt )
449 elog (WARN, "vacuum: too many attributes specified for relation %.*s",
450 NAMEDATALEN, (RelationGetRelationName(onerel))->data);
451 attnums = (int*) palloc (attr_cnt * sizeof (int));
452 foreach (le, va_cols)
454 char *col = (char*) lfirst(le);
456 for (i = 0; i < attr_cnt; i++)
458 if ( namestrcmp (&(attr[i]->attname), col) == 0 )
461 if ( i < attr_cnt ) /* found */
465 elog (WARN, "vacuum: there is no attribute %s in %.*s",
466 col, NAMEDATALEN, (RelationGetRelationName(onerel))->data);
472 vacrelstats->vacattrstats =
473 (VacAttrStats *) palloc (attr_cnt * sizeof(VacAttrStats));
475 for (i = 0; i < attr_cnt; i++)
477 Operator func_operator;
478 OperatorTupleForm pgopform;
481 stats = &vacrelstats->vacattrstats[i];
482 stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
/* with a column list, attnums[i] selects the attribute; otherwise the i-th attribute is used */
483 memmove (stats->attr, attr[((attnums) ? attnums[i] : i)], ATTRIBUTE_TUPLE_SIZE);
484 stats->best = stats->guess1 = stats->guess2 = 0;
485 stats->max = stats->min = 0;
486 stats->best_len = stats->guess1_len = stats->guess2_len = 0;
487 stats->max_len = stats->min_len = 0;
488 stats->initialized = false;
489 stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
490 stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
/* equality operator of the attribute type */
492 func_operator = oper("=",stats->attr->atttypid,stats->attr->atttypid,true);
493 if (func_operator != NULL)
497 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
498 fmgr_info (pgopform->oprcode, &(stats->f_cmpeq), &nargs);
501 stats->f_cmpeq = NULL;
/* less-than operator of the attribute type */
503 func_operator = oper("<",stats->attr->atttypid,stats->attr->atttypid,true);
504 if (func_operator != NULL)
508 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
509 fmgr_info (pgopform->oprcode, &(stats->f_cmplt), &nargs);
512 stats->f_cmplt = NULL;
/* greater-than operator of the attribute type */
514 func_operator = oper(">",stats->attr->atttypid,stats->attr->atttypid,true);
515 if (func_operator != NULL)
519 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
520 fmgr_info (pgopform->oprcode, &(stats->f_cmpgt), &nargs);
523 stats->f_cmpgt = NULL;
/* output function of the attribute type, looked up in the system cache */
525 pgttup = SearchSysCacheTuple(TYPOID,
526 ObjectIdGetDatum(stats->attr->atttypid),
528 if (HeapTupleIsValid(pgttup))
529 stats->outfunc = ((TypeTupleForm) GETSTRUCT(pgttup))->typoutput;
531 stats->outfunc = InvalidOid;
533 vacrelstats->va_natts = attr_cnt;
/* attribute count 0 is passed when no column list was given */
534 vc_delhilowstats (relid, ((attnums) ? attr_cnt : 0), attnums);
540 vacrelstats->va_natts = 0;
541 vacrelstats->vacattrstats = (VacAttrStats *) NULL;
544 /* we require the relation to be locked until the indices are cleaned */
545 RelationSetLockForWrite(onerel);
548 Vvpl.vpl_npages = Fvpl.vpl_npages = 0;
549 vc_scanheap(vacrelstats, onerel, &Vvpl, &Fvpl);
551 /* Now open indices */
552 Irel = (Relation *) NULL;
553 vc_getindices(vacrelstats->relid, &nindices, &Irel);
556 vacrelstats->hasindex = true;
558 vacrelstats->hasindex = false;
560 /* Clean/scan index relation(s) */
561 if ( Irel != (Relation*) NULL )
563 if ( Vvpl.vpl_npages > 0 )
565 for (i = 0; i < nindices; i++)
566 vc_vaconeind (&Vvpl, Irel[i], vacrelstats->ntups);
568 else /* just scan indices to update statistic */
570 for (i = 0; i < nindices; i++)
571 vc_scanoneind (Irel[i], vacrelstats->ntups);
575 if ( Fvpl.vpl_npages > 0 ) /* Try to shrink heap */
576 vc_rpfheap (vacrelstats, onerel, &Vvpl, &Fvpl, nindices, Irel);
579 if ( Irel != (Relation*) NULL )
580 vc_clsindices (nindices, Irel);
581 if ( Vvpl.vpl_npages > 0 ) /* Clean pages from Vvpl list */
582 vc_vacheap (vacrelstats, onerel, &Vvpl);
585 /* ok - free Vvpl list of reapped pages */
586 if ( Vvpl.vpl_npages > 0 )
588 vpp = Vvpl.vpl_pgdesc;
589 for (i = 0; i < Vvpl.vpl_npages; i++, vpp++)
591 pfree (Vvpl.vpl_pgdesc);
592 if ( Fvpl.vpl_npages > 0 )
593 pfree (Fvpl.vpl_pgdesc);
596 /* all done with this class */
598 heap_endscan(pgcscan);
601 /* update statistics in pg_class */
602 vc_updstats(vacrelstats->relid, vacrelstats->npages, vacrelstats->ntups,
603 vacrelstats->hasindex, vacrelstats);
605 /* next command frees attribute stats */
607 CommitTransactionCommand();
611 * vc_scanheap() -- scan an open heap relation
613 * This routine sets commit times, constructs Vvpl list of
614 * empty/uninitialized pages and pages with dead tuples and
615 * ~LP_USED line pointers, constructs Fvpl list of pages
616 * appropriate for purposes of shrinking and maintains statistics
617 * on the number of live tuples in a heap.
/*
 * vc_scanheap: read every block of onerel (0 .. nblocks-1) and classify its
 * contents.
 *
 * Visible behavior:
 *   - A new (uninitialized) page is initialized with PageInit() and reaped
 *     into Vvpl; an empty page is reaped as well.
 *   - For every line pointer: unused ones are recorded in vpc->vpd_voff;
 *     used ones have the status of t_xmin / t_xmax checked with
 *     TransactionIdDidAbort / DidCommit / IsInProgress.  A tuple whose
 *     inserting or deleting transaction is still in progress produces a
 *     NOTICE and disables shrinking (do_shrinking = false).
 *   - Several sanity checks only emit NOTICE messages (invalid xmin, t_ctid
 *     not pointing into this block, length mismatch, invalid OID).
 *   - For a dead tuple the page is copied once into tempPage and the
 *     LP_USED flag is cleared on that copy; the free space reported for the
 *     page is measured on the copy after PageRepairFragmentation().
 *   - Tuple length extremes and attribute statistics (vc_attrstats) are
 *     collected; ntups and nblocks are stored in vacrelstats.
 *   - Finally Fvpl is filled with those Vvpl pages (excluding the nemend
 *     trailing pages) that pass vc_enough_space(vp, min_tlen), and a summary
 *     is logged at MESSAGE_LEVEL.
 *
 * NOTE(review): many original lines (braces, else, continue, counters,
 * declarations) are missing from this listing, so branch structure is only
 * partly visible.
 */
620 vc_scanheap (VRelStats *vacrelstats, Relation onerel,
621 VPageList Vvpl, VPageList Fvpl)
628 Page page, tempPage = NULL;
629 OffsetNumber offnum, maxoff;
630 bool pgchanged, tupgone, dobufrel, notup;
633 uint32 nvac, ntups, nunused, ncrash, nempg, nnepg, nchpg, nemend;
635 Size min_tlen = MAXTUPLEN;
637 int32 i/*, attr_cnt*/;
638 struct rusage ru0, ru1;
639 bool do_shrinking = true;
/* resource usage snapshot; compared with ru1 for the elapsed-time report */
641 getrusage(RUSAGE_SELF, &ru0);
643 nvac = ntups = nunused = ncrash = nempg = nnepg = nchpg = nemend = 0;
646 relname = (RelationGetRelationName(onerel))->data;
648 nblocks = RelationGetNumberOfBlocks(onerel);
/* one descriptor, sized for the maximum number of offsets, is reused for every page */
650 vpc = (VPageDescr) palloc (sizeof(VPageDescrData) + MaxOffsetNumber*sizeof(OffsetNumber));
653 for (blkno = 0; blkno < nblocks; blkno++) {
654 buf = ReadBuffer(onerel, blkno);
655 page = BufferGetPage(buf);
656 vpc->vpd_blkno = blkno;
/* never-initialized page: initialize it and record it in Vvpl */
659 if (PageIsNew(page)) {
660 elog (NOTICE, "Rel %.*s: Uninitialized page %u - fixing",
661 NAMEDATALEN, relname, blkno);
662 PageInit (page, BufferGetPageSize (buf), 0);
663 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
664 frsize += (vpc->vpd_free - sizeof (ItemIdData));
667 vc_reappage (Vvpl, vpc);
/* page without items: record it in Vvpl */
672 if (PageIsEmpty(page)) {
673 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
674 frsize += (vpc->vpd_free - sizeof (ItemIdData));
677 vc_reappage (Vvpl, vpc);
684 maxoff = PageGetMaxOffsetNumber(page);
685 for (offnum = FirstOffsetNumber;
687 offnum = OffsetNumberNext(offnum)) {
688 itemid = PageGetItemId(page, offnum);
691 * Collect un-used items too - it's possible to have
692 * indices pointing here after crash.
694 if (!ItemIdIsUsed(itemid)) {
695 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
700 htup = (HeapTuple) PageGetItem(page, itemid);
703 if (!AbsoluteTimeIsBackwardCompatiblyValid(htup->t_tmin) &&
704 TransactionIdIsValid((TransactionId)htup->t_xmin)) {
706 if (TransactionIdDidAbort(htup->t_xmin)) {
708 } else if (TransactionIdDidCommit(htup->t_xmin)) {
709 htup->t_tmin = TransactionIdGetCommitTime(htup->t_xmin);
711 } else if ( !TransactionIdIsInProgress (htup->t_xmin) ) {
713 * Not Aborted, Not Committed, Not in Progress -
714 * so it from crashed process. - vadim 11/26/96
721 elog (NOTICE, "Rel %.*s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
722 NAMEDATALEN, relname, blkno, offnum, htup->t_xmin);
723 do_shrinking = false;
727 if (TransactionIdIsValid((TransactionId)htup->t_xmax))
729 if (TransactionIdDidAbort(htup->t_xmax))
731 StoreInvalidTransactionId(&(htup->t_xmax));
734 else if (TransactionIdDidCommit(htup->t_xmax))
736 else if ( !TransactionIdIsInProgress (htup->t_xmax) ) {
738 * Not Aborted, Not Committed, Not in Progress -
739 * so it from crashed process. - vadim 06/02/97
741 StoreInvalidTransactionId(&(htup->t_xmax));
746 elog (NOTICE, "Rel %.*s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
747 NAMEDATALEN, relname, blkno, offnum, htup->t_xmax);
748 do_shrinking = false;
753 * Is it possible at all ? - vadim 11/26/96
755 if ( !TransactionIdIsValid((TransactionId)htup->t_xmin) )
757 elog (NOTICE, "Rel %.*s: TID %u/%u: INSERT_TRANSACTION_ID IS INVALID. \
758 DELETE_TRANSACTION_ID_VALID %d, TUPGONE %d.",
759 NAMEDATALEN, relname, blkno, offnum,
760 TransactionIdIsValid((TransactionId)htup->t_xmax),
765 * It's possibly! But from where it comes ?
766 * And should we fix it ? - vadim 11/28/96
768 itemptr = &(htup->t_ctid);
769 if ( !ItemPointerIsValid (itemptr) ||
770 BlockIdGetBlockNumber(&(itemptr->ip_blkid)) != blkno )
772 elog (NOTICE, "Rel %.*s: TID %u/%u: TID IN TUPLEHEADER %u/%u IS NOT THE SAME. TUPGONE %d.",
773 NAMEDATALEN, relname, blkno, offnum,
774 BlockIdGetBlockNumber(&(itemptr->ip_blkid)),
775 itemptr->ip_posid, tupgone);
781 if ( htup->t_len != itemid->lp_len )
783 elog (NOTICE, "Rel %.*s: TID %u/%u: TUPLE_LEN IN PAGEHEADER %u IS NOT THE SAME AS IN TUPLEHEADER %u. TUPGONE %d.",
784 NAMEDATALEN, relname, blkno, offnum,
785 itemid->lp_len, htup->t_len, tupgone);
787 if ( !OidIsValid(htup->t_oid) )
789 elog (NOTICE, "Rel %.*s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
790 NAMEDATALEN, relname, blkno, offnum, tupgone);
/* first removal on this page: take a private copy; LP_USED is cleared on the copy */
796 if ( tempPage == (Page) NULL )
800 pageSize = PageGetPageSize(page);
801 tempPage = (Page) palloc(pageSize);
802 memmove (tempPage, page, pageSize);
805 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
808 lpp->lp_flags &= ~LP_USED;
810 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
/* presumably the branch for a tuple that is kept: track length extremes and attribute statistics */
816 if ( htup->t_len < min_tlen )
817 min_tlen = htup->t_len;
818 if ( htup->t_len > max_tlen )
819 max_tlen = htup->t_len;
820 vc_attrstats(onerel, vacrelstats, htup);
831 if ( tempPage != (Page) NULL )
832 { /* Some tuples are gone */
833 PageRepairFragmentation(tempPage);
834 vpc->vpd_free = ((PageHeader)tempPage)->pd_upper - ((PageHeader)tempPage)->pd_lower;
835 frsize += vpc->vpd_free;
836 vc_reappage (Vvpl, vpc);
838 tempPage = (Page) NULL;
840 else if ( vpc->vpd_noff > 0 )
841 { /* there are only ~LP_USED line pointers */
842 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
843 frsize += vpc->vpd_free;
844 vc_reappage (Vvpl, vpc);
856 /* save stats in the rel list for use later */
857 vacrelstats->ntups = ntups;
858 vacrelstats->npages = nblocks;
859 /* vacrelstats->natts = attr_cnt;*/
/* NOTE(review): the condition guarding this reset is not part of this listing */
861 min_tlen = max_tlen = 0;
862 vacrelstats->min_tlen = min_tlen;
863 vacrelstats->max_tlen = max_tlen;
865 Vvpl->vpl_nemend = nemend;
866 Fvpl->vpl_nemend = nemend;
869 * Try to make Fvpl keeping in mind that we can't use free space
870 * of "empty" end-pages and last page if it reapped.
872 if ( do_shrinking && Vvpl->vpl_npages - nemend > 0 )
874 int nusf; /* blocks useful for re-using */
876 nusf = Vvpl->vpl_npages - nemend;
877 if ( (Vvpl->vpl_pgdesc[nusf-1])->vpd_blkno == nblocks - nemend - 1 )
880 for (i = 0; i < nusf; i++)
882 vp = Vvpl->vpl_pgdesc[i];
883 if ( vc_enough_space (vp, min_tlen) )
885 vc_vpinsert (Fvpl, vp);
886 frsusf += vp->vpd_free;
891 getrusage(RUSAGE_SELF, &ru1);
893 elog (MESSAGE_LEVEL, "Rel %.*s: Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
894 Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.",
895 NAMEDATALEN, relname,
896 nblocks, nchpg, Vvpl->vpl_npages, nempg, nnepg,
897 ntups, nvac, ncrash, nunused, min_tlen, max_tlen,
898 frsize, frsusf, nemend, Fvpl->vpl_npages,
899 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
900 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
906 * vc_rpfheap() -- try to repair relation's fragmentation
908 * This routine marks dead tuples as unused and tries to re-use dead space
909 * by moving tuples (and inserting indices if needed). It constructs
910 * Nvpl list of freed pages (moved tuples) and cleans indices
911 * for them after committing (in hack-manner - without losing locks
912 * and freeing memory!) current transaction. It truncates relation
913 * if some end-blocks are gone away.
/*
 * vc_rpfheap: move tuples from the end of the relation into free space on
 * the pages listed in Fvpl, then truncate the relation.
 *
 * Visible flow:
 *   1. Remember the current transaction and command id; with indices,
 *      build index descriptors and allocate key/null arrays.
 *   2. With an archive (relarch != 'n'), open it and run vc_vacpage() on the
 *      trailing "empty" pages of Vvpl that still carry dead offsets.
 *   3. Walk the blocks backwards, starting just below the trailing empty
 *      pages.  Reaped pages with dead tuples are cleaned first.  Every used
 *      tuple is copied to a target page chosen from Fvpl with
 *      vc_enough_space(); the copy gets the current xid as t_xmin, the
 *      source tuple gets it as t_xmax and its t_chain set to the new TID.
 *      Index entries are inserted for the copy.  Pages that lost tuples are
 *      collected in Nvpl.
 *   4. Flush buffers and record the transaction as committed, then clean
 *      the not yet cleaned Vvpl pages and stamp the commit time on the
 *      moved tuples.
 *   5. Vacuum the indices against Nvpl, clear moved tuples left on the last
 *      Nvpl page, truncate the relation through smgrtruncate() and store
 *      the new block count in vacrelstats->npages.
 *   6. Close the archive and the indices.
 *
 * NOTE(review): many original lines (braces, break/continue, counters,
 * buffer write/release calls, declarations) are missing from this listing.
 */
916 vc_rpfheap (VRelStats *vacrelstats, Relation onerel,
917 VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel)
921 AbsoluteTime myCTM = 0;
924 Page page, ToPage = NULL;
925 OffsetNumber offnum = 0, maxoff = 0, newoff, moff;
926 ItemId itemid, newitemid;
927 HeapTuple htup, newtup;
928 TupleDesc tupdesc = NULL;
929 Datum *idatum = NULL;
931 InsertIndexResult iresult;
933 VPageDescr ToVpd = NULL, Fvplast, Vvplast, vpc, *vpp;
935 IndDesc *Idesc, *idcur;
936 int Fblklast, Vblklast, i;
938 int nmoved, Fnpages, Vnpages;
940 bool isempty, dowrite;
942 struct rusage ru0, ru1;
944 getrusage(RUSAGE_SELF, &ru0);
/* ids stamped on moved tuples (t_xmin/t_cmin of the copy, t_xmax/t_cmax of the source) */
946 myXID = GetCurrentTransactionId();
947 myCID = GetCurrentCommandId();
949 if ( Irel != (Relation*) NULL ) /* preparation for index' inserts */
951 vc_mkindesc (onerel, nindices, Irel, &Idesc);
952 tupdesc = RelationGetTupleDescriptor(onerel);
953 idatum = (Datum *) palloc(INDEX_MAX_KEYS * sizeof (*idatum));
954 inulls = (char *) palloc(INDEX_MAX_KEYS * sizeof (*inulls));
957 /* if the relation has an archive, open it */
958 if (onerel->rd_rel->relarch != 'n')
960 archrel = vc_getarchrel(onerel);
961 /* Archive tuples from "empty" end-pages */
/* walks the last vpl_nemend entries of Vvpl from the end backwards */
962 for ( vpp = Vvpl->vpl_pgdesc + Vvpl->vpl_npages - 1,
963 i = Vvpl->vpl_nemend; i > 0; i--, vpp-- )
965 if ( (*vpp)->vpd_noff > 0 )
967 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
968 page = BufferGetPage(buf);
969 Assert ( !PageIsEmpty(page) );
970 vc_vacpage (page, *vpp, archrel);
976 archrel = (Relation) NULL;
/* cursors on the last entry of Fvpl and on the last non-trailing entry of Vvpl */
979 Fnpages = Fvpl->vpl_npages;
980 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
981 Fblklast = Fvplast->vpd_blkno;
982 Assert ( Vvpl->vpl_npages > Vvpl->vpl_nemend );
983 Vnpages = Vvpl->vpl_npages - Vvpl->vpl_nemend;
984 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
985 Vblklast = Vvplast->vpd_blkno;
986 Assert ( Vblklast >= Fblklast );
987 ToBuf = InvalidBuffer;
/* descriptor reused for each source page; collects the offsets of moved tuples */
990 vpc = (VPageDescr) palloc (sizeof(VPageDescrData) + MaxOffsetNumber*sizeof(OffsetNumber));
991 vpc->vpd_nusd = vpc->vpd_noff = 0;
993 nblocks = vacrelstats->npages;
/* backward walk over the blocks; the loop has no condition here and is left through break statements */
994 for (blkno = nblocks - Vvpl->vpl_nemend - 1; ; blkno--)
996 /* if it's reapped page and it was used by me - quit */
997 if ( blkno == Fblklast && Fvplast->vpd_nusd > 0 )
1000 buf = ReadBuffer(onerel, blkno);
1001 page = BufferGetPage(buf);
1005 isempty = PageIsEmpty(page);
1008 if ( blkno == Vblklast ) /* it's reapped page */
1010 if ( Vvplast->vpd_noff > 0 ) /* there are dead tuples */
1011 { /* on this page - clean */
1012 Assert ( ! isempty );
1013 vc_vacpage (page, Vvplast, archrel);
1021 Assert ( Vnpages > 0 );
1022 /* get prev reapped page from Vvpl */
1023 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
1024 Vblklast = Vvplast->vpd_blkno;
1025 if ( blkno == Fblklast ) /* this page in Fvpl too */
1028 Assert ( Fnpages > 0 );
1029 Assert ( Fvplast->vpd_nusd == 0 );
1030 /* get prev reapped page from Fvpl */
1031 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
1032 Fblklast = Fvplast->vpd_blkno;
1034 Assert ( Fblklast <= Vblklast );
1043 Assert ( ! isempty );
1046 vpc->vpd_blkno = blkno;
1047 maxoff = PageGetMaxOffsetNumber(page);
1048 for (offnum = FirstOffsetNumber;
1050 offnum = OffsetNumberNext(offnum))
1052 itemid = PageGetItemId(page, offnum);
1054 if (!ItemIdIsUsed(itemid))
1057 htup = (HeapTuple) PageGetItem(page, itemid);
1060 /* try to find new page for this tuple */
1061 if ( ToBuf == InvalidBuffer ||
1062 ! vc_enough_space (ToVpd, tlen) )
1064 if ( ToBuf != InvalidBuffer )
1067 ToBuf = InvalidBuffer;
1069 * If no one tuple can't be added to this page -
1070 * remove page from Fvpl. - vadim 11/27/96
1072 if ( !vc_enough_space (ToVpd, vacrelstats->min_tlen) )
1074 if ( ToVpd != Fvplast )
1076 Assert ( Fnpages > ToVpI + 1 );
1077 memmove (Fvpl->vpl_pgdesc + ToVpI,
1078 Fvpl->vpl_pgdesc + ToVpI + 1,
1079 sizeof (VPageDescr*) * (Fnpages - ToVpI - 1));
1081 Assert ( Fnpages >= 1 );
1085 /* get prev reapped page from Fvpl */
1086 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
1087 Fblklast = Fvplast->vpd_blkno;
1090 for (i=0; i < Fnpages; i++)
1092 if ( vc_enough_space (Fvpl->vpl_pgdesc[i], tlen) )
1096 break; /* can't move item anywhere */
1098 ToVpd = Fvpl->vpl_pgdesc[ToVpI];
1099 ToBuf = ReadBuffer(onerel, ToVpd->vpd_blkno);
1100 ToPage = BufferGetPage(ToBuf);
1101 /* if this page was not used before - clean it */
1102 if ( ! PageIsEmpty(ToPage) && ToVpd->vpd_nusd == 0 )
1103 vc_vacpage (ToPage, ToVpd, archrel);
1107 newtup = (HeapTuple) palloc (tlen);
1108 memmove((char *) newtup, (char *) htup, tlen);
1110 /* store transaction information */
1111 TransactionIdStore(myXID, &(newtup->t_xmin));
1112 newtup->t_cmin = myCID;
1113 StoreInvalidTransactionId(&(newtup->t_xmax));
1114 newtup->t_tmin = INVALID_ABSTIME;
1115 newtup->t_tmax = CURRENT_ABSTIME;
1116 ItemPointerSetInvalid(&newtup->t_chain);
1118 /* add tuple to the page */
1119 newoff = PageAddItem (ToPage, (Item)newtup, tlen,
1120 InvalidOffsetNumber, LP_USED);
1121 if ( newoff == InvalidOffsetNumber )
1124 failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
1125 tlen, ToVpd->vpd_blkno, ToVpd->vpd_free,
1126 ToVpd->vpd_nusd, ToVpd->vpd_noff);
1128 newitemid = PageGetItemId(ToPage, newoff);
1130 newtup = (HeapTuple) PageGetItem(ToPage, newitemid);
1131 ItemPointerSet(&(newtup->t_ctid), ToVpd->vpd_blkno, newoff);
1133 /* now logically delete end-tuple */
1134 TransactionIdStore(myXID, &(htup->t_xmax));
1135 htup->t_cmax = myCID;
1136 memmove ((char*)&(htup->t_chain), (char*)&(newtup->t_ctid), sizeof (newtup->t_ctid));
1140 ToVpd->vpd_free = ((PageHeader)ToPage)->pd_upper - ((PageHeader)ToPage)->pd_lower;
1141 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
1143 /* insert index' tuples if needed */
1144 if ( Irel != (Relation*) NULL )
1146 for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
1150 (AttrNumber *)&(idcur->tform->indkey[0]),
1157 iresult = index_insert (
1163 if (iresult) pfree(iresult);
1167 } /* walk along page */
1169 if ( vpc->vpd_noff > 0 ) /* some tuples were moved */
1171 vc_reappage (&Nvpl, vpc);
1179 if ( offnum <= maxoff )
1180 break; /* some item(s) left */
1182 } /* walk along relation */
1184 blkno++; /* new number of blocks */
1186 if ( ToBuf != InvalidBuffer )
1188 Assert (nmoved > 0);
1195 * We have to commit our tuple' movings before we'll truncate
1196 * relation, but we shouldn't lose our locks. And so - quick hack:
1197 * flush buffers and record status of current transaction
1198 * as committed, and continue. - vadim 11/13/96
1200 FlushBufferPool(!TransactionFlushEnabled());
1201 TransactionIdCommit(myXID);
1202 FlushBufferPool(!TransactionFlushEnabled());
1203 myCTM = TransactionIdGetCommitTime(myXID);
1207 * Clean uncleaned reapped pages from Vvpl list
1208 * and set commit' times for inserted tuples
1211 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < Vnpages; i++, vpp++)
1213 Assert ( (*vpp)->vpd_blkno < blkno );
1214 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1215 page = BufferGetPage(buf);
1216 if ( (*vpp)->vpd_nusd == 0 ) /* this page was not used */
1218 /* noff == 0 in empty pages only - such pages should be re-used */
1219 Assert ( (*vpp)->vpd_noff > 0 );
1220 vc_vacpage (page, *vpp, archrel);
1222 else /* this page was used */
1225 moff = PageGetMaxOffsetNumber(page);
1226 for (newoff = FirstOffsetNumber;
1228 newoff = OffsetNumberNext(newoff))
1230 itemid = PageGetItemId(page, newoff);
1231 if (!ItemIdIsUsed(itemid))
1233 htup = (HeapTuple) PageGetItem(page, itemid);
1234 if ( TransactionIdEquals((TransactionId)htup->t_xmin, myXID) )
1236 htup->t_tmin = myCTM;
1240 Assert ( (*vpp)->vpd_nusd == ntups );
1245 Assert ( nmoved == nchkmvd );
1247 getrusage(RUSAGE_SELF, &ru1);
1249 elog (MESSAGE_LEVEL, "Rel %.*s: Pages: %u --> %u; Tuple(s) moved: %u. \
1250 Elapsed %u/%u sec.",
1251 NAMEDATALEN, (RelationGetRelationName(onerel))->data,
1252 nblocks, blkno, nmoved,
1253 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1254 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1256 if ( Nvpl.vpl_npages > 0 )
1258 /* vacuum indices again if needed */
1259 if ( Irel != (Relation*) NULL )
1261 VPageDescr *vpleft, *vpright, vpsave;
1263 /* re-sort Nvpl.vpl_pgdesc */
1264 for (vpleft = Nvpl.vpl_pgdesc,
1265 vpright = Nvpl.vpl_pgdesc + Nvpl.vpl_npages - 1;
1266 vpleft < vpright; vpleft++, vpright--)
1268 vpsave = *vpleft; *vpleft = *vpright; *vpright = vpsave;
1270 for (i = 0; i < nindices; i++)
1271 vc_vaconeind (&Nvpl, Irel[i], vacrelstats->ntups);
1275 * clean moved tuples from last page in Nvpl list
1276 * if some tuples left there
1278 if ( vpc->vpd_noff > 0 && offnum <= maxoff )
1280 Assert (vpc->vpd_blkno == blkno - 1);
1281 buf = ReadBuffer(onerel, vpc->vpd_blkno);
1282 page = BufferGetPage (buf);
1285 for (offnum = FirstOffsetNumber;
1287 offnum = OffsetNumberNext(offnum))
1289 itemid = PageGetItemId(page, offnum);
1290 if (!ItemIdIsUsed(itemid))
1292 htup = (HeapTuple) PageGetItem(page, itemid);
1293 Assert ( TransactionIdEquals((TransactionId)htup->t_xmax, myXID) );
1294 itemid->lp_flags &= ~LP_USED;
1297 Assert ( vpc->vpd_noff == ntups );
1298 PageRepairFragmentation(page);
1302 /* now - free new list of reapped pages */
1303 vpp = Nvpl.vpl_pgdesc;
1304 for (i = 0; i < Nvpl.vpl_npages; i++, vpp++)
1306 pfree (Nvpl.vpl_pgdesc);
1309 /* truncate relation */
1310 if ( blkno < nblocks )
1312 blkno = smgrtruncate (onerel->rd_rel->relsmgr, onerel, blkno);
1313 Assert ( blkno >= 0 );
1314 vacrelstats->npages = blkno; /* set new number of blocks */
1317 if ( archrel != (Relation) NULL )
1318 heap_close(archrel);
1320 if ( Irel != (Relation*) NULL ) /* pfree index' allocations */
1325 vc_clsindices (nindices, Irel);
1333 * vc_vacheap() -- free dead tuples
1335 * This routine marks dead tuples as unused and truncates relation
1336 * if there are "empty" end-blocks.
/*
 * Walks the first (vpl_npages - vpl_nemend) page descriptors of Vvpl and,
 * for every descriptor with vpd_noff > 0, reads that block and passes the
 * page to vc_vacpage().  When vpl_nemend > 0 the relation is then cut
 * down to (npages - vpl_nemend) blocks with smgrtruncate() and
 * vacrelstats->npages is set to the value smgrtruncate() returned.
 * An archive relation is opened only when relarch != 'n' and is closed
 * again before returning.
 */
1339 vc_vacheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl)
1348 nblocks = Vvpl->vpl_npages;
1349 /* if the relation has an archive, open it */
1350 if (onerel->rd_rel->relarch != 'n')
1351 archrel = vc_getarchrel(onerel);
1354 archrel = (Relation) NULL;
1355 nblocks -= Vvpl->vpl_nemend; /* nothing to do with them */
/* nblocks now counts only the descriptors that precede the trailing vpl_nemend ones */
1358 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < nblocks; i++, vpp++)
1360 if ( (*vpp)->vpd_noff > 0 )
1362 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1363 page = BufferGetPage (buf);
1364 vc_vacpage (page, *vpp, archrel);
1369 /* truncate relation if there are some empty end-pages */
1370 if ( Vvpl->vpl_nemend > 0 )
1372 Assert ( vacrelstats->npages >= Vvpl->vpl_nemend );
/* from here on nblocks is reused as the target size of the relation, in blocks */
1373 nblocks = vacrelstats->npages - Vvpl->vpl_nemend;
1374 elog (MESSAGE_LEVEL, "Rel %.*s: Pages: %u --> %u.",
1375 NAMEDATALEN, (RelationGetRelationName(onerel))->data,
1376 vacrelstats->npages, nblocks);
1379 * we have to flush "empty" end-pages (if changed, but who knows it)
1382 FlushBufferPool(!TransactionFlushEnabled());
1384 nblocks = smgrtruncate (onerel->rd_rel->relsmgr, onerel, nblocks);
1385 Assert ( nblocks >= 0 );
1386 vacrelstats->npages = nblocks; /* set new number of blocks */
1389 if ( archrel != (Relation) NULL )
1390 heap_close(archrel);
1395 * vc_vacpage() -- free (and archive if needed) dead tuples on a page
1396 * and repair its fragmentation.
/*
 * For each of the vpd->vpd_noff offsets stored in vpd->vpd_voff, clears
 * LP_USED on the matching line pointer of the page.  When archrel is
 * non-NULL and the item is still marked used, the tuple is handed to
 * vc_archive() first.  PageRepairFragmentation() is called on the page
 * afterwards.  Asserts that vpd->vpd_nusd is 0 on entry.
 */
1399 vc_vacpage (Page page, VPageDescr vpd, Relation archrel)
1405 Assert ( vpd->vpd_nusd == 0 );
1406 for (i=0; i < vpd->vpd_noff; i++)
/* the stored offset minus one is used as the index into pd_linp[] */
1408 itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_voff[i] - 1]);
1409 if ( archrel != (Relation) NULL && ItemIdIsUsed(itemid) )
1411 htup = (HeapTuple) PageGetItem (page, itemid);
1412 vc_archive (archrel, htup);
1414 itemid->lp_flags &= ~LP_USED;
1416 PageRepairFragmentation(page);
1421 * vc_scanoneind() -- scan one index relation to update its statistics.
/*
 * Runs a forward scan over the whole of indrel, then stores the index's
 * block count (RelationGetNumberOfBlocks) and nitups in pg_class through
 * vc_updstats().  A summary with the rusage deltas is logged at
 * MESSAGE_LEVEL, and a NOTICE is raised when nitups differs from nhtups,
 * the tuple count the caller supplies for the heap.
 * nitups is presumably incremented once per scanned entry -- TODO confirm.
 */
1425 vc_scanoneind (Relation indrel, int nhtups)
1427 RetrieveIndexResult res;
1428 IndexScanDesc iscan;
1431 struct rusage ru0, ru1;
1433 getrusage(RUSAGE_SELF, &ru0);
1435 /* walk through the entire index */
1436 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1439 while ((res = index_getnext(iscan, ForwardScanDirection))
1440 != (RetrieveIndexResult) NULL)
1446 index_endscan(iscan);
1448 /* now update statistics in pg_class */
1449 nipages = RelationGetNumberOfBlocks(indrel);
1450 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1452 getrusage(RUSAGE_SELF, &ru1);
/*
 * The two "Elapsed" figures are the ru_stime delta followed by the
 * ru_utime delta.
 * NOTE(review): the tv_sec differences are printed with %u; confirm that
 * this matches the width of tv_sec on every supported platform.
 */
1454 elog (MESSAGE_LEVEL, "Ind %.*s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
1455 NAMEDATALEN, indrel->rd_rel->relname.data, nipages, nitups,
1456 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1457 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1459 if ( nitups != nhtups )
1460 elog (NOTICE, "Ind %.*s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1461 NAMEDATALEN, indrel->rd_rel->relname.data, nitups, nhtups);
1463 } /* vc_scanoneind */
1466 * vc_vaconeind() -- vacuum one index relation.
1468 * Vpl is the VPageList of the heap we're currently vacuuming.
1469 * It's locked. Indrel is an index relation on the vacuumed heap.
1470 * We don't set locks on the index relation here, since the indexed
1471 * access methods support locking at different granularities.
1472 * We let them handle it.
1474 * Finally, we arrange to update the index relation's statistics in
/*
 * Runs a forward scan over the whole of indrel.  For each entry the heap
 * pointer is looked up in vpl with vc_tidreapped(); a hit leads to
 * index_delete() on that index entry, and a hit on a descriptor with
 * vpd_noff == 0 additionally raises a NOTICE.  After the scan the index's
 * block count and nitups are written to pg_class via vc_updstats(), a
 * summary including nvac is logged at MESSAGE_LEVEL, and a NOTICE is
 * raised when nitups differs from nhtups.
 * nitups/nvac are presumably the kept/deleted entry counts -- TODO confirm.
 */
1478 vc_vaconeind(VPageList vpl, Relation indrel, int nhtups)
1480 RetrieveIndexResult res;
1481 IndexScanDesc iscan;
1482 ItemPointer heapptr;
1487 struct rusage ru0, ru1;
1489 getrusage(RUSAGE_SELF, &ru0);
1491 /* walk through the entire index */
1492 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1496 while ((res = index_getnext(iscan, ForwardScanDirection))
1497 != (RetrieveIndexResult) NULL) {
1498 heapptr = &res->heap_iptr;
1500 if ( (vp = vc_tidreapped (heapptr, vpl)) != (VPageDescr) NULL)
1503 elog(DEBUG, "<%x,%x> -> <%x,%x>",
1504 ItemPointerGetBlockNumber(&(res->index_iptr)),
1505 ItemPointerGetOffsetNumber(&(res->index_iptr)),
1506 ItemPointerGetBlockNumber(&(res->heap_iptr)),
1507 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
1509 if ( vp->vpd_noff == 0 )
1510 { /* this is EmptyPage !!! */
1511 elog (NOTICE, "Ind %.*s: pointer to EmptyPage (blk %u off %u) - fixing",
1512 NAMEDATALEN, indrel->rd_rel->relname.data,
1513 vp->vpd_blkno, ItemPointerGetOffsetNumber(heapptr));
1516 index_delete(indrel, &res->index_iptr);
1525 index_endscan(iscan);
1527 /* now update statistics in pg_class */
1528 nipages = RelationGetNumberOfBlocks(indrel);
1529 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1531 getrusage(RUSAGE_SELF, &ru1);
/*
 * The two "Elapsed" figures are the ru_stime delta followed by the
 * ru_utime delta.
 * NOTE(review): the tv_sec differences are printed with %u; confirm that
 * this matches the width of tv_sec on every supported platform.
 */
1533 elog (MESSAGE_LEVEL, "Ind %.*s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
1534 NAMEDATALEN, indrel->rd_rel->relname.data, nipages, nitups, nvac,
1535 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1536 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1538 if ( nitups != nhtups )
1539 elog (NOTICE, "Ind %.*s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1540 NAMEDATALEN, indrel->rd_rel->relname.data, nitups, nhtups);
1542 } /* vc_vaconeind */
1545 * vc_tidreapped() -- is a particular tid reapped?
1547 * vpl->vpl_pgdesc must be sorted by block number (it is binary-searched).
/*
 * Splits itemptr into block number and offset, then searches
 * vpl->vpl_pgdesc for that block with vc_find_eq().  Returns NULL when
 * the block is not in the list.  For a page descriptor that carries
 * offsets (vpd_noff != 0) the offset is searched in vpd_voff with a
 * second vc_find_eq() call, and NULL is returned when it is not there.
 * Otherwise the matching page descriptor is presumably returned -- TODO
 * confirm.
 */
1550 vc_tidreapped(ItemPointer itemptr, VPageList vpl)
1552 OffsetNumber ioffno;
1554 VPageDescr vp, *vpp;
1557 vpd.vpd_blkno = ItemPointerGetBlockNumber(itemptr);
1558 ioffno = ItemPointerGetOffsetNumber(itemptr);
/* NOTE(review): the search key passed below is &vp; presumably vp has been pointed at vpd by this point -- confirm */
1561 vpp = (VPageDescr*) vc_find_eq ((char*)(vpl->vpl_pgdesc),
1562 vpl->vpl_npages, sizeof (VPageDescr), (char*)&vp,
1565 if ( vpp == (VPageDescr*) NULL )
1566 return ((VPageDescr)NULL);
1569 /* ok - we are on true page */
1571 if ( vp->vpd_noff == 0 ) { /* this is EmptyPage !!! */
1575 voff = (OffsetNumber*) vc_find_eq ((char*)(vp->vpd_voff),
1576 vp->vpd_noff, sizeof (OffsetNumber), (char*)&ioffno,
1579 if ( voff == (OffsetNumber*) NULL )
1580 return ((VPageDescr)NULL);
1584 } /* vc_tidreapped */
1587 * vc_attrstats() -- compute column statistics used by the optimizer
1589 * We compute the column min, max, null and non-null counts.
1590 * Plus we attempt to find the count of the value that occurs most
1591 * frequently in each column
1592 * These figures are used to compute the selectivity of the column
1594 * We use a three-bucket cache to get the most frequent item
1595 * The 'guess' buckets count hits. A cache miss causes guess1
1596 * to get the most hit 'guess' item in the most recent cycle, and
1597 * the new item goes into guess2. Whenever the total count of hits
1598 * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
1600 * This method works perfectly for columns with unique values, and columns
1601 * with only two unique values, plus nulls.
1603 * It becomes less perfect as the number of unique values increases and
1604 * their distribution in the table becomes more random.
/*
 * Folds one heap tuple into the per-attribute statistics held in
 * vacrelstats->vacattrstats (va_natts entries).  For each tracked
 * attribute the value is fetched with heap_getattr(); attributes without
 * a valid equality function are tested for and handled separately.  On the first
 * counted value the best/guess1/guess2 buckets (and min/max when lt/gt
 * functions are valid) are seeded with a copy of it.  Later values update
 * min/max through f_cmplt/f_cmpgt and the bucket hit counters through
 * f_cmpeq; a value matching none of best/guess1/guess2 clears value_hit.
 * How null values are counted is not restated here -- TODO confirm.
 */
1608 vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple htup)
1610 int i, attr_cnt = vacrelstats->va_natts;
1611 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1612 TupleDesc tupDesc = onerel->rd_att;
1616 for (i = 0; i < attr_cnt; i++) {
1617 VacAttrStats *stats = &vacattrstats[i];
1618 bool value_hit = true;
1620 value = (Datum) heap_getattr (htup, InvalidBuffer,
1621 stats->attr->attnum, tupDesc, &isnull);
1623 if (!VacAttrStatsEqValid(stats))
1629 stats->nonnull_cnt++;
1630 if (stats->initialized == false) {
1631 vc_bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
1632 /* best_cnt gets incremented later */
1633 vc_bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
1634 stats->guess1_cnt = stats->guess1_hits = 1;
1635 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1636 stats->guess2_hits = 1;
1637 if (VacAttrStatsLtGtValid(stats)) {
1638 vc_bucketcpy(stats->attr, value, &stats->max , &stats->max_len);
1639 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1641 stats->initialized = true;
1643 if (VacAttrStatsLtGtValid(stats)) {
1644 if ( (*(stats->f_cmplt)) (value,stats->min) ) {
1645 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1648 if ( (*(stats->f_cmpgt)) (value,stats->max) ) {
1649 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
1652 if ( (*(stats->f_cmpeq)) (value,stats->min) )
1654 else if ( (*(stats->f_cmpeq)) (value,stats->max) )
/* classify the value against the three cached candidates: best, guess1, guess2 */
1657 if ( (*(stats->f_cmpeq)) (value,stats->best) )
1659 else if ( (*(stats->f_cmpeq)) (value,stats->guess1) ) {
1660 stats->guess1_cnt++;
1661 stats->guess1_hits++;
1663 else if ( (*(stats->f_cmpeq)) (value,stats->guess2) )
1664 stats->guess2_hits++;
1665 else value_hit = false;
/* guess2 overtakes guess1 once it has more hits: swap values, lengths and hit counters */
1667 if (stats->guess2_hits > stats->guess1_hits) {
1668 swapDatum(stats->guess1,stats->guess2);
1669 swapInt(stats->guess1_len,stats->guess2_len);
1670 stats->guess1_cnt = stats->guess2_hits;
1671 swapLong(stats->guess1_hits, stats->guess2_hits);
/* guess1 replaces best once its total count is larger; both hit counters restart at 1 */
1673 if (stats->guess1_cnt > stats->best_cnt) {
1674 swapDatum(stats->best,stats->guess1);
1675 swapInt(stats->best_len,stats->guess1_len);
1676 swapLong(stats->best_cnt,stats->guess1_cnt);
1677 stats->guess1_hits = 1;
1678 stats->guess2_hits = 1;
/* presumably the cache-miss path: the new value is stored in guess2 -- TODO confirm the guarding condition */
1681 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1682 stats->guess1_hits = 1;
1683 stats->guess2_hits = 1;
1691 * vc_bucketcpy() -- copy an attribute value into a statistics bucket
/*
 * Stores 'value' of attribute 'attr' in *bucket.  The first branch covers
 * attributes that are both pass-by-value and of fixed length
 * (attlen != -1).  In the branch shown below, the byte length is attlen, or
 * VARSIZE(value) when attlen is -1; if that exceeds *bucket_len the old
 * buffer (when *bucket_len != 0) is pfree'd and a new one of len bytes is
 * palloc'd, and the value's bytes are then copied into the bucket.
 * *bucket_len is presumably updated to len on reallocation -- TODO confirm.
 */
1695 vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len)
1697 if (attr->attbyval && attr->attlen != -1)
1700 int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
1702 if (len > *bucket_len)
1704 if (*bucket_len != 0)
1705 pfree(DatumGetPointer(*bucket));
1706 *bucket = PointerGetDatum(palloc(len));
1709 memmove(DatumGetPointer(*bucket), DatumGetPointer(value), len);
1714 * vc_updstats() -- update pg_class statistics for one relation
1716 * This routine works for both index and heap relation entries in
1717 * pg_class. We violate no-overwrite semantics here by storing new
1718 * values for ntups, npages, and hasindex directly in the pg_class
1719 * tuple that's already on the page. The reason for this is that if
1720 * we updated these tuples in the usual way, then every tuple in pg_class
1721 * would be replaced every day. This would make planning and executing
1722 * historical queries very expensive.
/*
 * Parameters: relid selects the pg_class tuple (matched on its oid);
 * npages, ntups and hasindex are written in place into relpages,
 * reltuples and relhasindex of that tuple.  When vacrelstats is non-NULL
 * and va_natts > 0, pg_attribute is scanned as well: for attributes
 * that have a matching vacattrstats entry and a valid equality function,
 * attnvals is overwritten from a computed selectivity ratio, and for
 * attributes with valid lt/gt functions and initialized stats a
 * pg_statistic tuple carrying the textual min and max is inserted.
 * Raises elog(WARN) when no pg_class tuple is found for relid.
 */
1725 vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VRelStats *vacrelstats)
1727 Relation rd, ad, sd;
1728 HeapScanDesc rsdesc, asdesc;
1730 HeapTuple rtup, atup, stup;
1732 Form_pg_class pgcform;
1733 ScanKeyData rskey, askey;
1734 AttributeTupleForm attp;
1737 * update number of tuples and number of pages in pg_class
1739 ScanKeyEntryInitialize(&rskey, 0x0, ObjectIdAttributeNumber,
1740 ObjectIdEqualRegProcedure,
1741 ObjectIdGetDatum(relid));
1743 rd = heap_openr(RelationRelationName);
1744 rsdesc = heap_beginscan(rd, false, NowTimeQual, 1, &rskey);
1746 if (!HeapTupleIsValid(rtup = heap_getnext(rsdesc, 0, &rbuf)))
1747 elog(WARN, "pg_class entry for relid %d vanished during vacuuming",
1750 /* overwrite the existing statistics in the tuple */
1751 vc_setpagelock(rd, BufferGetBlockNumber(rbuf));
1752 pgcform = (Form_pg_class) GETSTRUCT(rtup);
1753 pgcform->reltuples = ntups;
1754 pgcform->relpages = npages;
1755 pgcform->relhasindex = hasindex;
1757 if ( vacrelstats != NULL && vacrelstats->va_natts > 0 )
1759 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1760 int natts = vacrelstats->va_natts;
1762 ad = heap_openr(AttributeRelationName);
1763 sd = heap_openr(StatisticRelationName);
1764 ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
1767 asdesc = heap_beginscan(ad, false, NowTimeQual, 1, &askey);
1769 while (HeapTupleIsValid(atup = heap_getnext(asdesc, 0, &abuf)))
1772 double selratio; /* average ratio of rows selected for a random constant */
1773 VacAttrStats *stats;
1774 Datum values[ Natts_pg_statistic ];
1775 char nulls[ Natts_pg_statistic ];
1777 attp = (AttributeTupleForm) GETSTRUCT(atup);
1778 if ( attp->attnum <= 0) /* skip system attributes for now, */
1779 /* they are unique anyway */
/* find the vacattrstats entry whose attnum matches this pg_attribute row */
1782 for (i = 0; i < natts; i++)
1784 if ( attp->attnum == vacattrstats[i].attr->attnum )
1789 stats = &(vacattrstats[i]);
1791 /* overwrite the existing statistics in the tuple */
1792 if (VacAttrStatsEqValid(stats)) {
1794 vc_setpagelock(ad, BufferGetBlockNumber(abuf));
1796 if (stats->nonnull_cnt + stats->null_cnt == 0 ||
1797 (stats->null_cnt <= 1 && stats->best_cnt == 1))
1799 else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
1801 double min_cnt_d = stats->min_cnt,
1802 max_cnt_d = stats->max_cnt,
1803 null_cnt_d = stats->null_cnt,
1804 nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */
1805 selratio = (min_cnt_d*min_cnt_d+max_cnt_d*max_cnt_d+null_cnt_d*null_cnt_d)/
1806 (nonnullcnt_d+null_cnt_d)/(nonnullcnt_d+null_cnt_d);
1809 double most = (double)(stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
1810 double total = ((double)stats->nonnull_cnt)+((double)stats->null_cnt);
1811 /* we assume count of other values are 20%
1812 of best count in table */
1813 selratio = (most*most + 0.20*most*(total-most))/total/total;
/* a zero ratio is stored as 0, any other ratio is stored multiplied by ATTNVALS_SCALE */
1817 attp->attnvals = (selratio ? (selratio * ATTNVALS_SCALE) : 0);
1818 WriteNoReleaseBuffer(abuf);
1820 /* DO PG_STATISTIC INSERTS */
1822 /* doing system relations, especially pg_statistic is a problem */
1823 if (VacAttrStatsLtGtValid(stats) && stats->initialized /* &&
1824 !IsSystemRelationName(pgcform->relname.data)*/) {
1825 func_ptr out_function;
1829 for (i = 0; i < Natts_pg_statistic; ++i) nulls[i] = ' ';
1832 * initialize values[]
1836 values[i++] = (Datum) relid; /* 1 */
1837 values[i++] = (Datum) attp->attnum; /* 2 */
1838 values[i++] = (Datum) InvalidOid; /* 3 */
/* min and max are rendered with the type's output function and converted to text datums */
1839 fmgr_info(stats->outfunc, &out_function, &dummy);
1840 out_string = (*out_function)(stats->min, stats->attr->atttypid);
1841 values[i++] = (Datum) fmgr(TextInRegProcedure,out_string);
1843 out_string = (char *)(*out_function)(stats->max, stats->attr->atttypid);
1844 values[i++] = (Datum) fmgr(TextInRegProcedure,out_string);
1849 stup = heap_formtuple(sdesc, values, nulls);
1852 * insert the tuple in the relation and get the tuple's oid.
1855 heap_insert(sd, stup);
/* presumably values[3] and values[4] are the two text datums built above -- TODO confirm i starts at 0 */
1856 pfree(DatumGetPointer(values[3]));
1857 pfree(DatumGetPointer(values[4]));
1862 heap_endscan(asdesc);
1867 /* XXX -- after write, should invalidate relcache in other backends */
1868 WriteNoReleaseBuffer(rbuf); /* heap_endscan release scan' buffers ? */
1870 /* invalidating system relations confuses the function cache
1871 of pg_operator and pg_opclass */
1872 if ( !IsSystemRelationName(pgcform->relname.data))
1873 RelationInvalidateHeapTuple(rd, rtup);
1875 /* that's all, folks */
1876 heap_endscan(rsdesc);
1881 * vc_delhilowstats() -- delete pg_statistics rows
/*
 * Scans pg_statistic -- restricted to rows whose starelid equals relid
 * when relid is not InvalidOid, otherwise unrestricted -- and removes
 * rows with heap_delete().  Each row's staattnum is compared against
 * attnums[i] + 1 for the attcnt entries of attnums; rows that are not to
 * be removed are skipped with 'continue'.
 * Presumably a row is kept only when attnums is non-empty and none of its
 * entries match -- TODO confirm against the full loop.
 */
1885 vc_delhilowstats(Oid relid, int attcnt, int *attnums)
1887 Relation pgstatistic;
1888 HeapScanDesc pgsscan;
1892 pgstatistic = heap_openr(StatisticRelationName);
1894 if (relid != InvalidOid ) {
1895 ScanKeyEntryInitialize(&pgskey, 0x0, Anum_pg_statistic_starelid,
1896 ObjectIdEqualRegProcedure,
1897 ObjectIdGetDatum(relid));
1898 pgsscan = heap_beginscan(pgstatistic, false, NowTimeQual, 1, &pgskey);
1901 pgsscan = heap_beginscan(pgstatistic, false, NowTimeQual, 0, NULL);
1903 while (HeapTupleIsValid(pgstup = heap_getnext(pgsscan, 0, NULL)))
1907 Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT (pgstup);
1910 for (i = 0; i < attcnt; i++)
/* attnums[] entries are compared after adding one, i.e. they are one less than staattnum */
1912 if ( pgs->staattnum == attnums[i] + 1 )
1916 continue; /* don't delete it */
1918 heap_delete(pgstatistic, &pgstup->t_ctid);
1921 heap_endscan(pgsscan);
1922 heap_close(pgstatistic);
/*
 * vc_setpagelock() -- builds an item pointer for block blkno, offset 1,
 * and passes it to RelationSetLockForWritePage() for rel.
 */
1925 static void vc_setpagelock(Relation rel, BlockNumber blkno)
1927 ItemPointerData itm;
1929 ItemPointerSet(&itm, blkno, 1);
1931 RelationSetLockForWritePage(rel, &itm);
1935 * vc_reappage() -- save a page on the array of reapped pages.
1937 * As a side effect of the way that the vacuuming loop for a given
1938 * relation works, higher pages come after lower pages in the array
1939 * (and highest tid on a page is last).
/*
 * Makes a palloc'd copy of the page descriptor vpc -- block number, free
 * space, used count, offset count and the vpd_noff recorded offsets --
 * and appends the copy to vpl through vc_vpinsert().  vpc itself is not
 * modified here.
 */
1942 vc_reappage(VPageList vpl, VPageDescr vpc)
1946 /* allocate a VPageDescrData entry */
/* the allocation is the fixed struct plus room for vpd_noff offset numbers */
1947 newvpd = (VPageDescr) palloc(sizeof(VPageDescrData) + vpc->vpd_noff*sizeof(OffsetNumber));
1950 if ( vpc->vpd_noff > 0 )
1951 memmove (newvpd->vpd_voff, vpc->vpd_voff, vpc->vpd_noff*sizeof(OffsetNumber));
1952 newvpd->vpd_blkno = vpc->vpd_blkno;
1953 newvpd->vpd_free = vpc->vpd_free;
1954 newvpd->vpd_nusd = vpc->vpd_nusd;
1955 newvpd->vpd_noff = vpc->vpd_noff;
1957 /* insert this page into vpl list */
1958 vc_vpinsert (vpl, newvpd);
/*
 * vc_vpinsert() -- appends vpnew to the vpl->vpl_pgdesc pointer array
 * and increments vpl->vpl_npages.  The array is allocated with room for
 * 100 entries on the first insert and grown by another 100 entries each
 * time the current count reaches a multiple of 100.
 */
1963 vc_vpinsert (VPageList vpl, VPageDescr vpnew)
1966 /* allocate a VPageDescr entry if needed */
1967 if ( vpl->vpl_npages == 0 )
1968 vpl->vpl_pgdesc = (VPageDescr*) palloc(100*sizeof(VPageDescr));
1969 else if ( vpl->vpl_npages % 100 == 0 )
1970 vpl->vpl_pgdesc = (VPageDescr*) repalloc(vpl->vpl_pgdesc, (vpl->vpl_npages+100)*sizeof(VPageDescr));
1971 vpl->vpl_pgdesc[vpl->vpl_npages] = vpnew;
1972 (vpl->vpl_npages)++;
/*
 * vc_free() -- walks the vrl list from its head, following vrl_next, with
 * the variable memory context of vc_portal made current; the previously
 * current memory context is restored before returning.
 * The list entries are presumably released inside the loop -- TODO
 * confirm what exactly is freed per entry.
 */
1977 vc_free(VRelList vrl)
1981 PortalVariableMemory pmem;
1983 pmem = PortalGetVariableMemory(vc_portal);
1984 old = MemoryContextSwitchTo((MemoryContext)pmem);
1986 while (vrl != (VRelList) NULL) {
1988 /* free rel list entry */
1990 vrl = vrl->vrl_next;
1994 (void) MemoryContextSwitchTo(old);
1998 * vc_getarchrel() -- open the archive relation for a heap relation
2000 * The archive relation is named 'a,XXXXX' for the heap relation
2001 * whose relid is XXXXX.
2004 #define ARCHIVE_PREFIX "a,"
/*
 * Builds the archive relation name as ARCHIVE_PREFIX followed by the
 * decimal rd_id of heaprel, in a palloc'd buffer of
 * sizeof(ARCHIVE_PREFIX) + NAMEDATALEN bytes, and opens that relation by
 * name with heap_openr().
 * NOTE(review): rd_id is formatted with %d; confirm this matches its
 * type and the name used where the archive relation is created.
 */
2007 vc_getarchrel(Relation heaprel)
2012 archrelname = palloc(sizeof(ARCHIVE_PREFIX) + NAMEDATALEN); /* bogus */
2013 sprintf(archrelname, "%s%d", ARCHIVE_PREFIX, heaprel->rd_id);
2015 archrel = heap_openr(archrelname);
2022 * vc_archive() -- write a tuple to an archive relation
2024 * In the future, this will invoke the archive access method. For
2025 * now, archive relations are on mag disk.
/* Hands htup to doinsert() for the archive relation archrel; no other work is done here. */
2028 vc_archive(Relation archrel, HeapTuple htup)
2030 doinsert(archrel, htup);
/*
 * vc_isarchrel() -- tests whether the relation name rname begins with
 * ARCHIVE_PREFIX (compared over strlen(ARCHIVE_PREFIX) characters).
 */
2034 vc_isarchrel(char *rname)
2036 if (strncmp(ARCHIVE_PREFIX, rname,strlen(ARCHIVE_PREFIX)) == 0)
/*
 * vc_find_eq() -- binary search for an element equal to 'elm'.
 *
 *	bot	points at the first element of an array sorted in ascending
 *		order according to compar
 *	nelem	is the number of elements in that array
 *	size	is the size of one element in bytes
 *	elm	points at the key; it has the same layout as an array element
 *	compar	returns < 0, 0 or > 0 when its first argument sorts before,
 *		equal to or after its second argument
 *
 *	Returns a pointer to the matching element, or NULL when there is
 *	none.  An empty array (nelem <= 0) yields NULL without touching
 *	'bot', so callers need not special-case empty lists.
 */
static char *
vc_find_eq (char *bot, int nelem, int size, char *elm, int (*compar)(char *, char *))
{
    int res;
    int low, high;

    if ( nelem <= 0 )
        return (NULL);

    /*
     * Most keys lie outside the array altogether, so test both ends
     * first and give up early when the key is out of range.
     */
    res = compar (bot, elm);
    if ( res > 0 )
        return (NULL);
    if ( res == 0 )
        return (bot);

    high = nelem - 1;
    res = compar (elm, bot + high*size);
    if ( res > 0 )
        return (NULL);
    if ( res == 0 )
        return (bot + high*size);

    /* both ends are ruled out: search strictly between them */
    low = 1;
    high = high - 1;
    while ( low <= high )
    {
        int celm = low + (high - low) / 2;

        res = compar (elm, bot + celm*size);
        if ( res == 0 )
            return (bot + celm*size);
        if ( res < 0 )
            high = celm - 1;
        else
            low = celm + 1;
    }

    return (NULL);

} /* vc_find_eq */
/*
 * vc_cmp_blk() -- comparison callback over page descriptors.  left and
 * right each point at a VPageDescr (itself a pointer); the vpd_blkno
 * fields of the two descriptors are loaded for comparison.
 * Presumably returns <0 / 0 / >0 by block number -- TODO confirm.
 */
2096 vc_cmp_blk (char *left, char *right)
2098 BlockNumber lblk, rblk;
2100 lblk = (*((VPageDescr*)left))->vpd_blkno;
2101 rblk = (*((VPageDescr*)right))->vpd_blkno;
/*
 * vc_cmp_offno() -- comparison callback over offset numbers.  left and
 * right each point at an OffsetNumber; the less-than and equal cases are
 * tested in that order.
 * Presumably returns <0 / 0 / >0 accordingly -- TODO confirm.
 */
2112 vc_cmp_offno (char *left, char *right)
2115 if ( *(OffsetNumber*)left < *(OffsetNumber*)right )
2117 if ( *(OffsetNumber*)left == *(OffsetNumber*)right )
2121 } /* vc_cmp_offno */
/*
 * Scans pg_index for rows whose indrelid equals relid and collects their
 * indexrelid oids in a palloc'd array (10 entries to start with, enlarged
 * with repalloc).  Each collected oid is then opened with index_open();
 * an index that cannot be opened is reported with a NOTICE.  When Irel is
 * non-NULL, *Irel receives a palloc'd array for the opened relations, or
 * NULL when *nindices ends up 0.
 * *nindices is presumably set to the number of indexes opened -- TODO
 * confirm.
 */
2125 vc_getindices (Oid relid, int *nindices, Relation **Irel)
2131 HeapScanDesc pgiscan;
2140 ioid = (Oid *) palloc(10*sizeof(Oid));
2142 /* prepare a heap scan on the pg_index relation */
2143 pgindex = heap_openr(IndexRelationName);
2144 pgidesc = RelationGetTupleDescriptor(pgindex);
2146 ScanKeyEntryInitialize(&pgikey, 0x0, Anum_pg_index_indrelid,
2147 ObjectIdEqualRegProcedure,
2148 ObjectIdGetDatum(relid));
2150 pgiscan = heap_beginscan(pgindex, false, NowTimeQual, 1, &pgikey);
2152 while (HeapTupleIsValid(pgitup = heap_getnext(pgiscan, 0, NULL))) {
2153 d = (Datum) heap_getattr(pgitup, InvalidBuffer, Anum_pg_index_indexrelid,
2157 ioid = (Oid *) repalloc(ioid, (i+10)*sizeof(Oid));
/* the oid is stored at index i-1, so i has presumably been incremented for this row already -- TODO confirm */
2158 ioid[i-1] = DatumGetObjectId(d);
2161 heap_endscan(pgiscan);
2162 heap_close(pgindex);
2164 if ( i == 0 ) { /* No one index found */
2169 if ( Irel != (Relation **) NULL )
2170 *Irel = (Relation *) palloc(i * sizeof(Relation));
/* the collected oids are consumed from the end of ioid[] towards the front */
2172 for (k = 0; i > 0; )
2174 irel = index_open(ioid[--i]);
2175 if ( irel != (Relation) NULL )
2177 if ( Irel != (Relation **) NULL )
2184 elog (NOTICE, "CAN't OPEN INDEX %u - SKIP IT", ioid[i]);
2189 if ( Irel != (Relation **) NULL && *nindices == 0 )
2192 *Irel = (Relation *) NULL;
2195 } /* vc_getindices */
/*
 * vc_clsindices() -- calls index_close() on the nindices relations in
 * Irel, from the last array entry down to the first.  A NULL Irel is
 * tested for before the loop.
 */
2199 vc_clsindices (int nindices, Relation *Irel)
2202 if ( Irel == (Relation*) NULL )
2205 while (nindices--) {
2206 index_close (Irel[nindices]);
2210 } /* vc_clsindices */
/*
 * Allocates an array of nindices IndDesc entries in *Idesc and fills one
 * entry per index in Irel: the pg_index tuple is fetched from the
 * syscache (INDEXRELID, keyed by the index's rd_id) and kept in tform;
 * the key attributes in indkey[] are counted up to the first
 * InvalidAttrNumber or INDEX_MAX_KEYS; for an index with a valid indproc
 * the embedded FuncIndexInfo is filled in (argument count, procedure oid,
 * empty name), otherwise finfoP is set to NULL.
 */
2214 vc_mkindesc (Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc)
2217 HeapTuple pgIndexTup;
2218 AttrNumber *attnumP;
2222 *Idesc = (IndDesc *) palloc (nindices * sizeof (IndDesc));
2224 for (i = 0, idcur = *Idesc; i < nindices; i++, idcur++) {
2226 SearchSysCacheTuple(INDEXRELID,
2227 ObjectIdGetDatum(Irel[i]->rd_id),
2230 idcur->tform = (IndexTupleForm)GETSTRUCT(pgIndexTup);
/* the loop below has an empty body: it only counts key attributes into natts */
2231 for (attnumP = &(idcur->tform->indkey[0]), natts = 0;
2232 *attnumP != InvalidAttrNumber && natts != INDEX_MAX_KEYS;
2233 attnumP++, natts++);
2234 if (idcur->tform->indproc != InvalidOid) {
2235 idcur->finfoP = &(idcur->finfo);
2236 FIgetnArgs(idcur->finfoP) = natts;
2238 FIgetProcOid(idcur->finfoP) = idcur->tform->indproc;
2239 *(FIgetname(idcur->finfoP)) = '\0';
2241 idcur->finfoP = (FuncIndexInfo *) NULL;
2243 idcur->natts = natts;
2250 vc_enough_space (VPageDescr vpd, Size len)
2253 len = DOUBLEALIGN(len);
2255 if ( len > vpd->vpd_free )
2258 if ( vpd->vpd_nusd < vpd->vpd_noff ) /* there are free itemid(s) */
2259 return (true); /* and len <= free_space */
2261 /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
2262 if ( len <= vpd->vpd_free - sizeof (ItemIdData) )
2267 } /* vc_enough_space */