1 /*-------------------------------------------------------------------------
4 * the postgres vacuum cleaner
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.20 1997/02/18 04:13:57 momjian Exp $
12 *-------------------------------------------------------------------------
16 #include <sys/types.h>
24 #include <utils/portal.h>
25 #include <access/genam.h>
26 #include <access/heapam.h>
27 #include <access/xact.h>
28 #include <storage/bufmgr.h>
29 #include <access/transam.h>
30 #include <catalog/pg_index.h>
31 #include <catalog/index.h>
32 #include <catalog/catname.h>
33 #include <catalog/catalog.h>
34 #include <catalog/pg_class.h>
35 #include <catalog/pg_proc.h>
36 #include <catalog/pg_statistic.h>
37 #include <catalog/pg_type.h>
38 #include <catalog/pg_operator.h>
39 #include <storage/smgr.h>
40 #include <storage/lmgr.h>
41 #include <utils/inval.h>
42 #include <utils/mcxt.h>
43 #include <utils/inval.h>
44 #include <utils/syscache.h>
45 #include <commands/vacuum.h>
46 #include <parser/catalog_utils.h>
47 #include <storage/bufpage.h>
48 #include "storage/shmem.h"
49 #ifndef HAVE_GETRUSAGE
50 # include <rusagestub.h>
52 # include <sys/time.h>
53 # include <sys/resource.h>
/*
 * True while a vacuum run owns the "pg_vlock" lock file: set once the
 * exclusive create of that file has succeeded, and reset to false both at
 * normal shutdown and on the abort path after the file is unlinked.
 * Not static, so it is visible outside this file.
 */
56 bool VacuumRunning = false;
/*
 * elog() level used for vacuum's progress reports in this file; vacuum()
 * assigns it either NOTICE or DEBUG.
 */
57 static int MESSAGE_LEVEL; /* message level */
/*
 * Exchange the values of two lvalues of the named type through a temporary.
 * Each macro expands to a bare { ... } block (not do { } while (0)) and its
 * arguments are unparenthesized and evaluated more than once, so pass plain
 * variables and take care when using one as the body of an unbraced if/else.
 */
59 #define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
60 #define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
61 #define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
/*
 * Predicates on a pointer whose target has cmpeq/cmplt/cmpgt/outfunc fields:
 * EqValid tests only cmpeq; LtGtValid requires cmplt, cmpgt and outfunc to
 * all be valid procedures (it does not look at cmpeq).
 */
62 #define VacAttrStatsEqValid(stats) ( RegProcedureIsValid(stats->cmpeq))
63 #define VacAttrStatsLtGtValid(stats) ( RegProcedureIsValid(stats->cmplt) && \
64 RegProcedureIsValid(stats->cmpgt) && \
65 RegProcedureIsValid(stats->outfunc) )
68 /* non-export function prototypes */
/* start-up and shut-down of the vacuum cleaner (lock file handling) */
69 static void vc_init(void);
70 static void vc_shutdown(void);
/* driver: builds the list of relations, then vacuums them one at a time */
71 static void vc_vacuum(NameData *VacRelP);
72 static VRelList vc_getrels(Portal p, NameData *VacRelP);
73 static void vc_vacone (Oid relid);
/* per-relation heap passes; Vvpl/Fvpl are the page lists filled by vc_scanheap */
74 static void vc_scanheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl);
75 static void vc_rpfheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel);
76 static void vc_vacheap (VRelStats *vacrelstats, Relation onerel, VPageList vpl);
77 static void vc_vacpage (Page page, VPageDescr vpd, Relation archrel);
78 static void vc_vaconeind (VPageList vpl, Relation indrel, int nhtups);
79 static void vc_scanoneind (Relation indrel, int nhtups);
80 static void vc_attrstats(Relation onerel, VacAttrStats *vacattrstats, HeapTuple htup);
81 static void vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len);
82 static void vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VacAttrStats *vacattrstats);
83 static void vc_delhilowstats(Oid relid);
84 static void vc_setpagelock(Relation rel, BlockNumber blkno);
85 static VPageDescr vc_tidreapped (ItemPointer itemptr, VPageList vpl);
86 static void vc_reappage (VPageList vpl, VPageDescr vpc);
87 static void vc_vpinsert (VPageList vpl, VPageDescr vpnew);
88 static void vc_free(Portal p, VRelList vrl);
89 static void vc_getindices (Oid relid, int *nindices, Relation **Irel);
90 static void vc_clsindices (int nindices, Relation *Irel);
91 static Relation vc_getarchrel(Relation heaprel);
92 static void vc_archive(Relation archrel, HeapTuple htup);
93 static bool vc_isarchrel(char *rname);
94 static void vc_mkindesc (Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
/*
 * vc_find_eq takes a comparison callback of type int (*)(char *, char *);
 * vc_cmp_blk and vc_cmp_offno below have exactly that signature.
 */
95 static char * vc_find_eq (char *bot, int nelem, int size, char *elm, int (*compar)(char *, char *));
96 static int vc_cmp_blk (char *left, char *right);
97 static int vc_cmp_offno (char *left, char *right);
98 static bool vc_enough_space (VPageDescr vpd, Size len);
101 vacuum(char *vacrel, bool verbose)
106 MESSAGE_LEVEL = NOTICE;
108 MESSAGE_LEVEL = DEBUG;
110 /* vacrel gets de-allocated on transaction commit */
112 /* initialize vacuum cleaner */
115 /* vacuum the database */
118 strcpy(VacRel.data,vacrel);
129 * vc_init(), vc_shutdown() -- start up and shut down the vacuum cleaner.
131 * We run exactly one vacuum cleaner at a time. We use the file system
132 * to guarantee an exclusive lock on vacuuming, since a single vacuum
133 * cleaner instantiation crosses transaction boundaries, and we'd lose
134 * postgres-style locks at the end of every transaction.
136 * The strangeness with committing and starting transactions in the
137 * init and shutdown routines is due to the fact that the vacuum cleaner
138 * is invoked via a sql command, and so is already executing inside
139 * a transaction. We need to leave ourselves in a predictable state
140 * on entry and exit to the vacuum cleaner. We commit the transaction
141 * started in PostgresMain() inside vc_init(), and start one in
142 * vc_shutdown() to match the commit waiting for us back in
150 if ((fd = open("pg_vlock", O_CREAT|O_EXCL, 0600)) < 0)
151 elog(WARN, "can't create lock file -- another vacuum cleaner running?");
156 * By here, exclusive open on the lock file succeeded. If we abort
157 * for any reason during vacuuming, we need to remove the lock file.
158 * This global variable is checked in the transaction manager on xact
159 * abort, and the routine vc_abort() is called if necessary.
162 VacuumRunning = true;
164 /* matches the StartTransaction in PostgresMain() */
165 CommitTransactionCommand();
171 /* on entry, not in a transaction */
172 if (unlink("pg_vlock") < 0)
173 elog(WARN, "vacuum: can't destroy lock file!");
175 /* okay, we're done */
176 VacuumRunning = false;
178 /* matches the CommitTransaction in PostgresMain() */
179 StartTransactionCommand();
186 /* on abort, remove the vacuum cleaner lock file */
187 (void) unlink("pg_vlock");
189 VacuumRunning = false;
193 * vc_vacuum() -- vacuum the database.
195 * This routine builds a list of relations to vacuum, and then calls
196 * code that vacuums them one at a time. We are careful to vacuum each
197 * relation in a separate transaction in order to avoid holding too many
201 vc_vacuum(NameData *VacRelP)
208 * Create a portal for safe memory across transactions. We need to
209 * palloc the name space for it because our hash function expects
210 * the name to be on a longword boundary. CreatePortal copies the
211 * name to safe storage for us.
214 pname = (char *) palloc(strlen(VACPNAME) + 1);
215 strcpy(pname, VACPNAME);
216 p = CreatePortal(pname);
219 /* get list of relations */
220 vrl = vc_getrels(p, VacRelP);
223 vc_delhilowstats(vrl->vrl_relid);
225 vc_delhilowstats(InvalidOid);
227 /* vacuum each heap relation */
228 for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
229 vc_vacone (cur->vrl_relid);
237 vc_getrels(Portal p, NameData *VacRelP)
241 HeapScanDesc pgcscan;
244 PortalVariableMemory portalmem;
255 StartTransactionCommand();
258 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relname,
259 NameEqualRegProcedure,
260 PointerGetDatum(VacRelP->data));
262 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relkind,
263 CharacterEqualRegProcedure, CharGetDatum('r'));
266 portalmem = PortalGetVariableMemory(p);
267 vrl = cur = (VRelList) NULL;
269 pgclass = heap_openr(RelationRelationName);
270 pgcdesc = RelationGetTupleDescriptor(pgclass);
272 pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
274 while (HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &buf))) {
279 * We have to be careful not to vacuum the archive (since it
280 * already contains vacuumed tuples), and not to vacuum
281 * relations on write-once storage managers like the Sony
282 * jukebox at Berkeley.
285 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relname,
289 /* skip archive relations */
290 if (vc_isarchrel(rname)) {
295 /* don't vacuum large objects for now - something breaks when we do */
296 if ( (strlen(rname) > 4) && rname[0] == 'X' &&
297 rname[1] == 'i' && rname[2] == 'n' &&
298 (rname[3] == 'v' || rname[3] == 'x'))
300 elog (NOTICE, "Rel %.*s: can't vacuum LargeObjects now",
306 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relsmgr,
308 smgrno = DatumGetInt16(d);
310 /* skip write-once storage managers */
311 if (smgriswo(smgrno)) {
316 d = (Datum) heap_getattr(pgctup, buf, Anum_pg_class_relkind,
319 rkind = DatumGetChar(d);
321 /* skip system relations */
324 elog(NOTICE, "Vacuum: can not process index and certain system tables" );
328 /* get a relation list entry for this guy */
329 old = MemoryContextSwitchTo((MemoryContext)portalmem);
330 if (vrl == (VRelList) NULL) {
331 vrl = cur = (VRelList) palloc(sizeof(VRelListData));
333 cur->vrl_next = (VRelList) palloc(sizeof(VRelListData));
336 (void) MemoryContextSwitchTo(old);
338 cur->vrl_relid = pgctup->t_oid;
339 cur->vrl_next = (VRelList) NULL;
341 /* wei hates it if you forget to do this */
345 elog(NOTICE, "Vacuum: table not found" );
348 heap_endscan(pgcscan);
351 CommitTransactionCommand();
357 * vc_vacone() -- vacuum one heap relation
359 * This routine vacuums a single heap, cleans out its indices, and
360 * updates its npages and ntups statistics.
362 * Doing one heap at a time incurs extra overhead, since we need to
363 * check that the heap exists again just before we vacuum it. The
364 * reason that we do this is so that vacuuming can be spread across
365 * many small transactions. Otherwise, two-phase locking would require
366 * us to lock the entire database during one pass of the vacuum cleaner.
369 vc_vacone (Oid relid)
373 HeapTuple pgctup, pgttup;
375 HeapScanDesc pgcscan;
378 VPageListData Vvpl; /* List of pages to vacuum and/or clean indices */
379 VPageListData Fvpl; /* List of pages with space enough for re-using */
382 int32 nindices, i, attr_cnt;
383 AttributeTupleForm *attr;
384 VRelStats *vacrelstats;
386 StartTransactionCommand();
388 ScanKeyEntryInitialize(&pgckey, 0x0, ObjectIdAttributeNumber,
389 ObjectIdEqualRegProcedure,
390 ObjectIdGetDatum(relid));
392 pgclass = heap_openr(RelationRelationName);
393 pgcdesc = RelationGetTupleDescriptor(pgclass);
394 pgcscan = heap_beginscan(pgclass, false, NowTimeQual, 1, &pgckey);
397 * Race condition -- if the pg_class tuple has gone away since the
398 * last time we saw it, we don't need to vacuum it.
401 if (!HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &pgcbuf))) {
402 heap_endscan(pgcscan);
404 CommitTransactionCommand();
408 /* now open the class and vacuum it */
409 onerel = heap_open(relid);
411 attr_cnt = onerel->rd_att->natts;
412 attr = onerel->rd_att->attrs;
414 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
415 vacrelstats->relid = relid;
416 vacrelstats->npages = vacrelstats->ntups = 0;
417 vacrelstats->hasindex = false;
418 vacrelstats->vacattrstats =
419 (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
421 for (i = 0; i < attr_cnt; i++) {
422 Operator func_operator;
423 OperatorTupleForm pgopform;
424 VacAttrStats *stats = &vacrelstats->vacattrstats[i];
426 stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
427 memmove(stats->attr,attr[i],ATTRIBUTE_TUPLE_SIZE);
428 stats->best = stats->guess1 = stats->guess2 = 0;
429 stats->max = stats->min = 0;
430 stats->best_len = stats->guess1_len = stats->guess2_len = 0;
431 stats->max_len = stats->min_len = 0;
432 stats->initialized = false;
433 stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
434 stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
436 func_operator = oper("=",stats->attr->atttypid,stats->attr->atttypid,true);
437 if (func_operator != NULL) {
438 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
439 stats->cmpeq = pgopform->oprcode;
441 else stats->cmpeq = InvalidOid;
442 func_operator = oper("<",stats->attr->atttypid,stats->attr->atttypid,true);
443 if (func_operator != NULL) {
444 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
445 stats->cmplt = pgopform->oprcode;
447 else stats->cmplt = InvalidOid;
448 func_operator = oper(">",stats->attr->atttypid,stats->attr->atttypid,true);
449 if (func_operator != NULL) {
450 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
451 stats->cmpgt = pgopform->oprcode;
453 else stats->cmpgt = InvalidOid;
454 func_operator = oper(">",stats->attr->atttypid,stats->attr->atttypid,true);
455 if (func_operator != NULL) {
456 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
457 stats->cmpgt = pgopform->oprcode;
459 else stats->cmpgt = InvalidOid;
460 pgttup = SearchSysCacheTuple(TYPOID,
461 ObjectIdGetDatum(stats->attr->atttypid),
463 if (HeapTupleIsValid(pgttup))
464 stats->outfunc = ((TypeTupleForm) GETSTRUCT(pgttup))->typoutput;
465 else stats->outfunc = InvalidOid;
468 /* we require the relation to be locked until the indices are cleaned */
469 RelationSetLockForWrite(onerel);
472 Vvpl.vpl_npages = Fvpl.vpl_npages = 0;
473 vc_scanheap(vacrelstats, onerel, &Vvpl, &Fvpl);
475 /* Now open indices */
476 Irel = (Relation *) NULL;
477 vc_getindices(vacrelstats->relid, &nindices, &Irel);
480 vacrelstats->hasindex = true;
482 vacrelstats->hasindex = false;
484 /* Clean/scan index relation(s) */
485 if ( Irel != (Relation*) NULL )
487 if ( Vvpl.vpl_npages > 0 )
489 for (i = 0; i < nindices; i++)
490 vc_vaconeind (&Vvpl, Irel[i], vacrelstats->ntups);
492 else /* just scan indices to update statistic */
494 for (i = 0; i < nindices; i++)
495 vc_scanoneind (Irel[i], vacrelstats->ntups);
499 if ( Fvpl.vpl_npages > 0 ) /* Try to shrink heap */
500 vc_rpfheap (vacrelstats, onerel, &Vvpl, &Fvpl, nindices, Irel);
503 if ( Irel != (Relation*) NULL )
504 vc_clsindices (nindices, Irel);
505 if ( Vvpl.vpl_npages > 0 ) /* Clean pages from Vvpl list */
506 vc_vacheap (vacrelstats, onerel, &Vvpl);
509 /* ok - free Vvpl list of reapped pages */
510 if ( Vvpl.vpl_npages > 0 )
512 vpp = Vvpl.vpl_pgdesc;
513 for (i = 0; i < Vvpl.vpl_npages; i++, vpp++)
515 pfree (Vvpl.vpl_pgdesc);
516 if ( Fvpl.vpl_npages > 0 )
517 pfree (Fvpl.vpl_pgdesc);
520 /* all done with this class */
522 heap_endscan(pgcscan);
525 /* update statistics in pg_class */
526 vc_updstats(vacrelstats->relid, vacrelstats->npages, vacrelstats->ntups,
527 vacrelstats->hasindex, vacrelstats->vacattrstats);
529 /* next command frees attribute stats */
531 CommitTransactionCommand();
535 * vc_scanheap() -- scan an open heap relation
537 * This routine sets commit times, constructs Vvpl list of
538 * empty/uninitialized pages and pages with dead tuples and
539 * ~LP_USED line pointers, constructs Fvpl list of pages
540 * appropriate for purposes of shrinking and maintains statistics
541 * on the number of live tuples in a heap.
544 vc_scanheap (VRelStats *vacrelstats, Relation onerel,
545 VPageList Vvpl, VPageList Fvpl)
552 Page page, tempPage = NULL;
553 OffsetNumber offnum, maxoff;
554 bool pgchanged, tupgone, dobufrel, notup;
555 AbsoluteTime purgetime, expiretime;
556 RelativeTime preservetime;
559 uint32 nvac, ntups, nunused, ncrash, nempg, nnepg, nchpg, nemend;
561 Size min_tlen = MAXTUPLEN;
563 int32 i/*, attr_cnt*/;
564 struct rusage ru0, ru1;
566 getrusage(RUSAGE_SELF, &ru0);
568 nvac = ntups = nunused = ncrash = nempg = nnepg = nchpg = nemend = 0;
571 relname = (RelationGetRelationName(onerel))->data;
573 nblocks = RelationGetNumberOfBlocks(onerel);
575 /* calculate the purge time: tuples that expired before this time
576 will be archived or deleted */
577 purgetime = GetCurrentTransactionStartTime();
578 expiretime = (AbsoluteTime)onerel->rd_rel->relexpires;
579 preservetime = (RelativeTime)onerel->rd_rel->relpreserved;
581 if (RelativeTimeIsValid(preservetime) && (preservetime)) {
582 purgetime -= preservetime;
583 if (AbsoluteTimeIsBackwardCompatiblyValid(expiretime) &&
584 expiretime > purgetime)
585 purgetime = expiretime;
588 else if (AbsoluteTimeIsBackwardCompatiblyValid(expiretime))
589 purgetime = expiretime;
591 vpc = (VPageDescr) palloc (sizeof(VPageDescrData) + MaxOffsetNumber*sizeof(OffsetNumber));
594 for (blkno = 0; blkno < nblocks; blkno++) {
595 buf = ReadBuffer(onerel, blkno);
596 page = BufferGetPage(buf);
597 vpc->vpd_blkno = blkno;
600 if (PageIsNew(page)) {
601 elog (NOTICE, "Rel %.*s: Uninitialized page %u - fixing",
602 NAMEDATALEN, relname, blkno);
603 PageInit (page, BufferGetPageSize (buf), 0);
604 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
605 frsize += (vpc->vpd_free - sizeof (ItemIdData));
608 vc_reappage (Vvpl, vpc);
613 if (PageIsEmpty(page)) {
614 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
615 frsize += (vpc->vpd_free - sizeof (ItemIdData));
618 vc_reappage (Vvpl, vpc);
625 maxoff = PageGetMaxOffsetNumber(page);
626 for (offnum = FirstOffsetNumber;
628 offnum = OffsetNumberNext(offnum)) {
629 itemid = PageGetItemId(page, offnum);
632 * Collect un-used items too - it's possible to have
633 * indices pointing here after crash.
635 if (!ItemIdIsUsed(itemid)) {
636 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
641 htup = (HeapTuple) PageGetItem(page, itemid);
644 if (!AbsoluteTimeIsBackwardCompatiblyValid(htup->t_tmin) &&
645 TransactionIdIsValid((TransactionId)htup->t_xmin)) {
647 if (TransactionIdDidAbort(htup->t_xmin)) {
649 } else if (TransactionIdDidCommit(htup->t_xmin)) {
650 htup->t_tmin = TransactionIdGetCommitTime(htup->t_xmin);
652 } else if ( !TransactionIdIsInProgress (htup->t_xmin) ) {
654 * Not Aborted, Not Committed, Not in Progress -
655 * so it is from a crashed process. - vadim 11/26/96
661 elog (MESSAGE_LEVEL, "Rel %.*s: InsertTransactionInProgress %u for TID %u/%u",
662 NAMEDATALEN, relname, htup->t_xmin, blkno, offnum);
666 if (TransactionIdIsValid((TransactionId)htup->t_xmax)) {
667 if (TransactionIdDidAbort(htup->t_xmax)) {
668 StoreInvalidTransactionId(&(htup->t_xmax));
670 } else if (TransactionIdDidCommit(htup->t_xmax)) {
671 if (!AbsoluteTimeIsBackwardCompatiblyReal(htup->t_tmax)) {
673 htup->t_tmax = TransactionIdGetCommitTime(htup->t_xmax);
678 * Reap the dead tuple if its expiration time is
682 if (htup->t_tmax < purgetime) {
689 * Is it possible at all ? - vadim 11/26/96
691 if ( !TransactionIdIsValid((TransactionId)htup->t_xmin) )
693 elog (NOTICE, "TID %u/%u: INSERT_TRANSACTION_ID IS INVALID. \
694 DELETE_TRANSACTION_ID_VALID %d, TUPGONE %d.",
695 TransactionIdIsValid((TransactionId)htup->t_xmax),
700 * It's possible! But where does it come from?
701 * And should we fix it? - vadim 11/28/96
703 itemptr = &(htup->t_ctid);
704 if ( !ItemPointerIsValid (itemptr) ||
705 BlockIdGetBlockNumber(&(itemptr->ip_blkid)) != blkno )
707 elog (NOTICE, "ITEM POINTER IS INVALID: %u/%u FOR %u/%u. TUPGONE %d.",
708 BlockIdGetBlockNumber(&(itemptr->ip_blkid)),
709 itemptr->ip_posid, blkno, offnum, tupgone);
715 if ( htup->t_len != itemid->lp_len )
717 elog (NOTICE, "PAGEHEADER' LEN %u IS NOT THE SAME AS HTUP' %u FOR %u/%u.TUPGONE %d.",
718 itemid->lp_len, htup->t_len, blkno, offnum, tupgone);
720 if ( !OidIsValid(htup->t_oid) )
722 elog (NOTICE, "OID IS INVALID FOR %u/%u.TUPGONE %d.",
723 blkno, offnum, tupgone);
729 if ( tempPage == (Page) NULL )
733 pageSize = PageGetPageSize(page);
734 tempPage = (Page) palloc(pageSize);
735 memmove (tempPage, page, pageSize);
738 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
741 lpp->lp_flags &= ~LP_USED;
743 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
749 if ( htup->t_len < min_tlen )
750 min_tlen = htup->t_len;
751 if ( htup->t_len > max_tlen )
752 max_tlen = htup->t_len;
753 vc_attrstats(onerel, vacrelstats->vacattrstats, htup);
764 if ( tempPage != (Page) NULL )
765 { /* Some tuples are gone */
766 PageRepairFragmentation(tempPage);
767 vpc->vpd_free = ((PageHeader)tempPage)->pd_upper - ((PageHeader)tempPage)->pd_lower;
768 frsize += vpc->vpd_free;
769 vc_reappage (Vvpl, vpc);
771 tempPage = (Page) NULL;
773 else if ( vpc->vpd_noff > 0 )
774 { /* there are only ~LP_USED line pointers */
775 vpc->vpd_free = ((PageHeader)page)->pd_upper - ((PageHeader)page)->pd_lower;
776 frsize += vpc->vpd_free;
777 vc_reappage (Vvpl, vpc);
789 /* save stats in the rel list for use later */
790 vacrelstats->ntups = ntups;
791 vacrelstats->npages = nblocks;
792 /* vacrelstats->natts = attr_cnt;*/
794 min_tlen = max_tlen = 0;
795 vacrelstats->min_tlen = min_tlen;
796 vacrelstats->max_tlen = max_tlen;
798 Vvpl->vpl_nemend = nemend;
799 Fvpl->vpl_nemend = nemend;
802 * Try to build Fvpl, keeping in mind that we can't use the free space
803 * of "empty" end-pages, nor of the last page if it was reapped.
805 if ( Vvpl->vpl_npages - nemend > 0 )
807 int nusf; /* blocks usefull for re-using */
809 nusf = Vvpl->vpl_npages - nemend;
810 if ( (Vvpl->vpl_pgdesc[nusf-1])->vpd_blkno == nblocks - nemend - 1 )
813 for (i = 0; i < nusf; i++)
815 vp = Vvpl->vpl_pgdesc[i];
816 if ( vc_enough_space (vp, min_tlen) )
818 vc_vpinsert (Fvpl, vp);
819 frsusf += vp->vpd_free;
824 getrusage(RUSAGE_SELF, &ru1);
826 elog (MESSAGE_LEVEL, "Rel %.*s: Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
827 Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.",
828 NAMEDATALEN, relname,
829 nblocks, nchpg, Vvpl->vpl_npages, nempg, nnepg,
830 ntups, nvac, ncrash, nunused, min_tlen, max_tlen,
831 frsize, frsusf, nemend, Fvpl->vpl_npages,
832 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
833 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
839 * vc_rpfheap() -- try to repair the relation's fragmentation
841 * This routine marks dead tuples as unused and tries to re-use dead space
842 * by moving tuples (and inserting index entries if needed). It constructs
843 * the Nvpl list of freed pages (moved tuples) and cleans the indices
844 * for them after committing (in a hackish manner - without losing locks
845 * or freeing memory!) the current transaction. It truncates the relation
846 * if some end-blocks have gone away.
849 vc_rpfheap (VRelStats *vacrelstats, Relation onerel,
850 VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel)
854 AbsoluteTime myCTM = 0;
857 Page page, ToPage = NULL;
858 OffsetNumber offnum = 0, maxoff = 0, newoff, moff;
859 ItemId itemid, newitemid;
860 HeapTuple htup, newtup;
861 TupleDesc tupdesc = NULL;
862 Datum *idatum = NULL;
864 InsertIndexResult iresult;
866 VPageDescr ToVpd = NULL, Fvplast, Vvplast, vpc, *vpp;
868 IndDesc *Idesc, *idcur;
869 int Fblklast, Vblklast, i;
871 int nmoved, Fnpages, Vnpages;
873 bool isempty, dowrite;
875 struct rusage ru0, ru1;
877 getrusage(RUSAGE_SELF, &ru0);
879 myXID = GetCurrentTransactionId();
880 myCID = GetCurrentCommandId();
882 if ( Irel != (Relation*) NULL ) /* preparation for index' inserts */
884 vc_mkindesc (onerel, nindices, Irel, &Idesc);
885 tupdesc = RelationGetTupleDescriptor(onerel);
886 idatum = (Datum *) palloc(INDEX_MAX_KEYS * sizeof (*idatum));
887 inulls = (char *) palloc(INDEX_MAX_KEYS * sizeof (*inulls));
890 /* if the relation has an archive, open it */
891 if (onerel->rd_rel->relarch != 'n')
893 archrel = vc_getarchrel(onerel);
894 /* Archive tuples from "empty" end-pages */
895 for ( vpp = Vvpl->vpl_pgdesc + Vvpl->vpl_npages - 1,
896 i = Vvpl->vpl_nemend; i > 0; i--, vpp-- )
898 if ( (*vpp)->vpd_noff > 0 )
900 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
901 page = BufferGetPage(buf);
902 Assert ( !PageIsEmpty(page) );
903 vc_vacpage (page, *vpp, archrel);
909 archrel = (Relation) NULL;
912 Fnpages = Fvpl->vpl_npages;
913 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
914 Fblklast = Fvplast->vpd_blkno;
915 Assert ( Vvpl->vpl_npages > Vvpl->vpl_nemend );
916 Vnpages = Vvpl->vpl_npages - Vvpl->vpl_nemend;
917 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
918 Vblklast = Vvplast->vpd_blkno;
919 Assert ( Vblklast >= Fblklast );
920 ToBuf = InvalidBuffer;
923 vpc = (VPageDescr) palloc (sizeof(VPageDescrData) + MaxOffsetNumber*sizeof(OffsetNumber));
924 vpc->vpd_nusd = vpc->vpd_noff = 0;
926 nblocks = vacrelstats->npages;
927 for (blkno = nblocks - Vvpl->vpl_nemend - 1; ; blkno--)
929 /* if it's reapped page and it was used by me - quit */
930 if ( blkno == Fblklast && Fvplast->vpd_nusd > 0 )
933 buf = ReadBuffer(onerel, blkno);
934 page = BufferGetPage(buf);
938 isempty = PageIsEmpty(page);
941 if ( blkno == Vblklast ) /* it's reapped page */
943 if ( Vvplast->vpd_noff > 0 ) /* there are dead tuples */
944 { /* on this page - clean */
945 Assert ( ! isempty );
946 vc_vacpage (page, Vvplast, archrel);
951 Assert ( --Vnpages > 0 );
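952 /* get prev reapped page from Vvpl -- NOTE(review): Vnpages is decremented inside the Assert just above, so this lookup re-reads the same entry if Assert() is compiled out */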
953 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
954 Vblklast = Vvplast->vpd_blkno;
955 if ( blkno == Fblklast ) /* this page in Fvpl too */
957 Assert ( --Fnpages > 0 );
958 Assert ( Fvplast->vpd_nusd == 0 );
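959 /* get prev reapped page from Fvpl -- NOTE(review): Fnpages is decremented inside the Assert ( --Fnpages > 0 ) above, so this lookup re-reads the same entry if Assert() is compiled out */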
960 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
961 Fblklast = Fvplast->vpd_blkno;
963 Assert ( Fblklast <= Vblklast );
972 Assert ( ! isempty );
975 vpc->vpd_blkno = blkno;
976 maxoff = PageGetMaxOffsetNumber(page);
977 for (offnum = FirstOffsetNumber;
979 offnum = OffsetNumberNext(offnum))
981 itemid = PageGetItemId(page, offnum);
983 if (!ItemIdIsUsed(itemid))
986 htup = (HeapTuple) PageGetItem(page, itemid);
989 /* try to find new page for this tuple */
990 if ( ToBuf == InvalidBuffer ||
991 ! vc_enough_space (ToVpd, tlen) )
993 if ( ToBuf != InvalidBuffer )
996 ToBuf = InvalidBuffer;
998 * If not even one tuple can be added to this page -
999 * remove the page from Fvpl. - vadim 11/27/96
1001 if ( !vc_enough_space (ToVpd, vacrelstats->min_tlen) )
1003 if ( ToVpd != Fvplast )
1005 Assert ( Fnpages > ToVpI + 1 );
1006 memmove (Fvpl->vpl_pgdesc + ToVpI,
1007 Fvpl->vpl_pgdesc + ToVpI + 1,
1008 sizeof (VPageDescr*) * (Fnpages - ToVpI - 1));
1010 Assert ( Fnpages >= 1 );
1014 /* get prev reapped page from Fvpl */
1015 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
1016 Fblklast = Fvplast->vpd_blkno;
1019 for (i=0; i < Fnpages; i++)
1021 if ( vc_enough_space (Fvpl->vpl_pgdesc[i], tlen) )
1025 break; /* can't move item anywhere */
1027 ToVpd = Fvpl->vpl_pgdesc[ToVpI];
1028 ToBuf = ReadBuffer(onerel, ToVpd->vpd_blkno);
1029 ToPage = BufferGetPage(ToBuf);
1030 /* if this page was not used before - clean it */
1031 if ( ! PageIsEmpty(ToPage) && ToVpd->vpd_nusd == 0 )
1032 vc_vacpage (ToPage, ToVpd, archrel);
1036 newtup = (HeapTuple) palloc (tlen);
1037 memmove((char *) newtup, (char *) htup, tlen);
1039 /* store transaction information */
1040 TransactionIdStore(myXID, &(newtup->t_xmin));
1041 newtup->t_cmin = myCID;
1042 StoreInvalidTransactionId(&(newtup->t_xmax));
1043 newtup->t_tmin = INVALID_ABSTIME;
1044 newtup->t_tmax = CURRENT_ABSTIME;
1045 ItemPointerSetInvalid(&newtup->t_chain);
1047 /* add tuple to the page */
1048 newoff = PageAddItem (ToPage, (Item)newtup, tlen,
1049 InvalidOffsetNumber, LP_USED);
1050 if ( newoff == InvalidOffsetNumber )
1053 failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
1054 tlen, ToVpd->vpd_blkno, ToVpd->vpd_free,
1055 ToVpd->vpd_nusd, ToVpd->vpd_noff);
1057 newitemid = PageGetItemId(ToPage, newoff);
1059 newtup = (HeapTuple) PageGetItem(ToPage, newitemid);
1060 ItemPointerSet(&(newtup->t_ctid), ToVpd->vpd_blkno, newoff);
1062 /* now logically delete end-tuple */
1063 TransactionIdStore(myXID, &(htup->t_xmax));
1064 htup->t_cmax = myCID;
1065 memmove ((char*)&(htup->t_chain), (char*)&(newtup->t_ctid), sizeof (newtup->t_ctid));
1069 ToVpd->vpd_free = ((PageHeader)ToPage)->pd_upper - ((PageHeader)ToPage)->pd_lower;
1070 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
1072 /* insert index tuples if needed */
1073 if ( Irel != (Relation*) NULL )
1075 for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
1079 (AttrNumber *)&(idcur->tform->indkey[0]),
1086 iresult = index_insert (
1092 if (iresult) pfree(iresult);
1096 } /* walk along page */
1098 if ( vpc->vpd_noff > 0 ) /* some tuples were moved */
1100 vc_reappage (&Nvpl, vpc);
1108 if ( offnum <= maxoff )
1109 break; /* some item(s) left */
1111 } /* walk along relation */
1113 blkno++; /* new number of blocks */
1115 if ( ToBuf != InvalidBuffer )
1117 Assert (nmoved > 0);
1124 * We have to commit our tuple moves before we truncate the
1125 * relation, but we shouldn't lose our locks. And so - quick hack:
1126 * flush buffers and record the status of the current transaction
1127 * as committed, and continue. - vadim 11/13/96
1129 FlushBufferPool(!TransactionFlushEnabled());
1130 TransactionIdCommit(myXID);
1131 FlushBufferPool(!TransactionFlushEnabled());
1132 myCTM = TransactionIdGetCommitTime(myXID);
1136 * Clean uncleaned reapped pages from Vvpl list
1137 * and set commit times for inserted tuples
1140 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < Vnpages; i++, vpp++)
1142 Assert ( (*vpp)->vpd_blkno < blkno );
1143 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1144 page = BufferGetPage(buf);
1145 if ( (*vpp)->vpd_nusd == 0 ) /* this page was not used */
1147 /* noff == 0 in empty pages only - such pages should be re-used */
1148 Assert ( (*vpp)->vpd_noff > 0 );
1149 vc_vacpage (page, *vpp, archrel);
1151 else /* this page was used */
1154 moff = PageGetMaxOffsetNumber(page);
1155 for (newoff = FirstOffsetNumber;
1157 newoff = OffsetNumberNext(newoff))
1159 itemid = PageGetItemId(page, newoff);
1160 if (!ItemIdIsUsed(itemid))
1162 htup = (HeapTuple) PageGetItem(page, itemid);
1163 if ( TransactionIdEquals((TransactionId)htup->t_xmin, myXID) )
1165 htup->t_tmin = myCTM;
1169 Assert ( (*vpp)->vpd_nusd == ntups );
1174 Assert ( nmoved == nchkmvd );
1176 getrusage(RUSAGE_SELF, &ru1);
1178 elog (MESSAGE_LEVEL, "Rel %.*s: Pages: %u --> %u; Tuple(s) moved: %u. \
1179 Elapsed %u/%u sec.",
1180 NAMEDATALEN, (RelationGetRelationName(onerel))->data,
1181 nblocks, blkno, nmoved,
1182 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1183 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1185 if ( Nvpl.vpl_npages > 0 )
1187 /* vacuum indices again if needed */
1188 if ( Irel != (Relation*) NULL )
1190 VPageDescr *vpleft, *vpright, vpsave;
1192 /* re-sort Nvpl.vpl_pgdesc */
1193 for (vpleft = Nvpl.vpl_pgdesc,
1194 vpright = Nvpl.vpl_pgdesc + Nvpl.vpl_npages - 1;
1195 vpleft < vpright; vpleft++, vpright--)
1197 vpsave = *vpleft; *vpleft = *vpright; *vpright = vpsave;
1199 for (i = 0; i < nindices; i++)
1200 vc_vaconeind (&Nvpl, Irel[i], vacrelstats->ntups);
1204 * clean moved tuples from the last page in the Nvpl list
1205 * if some tuples are left there
1207 if ( vpc->vpd_noff > 0 && offnum <= maxoff )
1209 Assert (vpc->vpd_blkno == blkno - 1);
1210 buf = ReadBuffer(onerel, vpc->vpd_blkno);
1211 page = BufferGetPage (buf);
1214 for (offnum = FirstOffsetNumber;
1216 offnum = OffsetNumberNext(offnum))
1218 itemid = PageGetItemId(page, offnum);
1219 if (!ItemIdIsUsed(itemid))
1221 htup = (HeapTuple) PageGetItem(page, itemid);
1222 Assert ( TransactionIdEquals((TransactionId)htup->t_xmax, myXID) );
1223 itemid->lp_flags &= ~LP_USED;
1226 Assert ( vpc->vpd_noff == ntups );
1227 PageRepairFragmentation(page);
1231 /* now - free new list of reapped pages */
1232 vpp = Nvpl.vpl_pgdesc;
1233 for (i = 0; i < Nvpl.vpl_npages; i++, vpp++)
1235 pfree (Nvpl.vpl_pgdesc);
1238 /* truncate relation */
1239 if ( blkno < nblocks )
1241 blkno = smgrtruncate (onerel->rd_rel->relsmgr, onerel, blkno);
1242 Assert ( blkno >= 0 );
1243 vacrelstats->npages = blkno; /* set new number of blocks */
1246 if ( archrel != (Relation) NULL )
1247 heap_close(archrel);
1249 if ( Irel != (Relation*) NULL ) /* pfree index' allocations */
1254 vc_clsindices (nindices, Irel);
1262 * vc_vacheap() -- free dead tuples
1264 * This routine marks dead tuples as unused and truncates relation
1265 * if there are "empty" end-blocks.
1268 vc_vacheap (VRelStats *vacrelstats, Relation onerel, VPageList Vvpl)
/*
 * Cleans the pages listed in Vvpl and then shortens onerel.
 * The last vpl_nemend entries of Vvpl are not cleaned in place; instead the
 * relation is truncated by that many pages and vacrelstats->npages is set
 * to the block count returned by smgrtruncate.
 */
1277 nblocks = Vvpl->vpl_npages;
1278 /* if the relation has an archive, open it */
1279 if (onerel->rd_rel->relarch != 'n')
1280 archrel = vc_getarchrel(onerel);
1283 archrel = (Relation) NULL;
1284 nblocks -= Vvpl->vpl_nemend; /* nothing to do with them */
1287 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < nblocks; i++, vpp++)
/* a page descriptor with no recorded offsets has nothing to free */
1289 if ( (*vpp)->vpd_noff > 0 )
1291 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1292 page = BufferGetPage (buf);
1293 vc_vacpage (page, *vpp, archrel);
1298 /* truncate relation if there are some empty end-pages */
1299 if ( Vvpl->vpl_nemend > 0 )
1301 Assert ( vacrelstats->npages >= Vvpl->vpl_nemend );
/* nblocks is reused from here on as the target relation length in pages */
1302 nblocks = vacrelstats->npages - Vvpl->vpl_nemend;
1303 elog (MESSAGE_LEVEL, "Rel %.*s: Pages: %u --> %u.",
1304 NAMEDATALEN, (RelationGetRelationName(onerel))->data,
1305 vacrelstats->npages, nblocks);
1308 * we have to flush "empty" end-pages (if changed, but who knows it)
1311 FlushBufferPool(!TransactionFlushEnabled());
/* the value returned by smgrtruncate becomes the new page count */
1313 nblocks = smgrtruncate (onerel->rd_rel->relsmgr, onerel, nblocks);
1314 Assert ( nblocks >= 0 );
1315 vacrelstats->npages = nblocks; /* set new number of blocks */
/* release the archive relation if one was opened above */
1318 if ( archrel != (Relation) NULL )
1319 heap_close(archrel);
1324 * vc_vacpage() -- free (and archive if needed) dead tuples on a page
1325 * and repair its fragmentation.
1328 vc_vacpage (Page page, VPageDescr vpd, Relation archrel)
/*
 * vpd->vpd_voff[0 .. vpd_noff-1] holds the line-pointer numbers to free on
 * this page.  archrel may be NULL, in which case nothing is archived.
 */
1334 Assert ( vpd->vpd_nusd == 0 );
1335 for (i=0; i < vpd->vpd_noff; i++)
/* recorded offsets are 1-based while pd_linp[] is indexed from 0 */
1337 itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_voff[i] - 1]);
/* only line pointers that are still in use are handed to the archive */
1338 if ( archrel != (Relation) NULL && ItemIdIsUsed(itemid) )
1340 htup = (HeapTuple) PageGetItem (page, itemid);
1341 vc_archive (archrel, htup);
/* mark the line pointer as unused */
1343 itemid->lp_flags &= ~LP_USED;
/* repair page fragmentation once all listed line pointers are released */
1345 PageRepairFragmentation(page);
1350 * vc_scanoneind() -- scan one index relation to update statistics.
1354 vc_scanoneind (Relation indrel, int nhtups)
/*
 * Scans all of indrel, stores its page and tuple counts in pg_class through
 * vc_updstats, and logs the result.  nhtups is the tuple count of the heap;
 * it is only compared with the index tuple count to report a mismatch.
 */
1356 RetrieveIndexResult res;
1357 IndexScanDesc iscan;
1360 struct rusage ru0, ru1;
/* resource usage snapshot taken before the scan, for the elapsed report */
1362 getrusage(RUSAGE_SELF, &ru0);
1364 /* walk through the entire index */
1365 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
/* NOTE(review): the loop body is not visible in this excerpt; presumably it counts entries into nitups - confirm */
1368 while ((res = index_getnext(iscan, ForwardScanDirection))
1369 != (RetrieveIndexResult) NULL)
1375 index_endscan(iscan);
1377 /* now update statistics in pg_class */
1378 nipages = RelationGetNumberOfBlocks(indrel);
/* for an index, hasindex is passed as false and no attribute statistics are supplied */
1379 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1381 getrusage(RUSAGE_SELF, &ru1);
/*
 * The two Elapsed figures are system time then user time, in whole seconds.
 * NOTE(review): tv_sec differences are printed with %u - confirm that the
 * argument type matches on all supported platforms.
 */
1383 elog (MESSAGE_LEVEL, "Ind %.*s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
1384 NAMEDATALEN, indrel->rd_rel->relname.data, nipages, nitups,
1385 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1386 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
/* an index should hold exactly as many tuples as the heap */
1388 if ( nitups != nhtups )
1389 elog (NOTICE, "NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1392 } /* vc_scanoneind */
1395 * vc_vaconeind() -- vacuum one index relation.
1397 * Vpl is the VPageList of the heap we're currently vacuuming.
1398 * It's locked. Indrel is an index relation on the vacuumed heap.
1399 * We don't set locks on the index relation here, since the indexed
1400 * access methods support locking at different granularities.
1401 * We let them handle it.
1403 * Finally, we arrange to update the index relation's statistics in
1407 vc_vaconeind(VPageList vpl, Relation indrel, int nhtups)
/*
 * Scans every entry of indrel; an entry whose heap tid is found in vpl
 * (see vc_tidreapped) is deleted from the index.  Afterwards the page and
 * tuple counts of the index are stored in pg_class through vc_updstats.
 * nhtups is the heap tuple count, used only for the final consistency check.
 * NOTE(review): the statements that maintain nitups and nvac are not visible
 * in this excerpt - confirm against the full file.
 */
1409 RetrieveIndexResult res;
1410 IndexScanDesc iscan;
1411 ItemPointer heapptr;
1416 struct rusage ru0, ru1;
/* resource usage snapshot taken before the scan, for the elapsed report */
1418 getrusage(RUSAGE_SELF, &ru0);
1420 /* walk through the entire index */
1421 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1425 while ((res = index_getnext(iscan, ForwardScanDirection))
1426 != (RetrieveIndexResult) NULL) {
/* heap tuple this index entry points at */
1427 heapptr = &res->heap_iptr;
/* a non-NULL result is the page descriptor in vpl that covers this tid */
1429 if ( (vp = vc_tidreapped (heapptr, vpl)) != (VPageDescr) NULL)
1432 elog(DEBUG, "<%x,%x> -> <%x,%x>",
1433 ItemPointerGetBlockNumber(&(res->index_iptr)),
1434 ItemPointerGetOffsetNumber(&(res->index_iptr)),
1435 ItemPointerGetBlockNumber(&(res->heap_iptr)),
1436 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
/* a descriptor with no recorded offsets should not be referenced by any index entry; report it */
1438 if ( vp->vpd_noff == 0 )
1439 { /* this is EmptyPage !!! */
1440 elog (NOTICE, "Ind %.*s: pointer to EmptyPage (blk %u off %u) - fixing",
1441 NAMEDATALEN, indrel->rd_rel->relname.data,
1442 vp->vpd_blkno, ItemPointerGetOffsetNumber(heapptr));
/* remove the index entry itself */
1445 index_delete(indrel, &res->index_iptr);
1454 index_endscan(iscan);
1456 /* now update statistics in pg_class */
1457 nipages = RelationGetNumberOfBlocks(indrel);
1458 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1460 getrusage(RUSAGE_SELF, &ru1);
/* the two Elapsed figures are system time then user time, in whole seconds */
1462 elog (MESSAGE_LEVEL, "Ind %.*s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
1463 NAMEDATALEN, indrel->rd_rel->relname.data, nipages, nitups, nvac,
1464 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1465 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
/* after vacuuming, the index should hold exactly as many tuples as the heap */
1467 if ( nitups != nhtups )
1468 elog (NOTICE, "NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1471 } /* vc_vaconeind */
1474 * vc_tidreapped() -- is a particular tid reaped?
1476 * vpl->vpl_pgdesc is sorted in the right order.
1479 vc_tidreapped(ItemPointer itemptr, VPageList vpl)
/*
 * Looks itemptr up in vpl in two steps: first the page by block number,
 * then the offset inside the vpd_voff array of that page.  Returns NULL
 * when either lookup fails.  The caller in vc_vaconeind uses a non-NULL
 * result as the matching page descriptor.
 * NOTE(review): the comparator arguments and the non-NULL return statements
 * are on lines not visible in this excerpt.
 */
1481 OffsetNumber ioffno;
1483 VPageDescr vp, *vpp;
/* vpd carries only the block number and serves as the search key; presumably vp is pointed at it on a line not shown - confirm */
1486 vpd.vpd_blkno = ItemPointerGetBlockNumber(itemptr);
1487 ioffno = ItemPointerGetOffsetNumber(itemptr);
/* step 1: find the page descriptor for this block */
1490 vpp = (VPageDescr*) vc_find_eq ((char*)(vpl->vpl_pgdesc),
1491 vpl->vpl_npages, sizeof (VPageDescr), (char*)&vp,
1494 if ( vpp == (VPageDescr*) NULL )
1495 return ((VPageDescr)NULL);
1498 /* ok - we are on true page */
1500 if ( vp->vpd_noff == 0 ) { /* this is EmptyPage !!! */
/* step 2: find the offset among the offsets recorded for that page */
1504 voff = (OffsetNumber*) vc_find_eq ((char*)(vp->vpd_voff),
1505 vp->vpd_noff, sizeof (OffsetNumber), (char*)&ioffno,
1508 if ( voff == (OffsetNumber*) NULL )
1509 return ((VPageDescr)NULL);
1513 } /* vc_tidreapped */
1516 * vc_attrstats() -- compute column statistics used by the optimizer
1518 * We compute the column min, max, null and non-null counts.
1519 * Plus we attempt to find the count of the value that occurs most
1520 * frequently in each column
1521 * These figures are used to compute the selectivity of the column
1523 * We use a three-bucket cache to get the most frequent item
1524 * The 'guess' buckets count hits. A cache miss causes guess1
1525 * to get the most hit 'guess' item in the most recent cycle, and
1526 * the new item goes into guess2. Whenever the total count of hits
1527 * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
1529 * This method works perfectly for columns with unique values, and columns
1530 * with only two unique values, plus nulls.
1532 * It becomes less perfect as the number of unique values increases and
1533 * their distribution in the table becomes more random.
1537 vc_attrstats(Relation onerel, VacAttrStats *vacattrstats, HeapTuple htup)
/*
 * Folds one heap tuple into the running per-attribute statistics.
 * vacattrstats[i] receives the value of attribute number i+1 of htup.
 * NOTE(review): several one-line bodies (null handling, counter increments)
 * are on lines not visible in this excerpt.
 */
1540 AttributeTupleForm *attr;
1545 attr_cnt = onerel->rd_att->natts;
1546 attr = onerel->rd_att->attrs;
1547 tupDesc = onerel->rd_att;
1549 for (i = 0; i < attr_cnt; i++) {
1550 VacAttrStats *stats = &vacattrstats[i];
1551 bool value_hit = true;
/* attribute numbers passed to heap_getattr are 1-based */
1553 value = (Datum) heap_getattr(htup, InvalidBuffer, i+1, tupDesc, &isnull);
/* statistics are gathered only when an equality procedure (cmpeq) is known for the attribute */
1555 if (!VacAttrStatsEqValid(stats))
1561 stats->nonnull_cnt++;
/* first value seen for this attribute: seed every bucket with it */
1562 if (stats->initialized == false) {
1563 vc_bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
1564 /* best_cnt gets incremented later */
1565 vc_bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
1566 stats->guess1_cnt = stats->guess1_hits = 1;
1567 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1568 stats->guess2_hits = 1;
/* min and max are tracked only when cmplt, cmpgt and outfunc are all valid */
1569 if (VacAttrStatsLtGtValid(stats)) {
1570 vc_bucketcpy(stats->attr, value, &stats->max , &stats->max_len);
1571 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1573 stats->initialized = true;
/* keep the running minimum */
1575 if (VacAttrStatsLtGtValid(stats) && fmgr(stats->cmplt,value,stats->min)) {
1576 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
/* keep the running maximum */
1579 if (VacAttrStatsLtGtValid(stats) && fmgr(stats->cmpgt,value,stats->max)) {
1580 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
/* value equal to the current min or max; the action taken is on lines not shown (presumably min_cnt / max_cnt are bumped - confirm) */
1583 if (VacAttrStatsLtGtValid(stats) && fmgr(stats->cmpeq,value,stats->min))
1585 else if (VacAttrStatsLtGtValid(stats) && fmgr(stats->cmpeq,value,stats->max))
/* try the value against the best, guess1 and guess2 buckets in turn; value_hit records a miss on all three */
1587 if (fmgr(stats->cmpeq,value,stats->best))
1589 else if (fmgr(stats->cmpeq,value,stats->guess1)) {
1590 stats->guess1_cnt++;
1591 stats-> guess1_hits++;
1593 else if (fmgr(stats->cmpeq,value,stats->guess2))
1594 stats->guess2_hits++;
1595 else value_hit = false;
/* guess2 overtakes guess1 once it has collected more hits: swap value, length and hit counters */
1597 if (stats->guess2_hits > stats->guess1_hits) {
1598 swapDatum(stats->guess1,stats->guess2);
1599 swapInt(stats->guess1_len,stats->guess2_len);
1600 stats->guess1_cnt = stats->guess2_hits;
1601 swapLong(stats->guess1_hits, stats->guess2_hits);
/*
 * guess1 overtakes best once its total count is larger.
 * NOTE(review): the datums and counts are swapped here but best_len and
 * guess1_len are not, unlike the guess1/guess2 swap above.  vc_bucketcpy
 * uses the stored length to decide whether a bucket must be reallocated,
 * so the lengths may no longer describe their buckets - confirm.
 */
1603 if (stats->guess1_cnt > stats->best_cnt) {
1604 swapDatum(stats->best,stats->guess1);
1605 swapLong(stats->best_cnt,stats->guess1_cnt);
1606 stats->guess1_hits = 1;
1607 stats-> guess2_hits = 1;
/* the new value replaces guess2 and the hit counters restart; presumably guarded by a value_hit test on a line not shown - confirm */
1610 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1611 stats->guess1_hits = 1;
1612 stats-> guess2_hits = 1;
1620 * vc_bucketcpy() -- copy an attribute value into a statistics bucket
1624 vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len)
/*
 * Stores value in *bucket.  For values that are not fixed-length by-value,
 * the bytes are copied into palloc storage owned by the bucket; that storage
 * is replaced only when the value is longer than *bucket_len.
 * NOTE(review): the by-value branch body and any update of *bucket_len are
 * on lines not visible in this excerpt.
 */
/* fixed-length pass-by-value attributes take the first branch */
1626 if (attr->attbyval && attr->attlen != -1)
/* attlen of -1 means the size has to be read from the value itself with VARSIZE */
1629 int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
/* grow the bucket only when the current one is too small; NOTE(review): len is int while *bucket_len is int16 */
1631 if (len > *bucket_len)
/* a bucket length of 0 means nothing has been allocated yet */
1633 if (*bucket_len != 0)
1634 pfree(DatumGetPointer(*bucket));
1635 *bucket = PointerGetDatum(palloc(len));
/* copy the value bytes into the bucket storage */
1638 memmove(DatumGetPointer(*bucket), DatumGetPointer(value), len);
1643 * vc_updstats() -- update pg_class statistics for one relation
1645 * This routine works for both index and heap relation entries in
1646 * pg_class. We violate no-overwrite semantics here by storing new
1647 * values for ntups, npages, and hasindex directly in the pg_class
1648 * tuple that's already on the page. The reason for this is that if
1649 * we updated these tuples in the usual way, then every tuple in pg_class
1650 * would be replaced every day. This would make planning and executing
1651 * historical queries very expensive.
1654 vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VacAttrStats *vacattrstats)
/*
 * relid identifies the pg_class row that is patched in place with npages,
 * ntups and hasindex.  When vacattrstats is non-NULL it is indexed by
 * (attnum - 1) and is used to refresh attnvals in pg_attribute and to
 * insert min/max rows into pg_statistic.
 */
1656 Relation rd, ad, sd;
1657 HeapScanDesc rsdesc, asdesc;
1659 HeapTuple rtup, atup, stup;
1661 Form_pg_class pgcform;
1662 ScanKeyData rskey, askey;
1663 AttributeTupleForm attp;
1666 * update number of tuples and number of pages in pg_class
1668 ScanKeyEntryInitialize(&rskey, 0x0, ObjectIdAttributeNumber,
1669 ObjectIdEqualRegProcedure,
1670 ObjectIdGetDatum(relid));
/* find the pg_class row whose oid equals relid */
1672 rd = heap_openr(RelationRelationName);
1673 rsdesc = heap_beginscan(rd, false, NowTimeQual, 1, &rskey);
1675 if (!HeapTupleIsValid(rtup = heap_getnext(rsdesc, 0, &rbuf)))
1676 elog(WARN, "pg_class entry for relid %d vanished during vacuuming",
1679 /* overwrite the existing statistics in the tuple */
1680 vc_setpagelock(rd, BufferGetBlockNumber(rbuf));
1681 pgcform = (Form_pg_class) GETSTRUCT(rtup);
1682 pgcform->reltuples = ntups;
1683 pgcform->relpages = npages;
1684 pgcform->relhasindex = hasindex;
/* attribute-level statistics are written only when the caller supplies them */
1686 if (vacattrstats != NULL)
1688 ad = heap_openr(AttributeRelationName);
1689 sd = heap_openr(StatisticRelationName);
1690 ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
1693 asdesc = heap_beginscan(ad, false, NowTimeQual, 1, &askey);
1695 while (HeapTupleIsValid(atup = heap_getnext(asdesc, 0, &abuf))) {
1697 double selratio; /* average ratio of rows selected for a random constant */
1698 VacAttrStats *stats;
1699 Datum values[ Natts_pg_statistic ];
1700 char nulls[ Natts_pg_statistic ];
1702 attp = (AttributeTupleForm) GETSTRUCT(atup);
/* vacattrstats[] is indexed by attnum - 1, so attributes with attnum below 1 give a negative slot */
1703 slot = attp->attnum - 1;
1704 if (slot < 0) /* skip system attributes for now,
1705 they are unique anyway */
1707 stats = &vacattrstats[slot];
1709 /* overwrite the existing statistics in the tuple */
1710 if (VacAttrStatsEqValid(stats)) {
1712 vc_setpagelock(ad, BufferGetBlockNumber(abuf));
/* case 1: no rows counted, or best_cnt of 1 with at most one null; the selratio assignment is on a line not shown */
1714 if (stats->nonnull_cnt + stats->null_cnt == 0 ||
1715 (stats->null_cnt <= 1 && stats->best_cnt == 1))
/* case 2: every non-null value equals the min or the max */
1717 else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
1719 double min_cnt_d = stats->min_cnt,
1720 max_cnt_d = stats->max_cnt,
1721 null_cnt_d = stats->null_cnt,
1722 nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */
1723 selratio = (min_cnt_d*min_cnt_d+max_cnt_d*max_cnt_d+null_cnt_d*null_cnt_d)/
1724 (nonnullcnt_d+null_cnt_d)/(nonnullcnt_d+null_cnt_d);
/* case 3: estimate from the larger of best_cnt and null_cnt */
1727 double most = (double)(stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
1728 double total = ((double)stats->nonnull_cnt)+((double)stats->null_cnt);
1729 /* we assume count of other values are 20%
1730 of best count in table */
1731 selratio = (most*most + 0.20*most*(total-most))/total/total;
/* attnvals holds selratio scaled by ATTNVALS_SCALE, or 0 when selratio is 0 */
1735 attp->attnvals = (selratio ? (selratio * ATTNVALS_SCALE) : 0);
1736 WriteNoReleaseBuffer(abuf);
1738 /* DO PG_STATISTIC INSERTS */
1740 /* doing system relations, especially pg_statistic is a problem */
1741 if (VacAttrStatsLtGtValid(stats) && stats->initialized /* &&
1742 !IsSystemRelationName(pgcform->relname.data)*/) {
1743 func_ptr out_function;
1747 for (i = 0; i < Natts_pg_statistic; ++i) nulls[i] = ' ';
1750 * initialize values[]
1754 values[i++] = (Datum) relid; /* 1 */
1755 values[i++] = (Datum) attp->attnum; /* 2 */
1756 values[i++] = (Datum) InvalidOid; /* 3 */
/* min and max are rendered with the output function in stats->outfunc and then turned into text datums */
1757 fmgr_info(stats->outfunc, &out_function, &dummy);
1758 out_string = (*out_function)(stats->min, stats->attr->atttypid);
1759 values[i++] = (Datum) fmgr(TextInRegProcedure,out_string);
1761 out_string = (char *)(*out_function)(stats->max, stats->attr->atttypid);
1762 values[i++] = (Datum) fmgr(TextInRegProcedure,out_string);
1767 stup = heap_formtuple(sdesc, values, nulls);
1770 * insert the tuple in the relation and get the tuple's oid.
1773 heap_insert(sd, stup);
/* free the text datums built for min and max (slots 3 and 4, assuming i started at 0 on a line not shown) */
1774 pfree(DatumGetPointer(values[3]));
1775 pfree(DatumGetPointer(values[4]));
1780 heap_endscan(asdesc);
1785 /* XXX -- after write, should invalidate relcache in other backends */
1786 WriteNoReleaseBuffer(rbuf); /* heap_endscan release scan' buffers ? */
1788 /* invalidating system relations confuses the function cache
1789 of pg_operator and pg_opclass */
1790 if ( !IsSystemRelationName(pgcform->relname.data))
1791 RelationInvalidateHeapTuple(rd, rtup);
1793 /* that's all, folks */
1794 heap_endscan(rsdesc);
1800 * vc_delhilowstats() -- delete pg_statistic rows
1804 vc_delhilowstats(Oid relid)
/*
 * Deletes the pg_statistic rows that belong to relation relid.  When relid
 * is InvalidOid the scan is opened without a key, so every row returned by
 * it is deleted.
 */
1806 Relation pgstatistic;
1807 HeapScanDesc pgsscan;
1811 pgstatistic = heap_openr(StatisticRelationName);
/* a valid relid restricts the scan to rows whose starelid matches it */
1813 if (relid != InvalidOid ) {
1814 ScanKeyEntryInitialize(&pgskey, 0x0, Anum_pg_statistic_starelid,
1815 ObjectIdEqualRegProcedure,
1816 ObjectIdGetDatum(relid));
1817 pgsscan = heap_beginscan(pgstatistic, false, NowTimeQual, 1, &pgskey);
/* unkeyed scan over the whole relation */
1820 pgsscan = heap_beginscan(pgstatistic, false, NowTimeQual, 0, NULL);
/* delete every tuple the scan returns */
1822 while (HeapTupleIsValid(pgstup = heap_getnext(pgsscan, 0, NULL))) {
1823 heap_delete(pgstatistic, &pgstup->t_ctid);
1826 heap_endscan(pgsscan);
1827 heap_close(pgstatistic);
1830 static void vc_setpagelock(Relation rel, BlockNumber blkno)
/*
 * Requests a write lock on page blkno of rel.  The lock call takes an item
 * pointer, so one is built for the given block with offset number 1.
 */
1832 ItemPointerData itm;
/* the offset value 1 is a fixed placeholder; only the block number comes from the caller */
1834 ItemPointerSet(&itm, blkno, 1);
1836 RelationSetLockForWritePage(rel, &itm);
1841 * vc_reappage() -- save a page on the array of reaped pages.
1843 * As a side effect of the way that the vacuuming loop for a given
1844 * relation works, higher pages come after lower pages in the array
1845 * (and highest tid on a page is last).
1848 vc_reappage(VPageList vpl, VPageDescr vpc)
/*
 * Makes a palloc copy of the page descriptor vpc, including its vpd_noff
 * recorded offsets, and appends the copy to vpl.  vpc is only read.
 */
1852 /* allocate a VPageDescrData entry */
/* the allocation is sized for the descriptor plus vpd_noff offset slots */
1853 newvpd = (VPageDescr) palloc(sizeof(VPageDescrData) + vpc->vpd_noff*sizeof(OffsetNumber));
/* copy the offset array only when there is something in it */
1856 if ( vpc->vpd_noff > 0 )
1857 memmove (newvpd->vpd_voff, vpc->vpd_voff, vpc->vpd_noff*sizeof(OffsetNumber));
/* copy the scalar fields */
1858 newvpd->vpd_blkno = vpc->vpd_blkno;
1859 newvpd->vpd_free = vpc->vpd_free;
1860 newvpd->vpd_nusd = vpc->vpd_nusd;
1861 newvpd->vpd_noff = vpc->vpd_noff;
1863 /* insert this page into vpl list */
1864 vc_vpinsert (vpl, newvpd);
1869 vc_vpinsert (VPageList vpl, VPageDescr vpnew)
/*
 * Appends vpnew to the vpl->vpl_pgdesc array and increments vpl_npages.
 * The array is grown in steps of 100 entries.
 */
1872 /* allocate a VPageDescr entry if needed */
/* first insertion: allocate room for 100 entries */
1873 if ( vpl->vpl_npages == 0 )
1874 vpl->vpl_pgdesc = (VPageDescr*) palloc(100*sizeof(VPageDescr));
/* the current chunk is full whenever the count is a multiple of 100: add 100 more slots */
1875 else if ( vpl->vpl_npages % 100 == 0 )
1876 vpl->vpl_pgdesc = (VPageDescr*) repalloc(vpl->vpl_pgdesc, (vpl->vpl_npages+100)*sizeof(VPageDescr));
1877 vpl->vpl_pgdesc[vpl->vpl_npages] = vpnew;
1878 (vpl->vpl_npages)++;
1883 vc_free(Portal p, VRelList vrl)
/*
 * Walks the relation list vrl while the variable memory context of portal p
 * is the current memory context, then restores the previous context.
 * NOTE(review): the statements that actually free each entry are on lines
 * not visible in this excerpt.
 */
1887 PortalVariableMemory pmem;
/* switch to the portal variable memory for the duration of the walk */
1889 pmem = PortalGetVariableMemory(p);
1890 old = MemoryContextSwitchTo((MemoryContext)pmem);
1892 while (vrl != (VRelList) NULL) {
1894 /* free rel list entry */
1896 vrl = vrl->vrl_next;
/* put back the memory context that was current on entry */
1900 (void) MemoryContextSwitchTo(old);
1904 * vc_getarchrel() -- open the archive relation for a heap relation
1906 * The archive relation is named 'a,XXXXX' for the heap relation
1907 * whose relid is XXXXX.
1910 #define ARCHIVE_PREFIX "a,"
1913 vc_getarchrel(Relation heaprel)
/*
 * Builds the archive relation name (ARCHIVE_PREFIX followed by the relid
 * of heaprel printed in decimal) and opens that relation by name.
 * NOTE(review): rd_id is printed with %d; if the oid type is unsigned,
 * large values would be formatted as negative numbers - confirm.
 */
/* sizeof the prefix literal counts its terminating NUL, so the buffer holds the prefix plus NAMEDATALEN characters */
1918 archrelname = palloc(sizeof(ARCHIVE_PREFIX) + NAMEDATALEN); /* bogus */
1919 sprintf(archrelname, "%s%d", ARCHIVE_PREFIX, heaprel->rd_id);
1921 archrel = heap_openr(archrelname);
1928 * vc_archive() -- write a tuple to an archive relation
1930 * In the future, this will invoke the archived access method. For
1931 * now, archive relations are on mag disk.
1934 vc_archive(Relation archrel, HeapTuple htup)
/* Stores htup in the archive relation archrel by handing it to doinsert. */
1936 doinsert(archrel, htup);
1940 vc_isarchrel(char *rname)
/*
 * Tests whether the relation name rname begins with ARCHIVE_PREFIX.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt.
 */
/* compare only the first strlen(ARCHIVE_PREFIX) characters */
1942 if (strncmp(ARCHIVE_PREFIX, rname,strlen(ARCHIVE_PREFIX)) == 0)
1949 vc_find_eq (char *bot, int nelem, int size, char *elm, int (*compar)(char *, char *))
/*
 * Searches the array of nelem elements, each size bytes wide and starting
 * at bot, for an element that compar reports as matching elm, and returns a
 * pointer to that element.  Callers in this file treat a NULL result as
 * not found.  bot, last and celm describe the current search window: its
 * first element, the index of its last element and the index of the probe.
 * NOTE(review): most branch bodies are on lines not visible in this
 * excerpt; the array is presumably required to be sorted consistently with
 * compar - confirm.
 */
1952 int last = nelem - 1;
1953 int celm = nelem / 2;
1954 bool last_move, first_move;
/* both window edges still need to be compared on the first pass */
1956 last_move = first_move = true;
/* compare against the lower edge of the window */
1959 if ( first_move == true )
1961 res = compar (bot, elm);
/* compare against the upper edge of the window */
1968 if ( last_move == true )
1970 res = compar (elm, bot + last*size);
1974 return (bot + last*size);
/* probe the element at index celm */
1977 res = compar (elm, bot + celm*size);
1979 return (bot + celm*size);
/* continue above the probe: elements 0 .. celm leave the window */
1993 last = last - celm - 1;
1994 bot = bot + (celm+1)*size;
1995 celm = (last + 1) / 2;
2002 vc_cmp_blk (char *left, char *right)
/*
 * Comparator on block numbers.  left and right each point at a VPageDescr
 * element, that is at a pointer to a page descriptor.
 * NOTE(review): the comparison and the returned values are on lines not
 * visible in this excerpt.
 */
2004 BlockNumber lblk, rblk;
/* dereference twice: array element first, then the descriptor it points to */
2006 lblk = (*((VPageDescr*)left))->vpd_blkno;
2007 rblk = (*((VPageDescr*)right))->vpd_blkno;
2018 vc_cmp_offno (char *left, char *right)
/*
 * Comparator on offset numbers.  left and right each point at an
 * OffsetNumber.
 * NOTE(review): the values returned for each case are on lines not visible
 * in this excerpt.
 */
/* left sorts before right */
2021 if ( *(OffsetNumber*)left < *(OffsetNumber*)right )
/* both offsets are equal */
2023 if ( *(OffsetNumber*)left == *(OffsetNumber*)right )
2027 } /* vc_cmp_offno */
2031 vc_getindices (Oid relid, int *nindices, Relation **Irel)
/*
 * Collects the oids of all pg_index rows whose indrelid equals relid and
 * then tries to open each of those indices.  Irel may be NULL, in which
 * case no Relation array is allocated.
 * NOTE(review): the statements that count entries in i and k and that
 * store *nindices are on lines not visible in this excerpt.
 */
2037 HeapScanDesc pgiscan;
/* the oid buffer starts with room for 10 entries and is enlarged with repalloc inside the scan loop */
2046 ioid = (Oid *) palloc(10*sizeof(Oid));
2048 /* prepare a heap scan on the pg_index relation */
2049 pgindex = heap_openr(IndexRelationName);
2050 pgidesc = RelationGetTupleDescriptor(pgindex);
2052 ScanKeyEntryInitialize(&pgikey, 0x0, Anum_pg_index_indrelid,
2053 ObjectIdEqualRegProcedure,
2054 ObjectIdGetDatum(relid));
2056 pgiscan = heap_beginscan(pgindex, false, NowTimeQual, 1, &pgikey);
2058 while (HeapTupleIsValid(pgitup = heap_getnext(pgiscan, 0, NULL))) {
2059 d = (Datum) heap_getattr(pgitup, InvalidBuffer, Anum_pg_index_indexrelid,
2063 ioid = (Oid *) repalloc(ioid, (i+10)*sizeof(Oid));
2064 ioid[i-1] = DatumGetObjectId(d);
2067 heap_endscan(pgiscan);
2068 heap_close(pgindex);
2070 if ( i == 0 ) { /* No one index found */
/* the result array is sized for every oid found, even if some indices fail to open below */
2075 if ( Irel != (Relation **) NULL )
2076 *Irel = (Relation *) palloc(i * sizeof(Relation));
/* open the collected indices, walking ioid[] from its last entry down to the first */
2078 for (k = 0; i > 0; )
2080 irel = index_open(ioid[--i]);
2081 if ( irel != (Relation) NULL )
2083 if ( Irel != (Relation **) NULL )
2090 elog (NOTICE, "CAN't OPEN INDEX %u - SKIP IT", ioid[i]);
/* no index was counted in *nindices: hand back a NULL array */
2095 if ( Irel != (Relation **) NULL && *nindices == 0 )
2098 *Irel = (Relation *) NULL;
2101 } /* vc_getindices */
2105 vc_clsindices (int nindices, Relation *Irel)
/*
 * Closes the nindices index relations stored in Irel.  A NULL Irel is
 * tested for explicitly before the loop.
 */
2108 if ( Irel == (Relation*) NULL )
/* close from the last array entry down to the first */
2111 while (nindices--) {
2112 index_close (Irel[nindices]);
2116 } /* vc_clsindices */
2120 vc_mkindesc (Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc)
/*
 * Allocates an array of nindices IndDesc entries in *Idesc and fills one
 * entry per index in Irel: the pg_index form of the index, its number of
 * key attributes and, when indproc is set, its function index info.
 */
2123 HeapTuple pgIndexTup;
2124 AttrNumber *attnumP;
2128 *Idesc = (IndDesc *) palloc (nindices * sizeof (IndDesc));
2130 for (i = 0, idcur = *Idesc; i < nindices; i++, idcur++) {
2132 SearchSysCacheTuple(INDEXRELID,
2133 ObjectIdGetDatum(Irel[i]->rd_id),
2136 idcur->tform = (IndexTupleForm)GETSTRUCT(pgIndexTup);
/*
 * Count the key attributes; the loop body is empty and stops at the first
 * InvalidAttrNumber or at INDEX_MAX_KEYS.
 * NOTE(review): *attnumP is read before natts is tested, so when all
 * INDEX_MAX_KEYS keys are set the loop reads one element past the last key,
 * if indkey has exactly INDEX_MAX_KEYS elements - confirm.
 */
2137 for (attnumP = &(idcur->tform->indkey[0]), natts = 0;
2138 *attnumP != InvalidAttrNumber && natts != INDEX_MAX_KEYS;
2139 attnumP++, natts++);
/* an indproc other than InvalidOid means the index is built on a function: fill in finfo */
2140 if (idcur->tform->indproc != InvalidOid) {
2141 idcur->finfoP = &(idcur->finfo);
2142 FIgetnArgs(idcur->finfoP) = natts;
2144 FIgetProcOid(idcur->finfoP) = idcur->tform->indproc;
2145 *(FIgetname(idcur->finfoP)) = '\0';
/* no function index info for this index */
2147 idcur->finfoP = (FuncIndexInfo *) NULL;
2149 idcur->natts = natts;
2156 vc_enough_space (VPageDescr vpd, Size len)
/*
 * Decides whether a tuple of len bytes fits on the page described by vpd,
 * based on vpd_free and on whether a free item id is available
 * (vpd_nusd below vpd_noff) or a new one would have to be allocated.
 * NOTE(review): the false-returning statements are on lines not visible in
 * this excerpt.
 */
/* the space needed is the length rounded up by DOUBLEALIGN */
2159 len = DOUBLEALIGN(len);
/* larger than the recorded free space: cannot fit */
2161 if ( len > vpd->vpd_free )
2164 if ( vpd->vpd_nusd < vpd->vpd_noff ) /* there are free itemid(s) */
2165 return (true); /* and len <= free_space */
2167 /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
/* NOTE(review): if vpd_free is unsigned and smaller than sizeof (ItemIdData), this subtraction wraps to a huge value - confirm */
2168 if ( len <= vpd->vpd_free - sizeof (ItemIdData) )
2173 } /* vc_enough_space */