1 /*-------------------------------------------------------------------------
4 * the postgres vacuum cleaner
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.68 1998/07/26 04:30:25 scrappy Exp $
12 *-------------------------------------------------------------------------
14 #include <sys/types.h>
23 #include "access/genam.h"
24 #include "access/heapam.h"
25 #include "access/transam.h"
26 #include "access/xact.h"
27 #include "catalog/catalog.h"
28 #include "catalog/catname.h"
29 #include "catalog/index.h"
31 #include "catalog/pg_class_mb.h"
33 #include "catalog/pg_class.h"
35 #include "catalog/pg_index.h"
36 #include "catalog/pg_operator.h"
37 #include "catalog/pg_statistic.h"
38 #include "catalog/pg_type.h"
39 #include "commands/vacuum.h"
41 #include "parser/parse_oper.h"
42 #include "storage/bufmgr.h"
43 #include "storage/bufpage.h"
44 #include "storage/shmem.h"
45 #include "storage/smgr.h"
46 #include "storage/lmgr.h"
47 #include "utils/builtins.h"
48 #include "utils/inval.h"
49 #include "utils/mcxt.h"
50 #include "utils/portal.h"
51 #include "utils/syscache.h"
53 #ifndef HAVE_GETRUSAGE
54 #include <rusagestub.h>
57 #include <sys/resource.h>
60 /* #include <port-protos.h> *//* Why? */
62 extern int BlowawayRelationBuffers(Relation rdesc, BlockNumber block);
64 bool VacuumRunning = false;
66 static Portal vc_portal;
68 static int MESSAGE_LEVEL; /* message level */
70 #define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
71 #define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
72 #define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
73 #define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
74 #define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
75 stats->f_cmpgt.fn_addr != NULL && \
76 RegProcedureIsValid(stats->outfunc) )
79 /* non-export function prototypes */
80 static void vc_init(void);
81 static void vc_shutdown(void);
82 static void vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols);
83 static VRelList vc_getrels(NameData *VacRelP);
84 static void vc_vacone(Oid relid, bool analyze, List *va_cols);
85 static void vc_scanheap(VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl);
86 static void vc_rpfheap(VRelStats *vacrelstats, Relation onerel, VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel);
87 static void vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vpl);
88 static void vc_vacpage(Page page, VPageDescr vpd);
89 static void vc_vaconeind(VPageList vpl, Relation indrel, int nhtups);
90 static void vc_scanoneind(Relation indrel, int nhtups);
91 static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple htup);
92 static void vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len);
93 static void vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VRelStats *vacrelstats);
94 static void vc_delhilowstats(Oid relid, int attcnt, int *attnums);
95 static void vc_setpagelock(Relation rel, BlockNumber blkno);
96 static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl);
97 static void vc_reappage(VPageList vpl, VPageDescr vpc);
98 static void vc_vpinsert(VPageList vpl, VPageDescr vpnew);
99 static void vc_free(VRelList vrl);
100 static void vc_getindices(Oid relid, int *nindices, Relation **Irel);
101 static void vc_clsindices(int nindices, Relation *Irel);
102 static void vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
103 static char *vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *));
104 static int vc_cmp_blk(char *left, char *right);
105 static int vc_cmp_offno(char *left, char *right);
106 static bool vc_enough_space(VPageDescr vpd, Size len);
109 vacuum(char *vacrel, bool verbose, bool analyze, List *va_spec)
113 PortalVariableMemory pmem;
119 * Create a portal for safe memory across transactions. We need to
120 * palloc the name space for it because our hash function expects the
121 * name to be on a longword boundary. CreatePortal copies the name to
122 * safe storage for us.
124 pname = (char *) palloc(strlen(VACPNAME) + 1);
125 strcpy(pname, VACPNAME);
126 vc_portal = CreatePortal(pname);
130 MESSAGE_LEVEL = NOTICE;
132 MESSAGE_LEVEL = DEBUG;
134 /* vacrel gets de-allocated on transaction commit */
136 strcpy(VacRel.data, vacrel);
138 pmem = PortalGetVariableMemory(vc_portal);
139 old = MemoryContextSwitchTo((MemoryContext) pmem);
141 if (va_spec != NIL && !analyze)
142 elog(ERROR, "Can't vacuum columns, only tables. You can 'vacuum analyze' columns.");
146 char *col = (char *) lfirst(le);
149 dest = (char *) palloc(strlen(col) + 1);
151 va_cols = lappend(va_cols, dest);
153 MemoryContextSwitchTo(old);
155 /* initialize vacuum cleaner */
158 /* vacuum the database */
160 vc_vacuum(&VacRel, analyze, va_cols);
162 vc_vacuum(NULL, analyze, NIL);
164 PortalDestroy(&vc_portal);
171 * vc_init(), vc_shutdown() -- start up and shut down the vacuum cleaner.
173 * We run exactly one vacuum cleaner at a time. We use the file system
174 * to guarantee an exclusive lock on vacuuming, since a single vacuum
175 * cleaner instantiation crosses transaction boundaries, and we'd lose
176 * postgres-style locks at the end of every transaction.
178 * The strangeness with committing and starting transactions in the
179 * init and shutdown routines is due to the fact that the vacuum cleaner
180 * is invoked via a sql command, and so is already executing inside
181 * a transaction. We need to leave ourselves in a predictable state
182 * on entry and exit to the vacuum cleaner. We commit the transaction
183 * started in PostgresMain() inside vc_init(), and start one in
184 * vc_shutdown() to match the commit waiting for us back in
192 if ((fd = open("pg_vlock", O_CREAT | O_EXCL, 0600)) < 0)
193 elog(ERROR, "can't create lock file -- another vacuum cleaner running?");
198 * By here, exclusive open on the lock file succeeded. If we abort
199 * for any reason during vacuuming, we need to remove the lock file.
200 * This global variable is checked in the transaction manager on xact
201 * abort, and the routine vc_abort() is called if necessary.
204 VacuumRunning = true;
206 /* matches the StartTransaction in PostgresMain() */
207 CommitTransactionCommand();
213 /* on entry, not in a transaction */
214 if (unlink("pg_vlock") < 0)
215 elog(ERROR, "vacuum: can't destroy lock file!");
217 /* okay, we're done */
218 VacuumRunning = false;
220 /* matches the CommitTransaction in PostgresMain() */
221 StartTransactionCommand();
228 /* on abort, remove the vacuum cleaner lock file */
231 VacuumRunning = false;
235 * vc_vacuum() -- vacuum the database.
237 * This routine builds a list of relations to vacuum, and then calls
238 * code that vacuums them one at a time. We are careful to vacuum each
239 * relation in a separate transaction in order to avoid holding too many
243 vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols)
248 /* get list of relations */
249 vrl = vc_getrels(VacRelP);
251 if (analyze && VacRelP == NULL && vrl != NULL)
252 vc_delhilowstats(InvalidOid, 0, NULL);
254 /* vacuum each heap relation */
255 for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
256 vc_vacone(cur->vrl_relid, analyze, va_cols);
262 vc_getrels(NameData *VacRelP)
266 HeapScanDesc pgcscan;
269 PortalVariableMemory portalmem;
280 StartTransactionCommand();
284 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relname,
286 PointerGetDatum(VacRelP->data));
290 ScanKeyEntryInitialize(&pgckey, 0x0, Anum_pg_class_relkind,
291 F_CHAREQ, CharGetDatum('r'));
294 portalmem = PortalGetVariableMemory(vc_portal);
295 vrl = cur = (VRelList) NULL;
297 pgclass = heap_openr(RelationRelationName);
298 pgcdesc = RelationGetTupleDescriptor(pgclass);
300 pgcscan = heap_beginscan(pgclass, false, false, 1, &pgckey);
302 while (HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &buf)))
307 d = heap_getattr(pgctup, Anum_pg_class_relname, pgcdesc, &n);
311 * don't vacuum large objects for now - something breaks when we
314 if ((strlen(rname) >= 5) && rname[0] == 'x' &&
315 rname[1] == 'i' && rname[2] == 'n' &&
316 (rname[3] == 'v' || rname[3] == 'x') &&
317 rname[4] >= '0' && rname[4] <= '9')
319 elog(NOTICE, "Rel %s: can't vacuum LargeObjects now",
325 d = heap_getattr(pgctup, Anum_pg_class_relkind, pgcdesc, &n);
327 rkind = DatumGetChar(d);
329 /* skip system relations */
333 elog(NOTICE, "Vacuum: can not process index and certain system tables");
337 /* get a relation list entry for this guy */
338 old = MemoryContextSwitchTo((MemoryContext) portalmem);
339 if (vrl == (VRelList) NULL)
340 vrl = cur = (VRelList) palloc(sizeof(VRelListData));
343 cur->vrl_next = (VRelList) palloc(sizeof(VRelListData));
346 MemoryContextSwitchTo(old);
348 cur->vrl_relid = pgctup->t_oid;
349 cur->vrl_next = (VRelList) NULL;
351 /* wei hates it if you forget to do this */
355 elog(NOTICE, "Vacuum: table not found");
358 heap_endscan(pgcscan);
361 CommitTransactionCommand();
367 * vc_vacone() -- vacuum one heap relation
369 * This routine vacuums a single heap, cleans out its indices, and
370 * updates its npages and ntups statistics.
372 * Doing one heap at a time incurs extra overhead, since we need to
373 * check that the heap exists again just before we vacuum it. The
374 * reason that we do this is so that vacuuming can be spread across
375 * many small transactions. Otherwise, two-phase locking would require
376 * us to lock the entire database during one pass of the vacuum cleaner.
379 vc_vacone(Oid relid, bool analyze, List *va_cols)
386 HeapScanDesc pgcscan;
389 VPageListData Vvpl; /* List of pages to vacuum and/or clean
391 VPageListData Fvpl; /* List of pages with space enough for
397 VRelStats *vacrelstats;
399 StartTransactionCommand();
401 ScanKeyEntryInitialize(&pgckey, 0x0, ObjectIdAttributeNumber,
403 ObjectIdGetDatum(relid));
405 pgclass = heap_openr(RelationRelationName);
406 pgcdesc = RelationGetTupleDescriptor(pgclass);
407 pgcscan = heap_beginscan(pgclass, false, false, 1, &pgckey);
410 * Race condition -- if the pg_class tuple has gone away since the
411 * last time we saw it, we don't need to vacuum it.
414 if (!HeapTupleIsValid(pgctup = heap_getnext(pgcscan, 0, &pgcbuf)))
416 heap_endscan(pgcscan);
418 CommitTransactionCommand();
422 /* now open the class and vacuum it */
423 onerel = heap_open(relid);
425 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
426 vacrelstats->relid = relid;
427 vacrelstats->npages = vacrelstats->ntups = 0;
428 vacrelstats->hasindex = false;
429 if (analyze && !IsSystemRelationName((RelationGetRelationName(onerel))->data))
433 AttributeTupleForm *attr;
435 attr_cnt = onerel->rd_att->natts;
436 attr = onerel->rd_att->attrs;
443 if (length(va_cols) > attr_cnt)
444 elog(ERROR, "vacuum: too many attributes specified for relation %s",
445 (RelationGetRelationName(onerel))->data);
446 attnums = (int *) palloc(attr_cnt * sizeof(int));
449 char *col = (char *) lfirst(le);
451 for (i = 0; i < attr_cnt; i++)
453 if (namestrcmp(&(attr[i]->attname), col) == 0)
456 if (i < attr_cnt) /* found */
460 elog(ERROR, "vacuum: there is no attribute %s in %s",
461 col, (RelationGetRelationName(onerel))->data);
467 vacrelstats->vacattrstats =
468 (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
470 for (i = 0; i < attr_cnt; i++)
472 Operator func_operator;
473 OperatorTupleForm pgopform;
476 stats = &vacrelstats->vacattrstats[i];
477 stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
478 memmove(stats->attr, attr[((attnums) ? attnums[i] : i)], ATTRIBUTE_TUPLE_SIZE);
479 stats->best = stats->guess1 = stats->guess2 = 0;
480 stats->max = stats->min = 0;
481 stats->best_len = stats->guess1_len = stats->guess2_len = 0;
482 stats->max_len = stats->min_len = 0;
483 stats->initialized = false;
484 stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
485 stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
487 func_operator = oper("=", stats->attr->atttypid, stats->attr->atttypid, true);
488 if (func_operator != NULL)
490 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
491 fmgr_info(pgopform->oprcode, &(stats->f_cmpeq));
494 stats->f_cmpeq.fn_addr = NULL;
496 func_operator = oper("<", stats->attr->atttypid, stats->attr->atttypid, true);
497 if (func_operator != NULL)
499 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
500 fmgr_info(pgopform->oprcode, &(stats->f_cmplt));
503 stats->f_cmplt.fn_addr = NULL;
505 func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true);
506 if (func_operator != NULL)
508 pgopform = (OperatorTupleForm) GETSTRUCT(func_operator);
509 fmgr_info(pgopform->oprcode, &(stats->f_cmpgt));
512 stats->f_cmpgt.fn_addr = NULL;
514 pgttup = SearchSysCacheTuple(TYPOID,
515 ObjectIdGetDatum(stats->attr->atttypid),
517 if (HeapTupleIsValid(pgttup))
518 stats->outfunc = ((TypeTupleForm) GETSTRUCT(pgttup))->typoutput;
520 stats->outfunc = InvalidOid;
522 vacrelstats->va_natts = attr_cnt;
523 vc_delhilowstats(relid, ((attnums) ? attr_cnt : 0), attnums);
529 vacrelstats->va_natts = 0;
530 vacrelstats->vacattrstats = (VacAttrStats *) NULL;
533 /* we require the relation to be locked until the indices are cleaned */
534 RelationSetLockForWrite(onerel);
537 Vvpl.vpl_npages = Fvpl.vpl_npages = 0;
538 vc_scanheap(vacrelstats, onerel, &Vvpl, &Fvpl);
540 /* Now open indices */
541 Irel = (Relation *) NULL;
542 vc_getindices(vacrelstats->relid, &nindices, &Irel);
545 vacrelstats->hasindex = true;
547 vacrelstats->hasindex = false;
549 /* Clean/scan index relation(s) */
550 if (Irel != (Relation *) NULL)
552 if (Vvpl.vpl_npages > 0)
554 for (i = 0; i < nindices; i++)
555 vc_vaconeind(&Vvpl, Irel[i], vacrelstats->ntups);
558 /* just scan indices to update statistic */
560 for (i = 0; i < nindices; i++)
561 vc_scanoneind(Irel[i], vacrelstats->ntups);
565 if (Fvpl.vpl_npages > 0) /* Try to shrink heap */
566 vc_rpfheap(vacrelstats, onerel, &Vvpl, &Fvpl, nindices, Irel);
569 if (Irel != (Relation *) NULL)
570 vc_clsindices(nindices, Irel);
571 if (Vvpl.vpl_npages > 0)/* Clean pages from Vvpl list */
572 vc_vacheap(vacrelstats, onerel, &Vvpl);
575 /* ok - free Vvpl list of reapped pages */
576 if (Vvpl.vpl_npages > 0)
578 vpp = Vvpl.vpl_pgdesc;
579 for (i = 0; i < Vvpl.vpl_npages; i++, vpp++)
581 pfree(Vvpl.vpl_pgdesc);
582 if (Fvpl.vpl_npages > 0)
583 pfree(Fvpl.vpl_pgdesc);
586 /* all done with this class */
588 heap_endscan(pgcscan);
591 /* update statistics in pg_class */
592 vc_updstats(vacrelstats->relid, vacrelstats->npages, vacrelstats->ntups,
593 vacrelstats->hasindex, vacrelstats);
595 /* next command frees attribute stats */
597 CommitTransactionCommand();
601 * vc_scanheap() -- scan an open heap relation
603 * This routine sets commit times, constructs Vvpl list of
604 * empty/uninitialized pages and pages with dead tuples and
605 * ~LP_USED line pointers, constructs Fvpl list of pages
606 * appropriate for purposes of shrinking and maintains statistics
607 * on the number of live tuples in a heap.
610 vc_scanheap(VRelStats *vacrelstats, Relation onerel,
611 VPageList Vvpl, VPageList Fvpl)
640 Size min_tlen = MAXTUPLEN;
642 int32 i /* , attr_cnt */ ;
645 bool do_shrinking = true;
647 getrusage(RUSAGE_SELF, &ru0);
649 nvac = ntups = nunused = ncrash = nempg = nnepg = nchpg = nemend = 0;
652 relname = (RelationGetRelationName(onerel))->data;
654 nblocks = RelationGetNumberOfBlocks(onerel);
656 vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
659 elog(MESSAGE_LEVEL, "--Relation %s--", relname);
661 for (blkno = 0; blkno < nblocks; blkno++)
663 buf = ReadBuffer(onerel, blkno);
664 page = BufferGetPage(buf);
665 vpc->vpd_blkno = blkno;
670 elog(NOTICE, "Rel %s: Uninitialized page %u - fixing",
672 PageInit(page, BufferGetPageSize(buf), 0);
673 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
674 frsize += (vpc->vpd_free - sizeof(ItemIdData));
677 vc_reappage(Vvpl, vpc);
682 if (PageIsEmpty(page))
684 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
685 frsize += (vpc->vpd_free - sizeof(ItemIdData));
688 vc_reappage(Vvpl, vpc);
695 maxoff = PageGetMaxOffsetNumber(page);
696 for (offnum = FirstOffsetNumber;
698 offnum = OffsetNumberNext(offnum))
700 itemid = PageGetItemId(page, offnum);
703 * Collect un-used items too - it's possible to have indices
704 * pointing here after crash.
706 if (!ItemIdIsUsed(itemid))
708 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
713 htup = (HeapTuple) PageGetItem(page, itemid);
716 if (!(htup->t_infomask & HEAP_XMIN_COMMITTED))
718 if (htup->t_infomask & HEAP_XMIN_INVALID)
722 if (TransactionIdDidAbort(htup->t_xmin))
724 else if (TransactionIdDidCommit(htup->t_xmin))
726 htup->t_infomask |= HEAP_XMIN_COMMITTED;
729 else if (!TransactionIdIsInProgress(htup->t_xmin))
733 * Not Aborted, Not Committed, Not in Progress -
734 * so it's from a crashed process. - vadim 11/26/96
741 elog(NOTICE, "Rel %s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
742 relname, blkno, offnum, htup->t_xmin);
743 do_shrinking = false;
749 * here we are concerned about tuples with xmin committed and
750 * xmax unknown or committed
752 if (htup->t_infomask & HEAP_XMIN_COMMITTED &&
753 !(htup->t_infomask & HEAP_XMAX_INVALID))
755 if (htup->t_infomask & HEAP_XMAX_COMMITTED)
757 else if (TransactionIdDidAbort(htup->t_xmax))
759 htup->t_infomask |= HEAP_XMAX_INVALID;
762 else if (TransactionIdDidCommit(htup->t_xmax))
764 else if (!TransactionIdIsInProgress(htup->t_xmax))
768 * Not Aborted, Not Committed, Not in Progress - so it's
769 * from a crashed process. - vadim 06/02/97
771 htup->t_infomask |= HEAP_XMAX_INVALID;;
776 elog(NOTICE, "Rel %s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
777 relname, blkno, offnum, htup->t_xmax);
778 do_shrinking = false;
783 * It's possible! But where does it come from? And should we fix
784 * it? - vadim 11/28/96
786 itemptr = &(htup->t_ctid);
787 if (!ItemPointerIsValid(itemptr) ||
788 BlockIdGetBlockNumber(&(itemptr->ip_blkid)) != blkno)
790 elog(NOTICE, "Rel %s: TID %u/%u: TID IN TUPLEHEADER %u/%u IS NOT THE SAME. TUPGONE %d.",
791 relname, blkno, offnum,
792 BlockIdGetBlockNumber(&(itemptr->ip_blkid)),
793 itemptr->ip_posid, tupgone);
799 if (htup->t_len != itemid->lp_len)
801 elog(NOTICE, "Rel %s: TID %u/%u: TUPLE_LEN IN PAGEHEADER %u IS NOT THE SAME AS IN TUPLEHEADER %u. TUPGONE %d.",
802 relname, blkno, offnum,
803 itemid->lp_len, htup->t_len, tupgone);
805 if (!OidIsValid(htup->t_oid))
807 elog(NOTICE, "Rel %s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
808 relname, blkno, offnum, tupgone);
815 if (tempPage == (Page) NULL)
819 pageSize = PageGetPageSize(page);
820 tempPage = (Page) palloc(pageSize);
821 memmove(tempPage, page, pageSize);
824 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
827 lpp->lp_flags &= ~LP_USED;
829 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
837 if (htup->t_len < min_tlen)
838 min_tlen = htup->t_len;
839 if (htup->t_len > max_tlen)
840 max_tlen = htup->t_len;
841 vc_attrstats(onerel, vacrelstats, htup);
853 if (tempPage != (Page) NULL)
854 { /* Some tuples are gone */
855 PageRepairFragmentation(tempPage);
856 vpc->vpd_free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
857 frsize += vpc->vpd_free;
858 vc_reappage(Vvpl, vpc);
860 tempPage = (Page) NULL;
862 else if (vpc->vpd_noff > 0)
863 { /* there are only ~LP_USED line pointers */
864 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
865 frsize += vpc->vpd_free;
866 vc_reappage(Vvpl, vpc);
878 /* save stats in the rel list for use later */
879 vacrelstats->ntups = ntups;
880 vacrelstats->npages = nblocks;
881 /* vacrelstats->natts = attr_cnt;*/
883 min_tlen = max_tlen = 0;
884 vacrelstats->min_tlen = min_tlen;
885 vacrelstats->max_tlen = max_tlen;
887 Vvpl->vpl_nemend = nemend;
888 Fvpl->vpl_nemend = nemend;
891 * Try to build Fvpl, keeping in mind that we can't use the free space of
892 * "empty" end-pages, nor of the last page if it was reaped.
894 if (do_shrinking && Vvpl->vpl_npages - nemend > 0)
896 int nusf; /* blocks usefull for re-using */
898 nusf = Vvpl->vpl_npages - nemend;
899 if ((Vvpl->vpl_pgdesc[nusf - 1])->vpd_blkno == nblocks - nemend - 1)
902 for (i = 0; i < nusf; i++)
904 vp = Vvpl->vpl_pgdesc[i];
905 if (vc_enough_space(vp, min_tlen))
907 vc_vpinsert(Fvpl, vp);
908 frsusf += vp->vpd_free;
913 getrusage(RUSAGE_SELF, &ru1);
915 elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
916 Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.",
917 nblocks, nchpg, Vvpl->vpl_npages, nempg, nnepg,
918 ntups, nvac, ncrash, nunused, min_tlen, max_tlen,
919 frsize, frsusf, nemend, Fvpl->vpl_npages,
920 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
921 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
927 * vc_rpfheap() -- try to repair a relation's fragmentation
929 * This routine marks dead tuples as unused and tries to re-use dead space
930 * by moving tuples (and inserting index entries if needed). It constructs
931 * the Nvpl list of freed pages (moved tuples) and cleans the indices
932 * for them after committing (in hack-manner - without losing locks
933 * and freeing memory!) the current transaction. It truncates the relation
934 * if some end-blocks have gone away.
937 vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
938 VPageList Vvpl, VPageList Fvpl, int nindices, Relation *Irel)
948 OffsetNumber offnum = 0,
956 TupleDesc tupdesc = NULL;
957 Datum *idatum = NULL;
959 InsertIndexResult iresult;
961 VPageDescr ToVpd = NULL,
983 getrusage(RUSAGE_SELF, &ru0);
985 myXID = GetCurrentTransactionId();
986 myCID = GetCurrentCommandId();
988 if (Irel != (Relation *) NULL) /* preparation for index' inserts */
990 vc_mkindesc(onerel, nindices, Irel, &Idesc);
991 tupdesc = RelationGetTupleDescriptor(onerel);
992 idatum = (Datum *) palloc(INDEX_MAX_KEYS * sizeof(*idatum));
993 inulls = (char *) palloc(INDEX_MAX_KEYS * sizeof(*inulls));
997 Fnpages = Fvpl->vpl_npages;
998 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
999 Fblklast = Fvplast->vpd_blkno;
1000 Assert(Vvpl->vpl_npages > Vvpl->vpl_nemend);
1001 Vnpages = Vvpl->vpl_npages - Vvpl->vpl_nemend;
1002 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
1003 Vblklast = Vvplast->vpd_blkno;
1004 Assert(Vblklast >= Fblklast);
1005 ToBuf = InvalidBuffer;
1008 vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
1009 vpc->vpd_nusd = vpc->vpd_noff = 0;
1011 nblocks = vacrelstats->npages;
1012 for (blkno = nblocks - Vvpl->vpl_nemend - 1;; blkno--)
1014 /* if it's reapped page and it was used by me - quit */
1015 if (blkno == Fblklast && Fvplast->vpd_nusd > 0)
1018 buf = ReadBuffer(onerel, blkno);
1019 page = BufferGetPage(buf);
1023 isempty = PageIsEmpty(page);
1026 if (blkno == Vblklast) /* it's reapped page */
1028 if (Vvplast->vpd_noff > 0) /* there are dead tuples */
1029 { /* on this page - clean */
1031 vc_vacpage(page, Vvplast);
1037 Assert(Vnpages > 0);
1038 /* get prev reapped page from Vvpl */
1039 Vvplast = Vvpl->vpl_pgdesc[Vnpages - 1];
1040 Vblklast = Vvplast->vpd_blkno;
1041 if (blkno == Fblklast) /* this page in Fvpl too */
1044 Assert(Fnpages > 0);
1045 Assert(Fvplast->vpd_nusd == 0);
1046 /* get prev reapped page from Fvpl */
1047 Fvplast = Fvpl->vpl_pgdesc[Fnpages - 1];
1048 Fblklast = Fvplast->vpd_blkno;
1050 Assert(Fblklast <= Vblklast);
1060 vpc->vpd_blkno = blkno;
1061 maxoff = PageGetMaxOffsetNumber(page);
1062 for (offnum = FirstOffsetNumber;
1064 offnum = OffsetNumberNext(offnum))
1066 itemid = PageGetItemId(page, offnum);
1068 if (!ItemIdIsUsed(itemid))
1071 htup = (HeapTuple) PageGetItem(page, itemid);
1074 /* try to find new page for this tuple */
1075 if (ToBuf == InvalidBuffer ||
1076 !vc_enough_space(ToVpd, tlen))
1078 if (ToBuf != InvalidBuffer)
1081 ToBuf = InvalidBuffer;
1084 * If no tuple can be added to this page -
1085 * remove page from Fvpl. - vadim 11/27/96
1087 * But we can't remove last page - this is our
1088 * "show-stopper" !!! - vadim 02/25/98
1090 if (ToVpd != Fvplast &&
1091 !vc_enough_space(ToVpd, vacrelstats->min_tlen))
1093 Assert(Fnpages > ToVpI + 1);
1094 memmove(Fvpl->vpl_pgdesc + ToVpI,
1095 Fvpl->vpl_pgdesc + ToVpI + 1,
1096 sizeof(VPageDescr *) * (Fnpages - ToVpI - 1));
1098 Assert(Fvplast == Fvpl->vpl_pgdesc[Fnpages - 1]);
1101 for (i = 0; i < Fnpages; i++)
1103 if (vc_enough_space(Fvpl->vpl_pgdesc[i], tlen))
1107 break; /* can't move item anywhere */
1109 ToVpd = Fvpl->vpl_pgdesc[ToVpI];
1110 ToBuf = ReadBuffer(onerel, ToVpd->vpd_blkno);
1111 ToPage = BufferGetPage(ToBuf);
1112 /* if this page was not used before - clean it */
1113 if (!PageIsEmpty(ToPage) && ToVpd->vpd_nusd == 0)
1114 vc_vacpage(ToPage, ToVpd);
1118 newtup = (HeapTuple) palloc(tlen);
1119 memmove((char *) newtup, (char *) htup, tlen);
1121 /* store transaction information */
1122 TransactionIdStore(myXID, &(newtup->t_xmin));
1123 newtup->t_cmin = myCID;
1124 StoreInvalidTransactionId(&(newtup->t_xmax));
1125 /* set xmin to unknown and xmax to invalid */
1126 newtup->t_infomask &= ~(HEAP_XACT_MASK);
1127 newtup->t_infomask |= HEAP_XMAX_INVALID;
1129 /* add tuple to the page */
1130 newoff = PageAddItem(ToPage, (Item) newtup, tlen,
1131 InvalidOffsetNumber, LP_USED);
1132 if (newoff == InvalidOffsetNumber)
1135 failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
1136 tlen, ToVpd->vpd_blkno, ToVpd->vpd_free,
1137 ToVpd->vpd_nusd, ToVpd->vpd_noff);
1139 newitemid = PageGetItemId(ToPage, newoff);
1141 newtup = (HeapTuple) PageGetItem(ToPage, newitemid);
1142 ItemPointerSet(&(newtup->t_ctid), ToVpd->vpd_blkno, newoff);
1144 /* now logically delete end-tuple */
1145 TransactionIdStore(myXID, &(htup->t_xmax));
1146 htup->t_cmax = myCID;
1147 /* set xmax to unknown */
1148 htup->t_infomask &= ~(HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED);
1152 ToVpd->vpd_free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
1153 vpc->vpd_voff[vpc->vpd_noff++] = offnum;
1155 /* insert index' tuples if needed */
1156 if (Irel != (Relation *) NULL)
1158 for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
1162 (AttrNumber *) &(idcur->tform->indkey[0]),
1169 iresult = index_insert(
1180 } /* walk along page */
1182 if (vpc->vpd_noff > 0) /* some tuples were moved */
1184 vc_reappage(&Nvpl, vpc);
1192 if (offnum <= maxoff)
1193 break; /* some item(s) left */
1195 } /* walk along relation */
1197 blkno++; /* new number of blocks */
1199 if (ToBuf != InvalidBuffer)
1209 * We have to commit our tuple moves before we truncate the
1210 * relation, but we shouldn't lose our locks. And so - quick hack:
1211 * flush buffers and record status of current transaction as
1212 * committed, and continue. - vadim 11/13/96
1214 FlushBufferPool(!TransactionFlushEnabled());
1215 TransactionIdCommit(myXID);
1216 FlushBufferPool(!TransactionFlushEnabled());
1220 * Clean uncleaned reapped pages from Vvpl list and set xmin committed
1221 * for inserted tuples
1224 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < Vnpages; i++, vpp++)
1226 Assert((*vpp)->vpd_blkno < blkno);
1227 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1228 page = BufferGetPage(buf);
1229 if ((*vpp)->vpd_nusd == 0) /* this page was not used */
1233 * noff == 0 in empty pages only - such pages should be
1236 Assert((*vpp)->vpd_noff > 0);
1237 vc_vacpage(page, *vpp);
1240 /* this page was used */
1243 moff = PageGetMaxOffsetNumber(page);
1244 for (newoff = FirstOffsetNumber;
1246 newoff = OffsetNumberNext(newoff))
1248 itemid = PageGetItemId(page, newoff);
1249 if (!ItemIdIsUsed(itemid))
1251 htup = (HeapTuple) PageGetItem(page, itemid);
1252 if (TransactionIdEquals((TransactionId) htup->t_xmin, myXID))
1254 htup->t_infomask |= HEAP_XMIN_COMMITTED;
1258 Assert((*vpp)->vpd_nusd == ntups);
1263 Assert(nmoved == nchkmvd);
1265 getrusage(RUSAGE_SELF, &ru1);
1267 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. \
1268 Elapsed %u/%u sec.",
1269 (RelationGetRelationName(onerel))->data,
1270 nblocks, blkno, nmoved,
1271 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1272 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1274 if (Nvpl.vpl_npages > 0)
1276 /* vacuum indices again if needed */
1277 if (Irel != (Relation *) NULL)
1283 /* re-sort Nvpl.vpl_pgdesc */
1284 for (vpleft = Nvpl.vpl_pgdesc,
1285 vpright = Nvpl.vpl_pgdesc + Nvpl.vpl_npages - 1;
1286 vpleft < vpright; vpleft++, vpright--)
1292 for (i = 0; i < nindices; i++)
1293 vc_vaconeind(&Nvpl, Irel[i], vacrelstats->ntups);
1297 * clean moved tuples from last page in Nvpl list if some tuples
1300 if (vpc->vpd_noff > 0 && offnum <= maxoff)
1302 Assert(vpc->vpd_blkno == blkno - 1);
1303 buf = ReadBuffer(onerel, vpc->vpd_blkno);
1304 page = BufferGetPage(buf);
1307 for (offnum = FirstOffsetNumber;
1309 offnum = OffsetNumberNext(offnum))
1311 itemid = PageGetItemId(page, offnum);
1312 if (!ItemIdIsUsed(itemid))
1314 htup = (HeapTuple) PageGetItem(page, itemid);
1315 Assert(TransactionIdEquals((TransactionId) htup->t_xmax, myXID));
1316 itemid->lp_flags &= ~LP_USED;
1319 Assert(vpc->vpd_noff == ntups);
1320 PageRepairFragmentation(page);
1324 /* now - free new list of reapped pages */
1325 vpp = Nvpl.vpl_pgdesc;
1326 for (i = 0; i < Nvpl.vpl_npages; i++, vpp++)
1328 pfree(Nvpl.vpl_pgdesc);
1331 /* truncate relation */
1332 if (blkno < nblocks)
1334 i = BlowawayRelationBuffers(onerel, blkno);
1336 elog(FATAL, "VACUUM (vc_rpfheap): BlowawayRelationBuffers returned %d", i);
1337 blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
1339 vacrelstats->npages = blkno; /* set new number of blocks */
1342 if (Irel != (Relation *) NULL) /* pfree index' allocations */
1347 vc_clsindices(nindices, Irel);
1355 * vc_vacheap() -- free dead tuples
1357 * This routine marks dead tuples as unused and truncates relation
1358 * if there are "empty" end-blocks.
1361 vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList Vvpl)
1369 nblocks = Vvpl->vpl_npages;
1370 nblocks -= Vvpl->vpl_nemend;/* nothing to do with them */
1372 for (i = 0, vpp = Vvpl->vpl_pgdesc; i < nblocks; i++, vpp++)
1374 if ((*vpp)->vpd_noff > 0)
1376 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1377 page = BufferGetPage(buf);
1378 vc_vacpage(page, *vpp);
1383 /* truncate relation if there are some empty end-pages */
1384 if (Vvpl->vpl_nemend > 0)
1386 Assert(vacrelstats->npages >= Vvpl->vpl_nemend);
1387 nblocks = vacrelstats->npages - Vvpl->vpl_nemend;
1388 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
1389 (RelationGetRelationName(onerel))->data,
1390 vacrelstats->npages, nblocks);
1393 * we have to flush "empty" end-pages (if changed, but who knows
1394 * it) before truncation
1396 FlushBufferPool(!TransactionFlushEnabled());
1398 i = BlowawayRelationBuffers(onerel, nblocks);
1400 elog(FATAL, "VACUUM (vc_vacheap): BlowawayRelationBuffers returned %d", i);
1402 nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
1403 Assert(nblocks >= 0);
1404 vacrelstats->npages = nblocks; /* set new number of blocks */
1410 * vc_vacpage() -- free dead tuples on a page
1411 * and repair its fragmentation.
1414 vc_vacpage(Page page, VPageDescr vpd)
1419 Assert(vpd->vpd_nusd == 0);
1420 for (i = 0; i < vpd->vpd_noff; i++)
1422 itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_voff[i] - 1]);
1423 itemid->lp_flags &= ~LP_USED;
1425 PageRepairFragmentation(page);
1430 * vc_scanoneind() -- scan one index relation to update statistics.
1434 vc_scanoneind(Relation indrel, int nhtups)
1436 RetrieveIndexResult res;
1437 IndexScanDesc iscan;
1443 getrusage(RUSAGE_SELF, &ru0);
1445 /* walk through the entire index */
1446 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1449 while ((res = index_getnext(iscan, ForwardScanDirection))
1450 != (RetrieveIndexResult) NULL)
1456 index_endscan(iscan);
1458 /* now update statistics in pg_class */
1459 nipages = RelationGetNumberOfBlocks(indrel);
1460 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1462 getrusage(RUSAGE_SELF, &ru1);
1464 elog(MESSAGE_LEVEL, "Ind %s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
1465 indrel->rd_rel->relname.data, nipages, nitups,
1466 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1467 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1469 if (nitups != nhtups)
1470 elog(NOTICE, "Ind %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1471 indrel->rd_rel->relname.data, nitups, nhtups);
1473 } /* vc_scanoneind */
1476 * vc_vaconeind() -- vacuum one index relation.
1478 * Vpl is the VPageList of the heap we're currently vacuuming.
1479 * It's locked. Indrel is an index relation on the vacuumed heap.
1480 * We don't set locks on the index relation here, since the indexed
1481 * access methods support locking at different granularities.
1482 * We let them handle it.
1484 * Finally, we arrange to update the index relation's statistics in
1488 vc_vaconeind(VPageList vpl, Relation indrel, int nhtups)
1490 RetrieveIndexResult res;
1491 IndexScanDesc iscan;
1492 ItemPointer heapptr;
1500 getrusage(RUSAGE_SELF, &ru0);
1502 /* walk through the entire index */
1503 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1507 while ((res = index_getnext(iscan, ForwardScanDirection))
1508 != (RetrieveIndexResult) NULL)
1510 heapptr = &res->heap_iptr;
1512 if ((vp = vc_tidreapped(heapptr, vpl)) != (VPageDescr) NULL)
1515 elog(DEBUG, "<%x,%x> -> <%x,%x>",
1516 ItemPointerGetBlockNumber(&(res->index_iptr)),
1517 ItemPointerGetOffsetNumber(&(res->index_iptr)),
1518 ItemPointerGetBlockNumber(&(res->heap_iptr)),
1519 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
1521 if (vp->vpd_noff == 0)
1522 { /* this is EmptyPage !!! */
1523 elog(NOTICE, "Ind %s: pointer to EmptyPage (blk %u off %u) - fixing",
1524 indrel->rd_rel->relname.data,
1525 vp->vpd_blkno, ItemPointerGetOffsetNumber(heapptr));
1528 index_delete(indrel, &res->index_iptr);
1537 index_endscan(iscan);
1539 /* now update statistics in pg_class */
1540 nipages = RelationGetNumberOfBlocks(indrel);
1541 vc_updstats(indrel->rd_id, nipages, nitups, false, NULL);
1543 getrusage(RUSAGE_SELF, &ru1);
1545 elog(MESSAGE_LEVEL, "Ind %s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
1546 indrel->rd_rel->relname.data, nipages, nitups, nvac,
1547 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1548 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1550 if (nitups != nhtups)
1551 elog(NOTICE, "Ind %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1552 indrel->rd_rel->relname.data, nitups, nhtups);
1554 } /* vc_vaconeind */
1557 * vc_tidreapped() -- is a particular tid reapped?
1559 * The vpl->vpl_pgdesc array is sorted in the right order for searching.
 *
 * itemptr: the tid to look up; its block number and offset number are
 *		searched for separately.
 * vpl: list of page descriptors to search.
 *
 * Returns NULL when the page lookup or the offset lookup finds nothing.
1562 vc_tidreapped(ItemPointer itemptr, VPageList vpl)
1564 OffsetNumber ioffno;
/* vpd is a local search key: only its block number is filled in */
1570 vpd.vpd_blkno = ItemPointerGetBlockNumber(itemptr);
1571 ioffno = ItemPointerGetOffsetNumber(itemptr);
/* step 1: look for a descriptor of the tid's page among vpl_npages entries */
1574 vpp = (VPageDescr *) vc_find_eq((char *) (vpl->vpl_pgdesc),
1575 vpl->vpl_npages, sizeof(VPageDescr), (char *) &vp,
1578 if (vpp == (VPageDescr *) NULL)
1579 return ((VPageDescr) NULL);
1582 /* ok - we are on true page */
1584 if (vp->vpd_noff == 0)
1585 { /* this is EmptyPage !!! */
/* step 2: look for the tid's offset among the page's vpd_noff offsets */
1589 voff = (OffsetNumber *) vc_find_eq((char *) (vp->vpd_voff),
1590 vp->vpd_noff, sizeof(OffsetNumber), (char *) &ioffno,
1593 if (voff == (OffsetNumber *) NULL)
1594 return ((VPageDescr) NULL);
1598 } /* vc_tidreapped */
1601 * vc_attrstats() -- compute column statistics used by the optimizer
1603 * We compute the column min, max, null and non-null counts.
1604 * Plus we attempt to find the count of the value that occurs most
1605 * frequently in each column
1606 * These figures are used to compute the selectivity of the column
1608 * We use a three-bucket cache to get the most frequent item
1609 * The 'guess' buckets count hits. A cache miss causes guess1
1610 * to get the most hit 'guess' item in the most recent cycle, and
1611 * the new item goes into guess2. Whenever the total count of hits
1612 * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
1614 * This method works perfectly for columns with unique values, and columns
1615 * with only two unique values, plus nulls.
1617 * It becomes less perfect as the number of unique values increases and
1618 * their distribution in the table becomes more random.
 *
 * onerel: relation being analyzed; its rd_att tuple descriptor is used
 *		to fetch attribute values.
 * vacrelstats: holds va_natts VacAttrStats entries, updated in place.
 * htup: the heap tuple whose attribute values are folded into the
 *		statistics.
1622 vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple htup)
1625 attr_cnt = vacrelstats->va_natts;
1626 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1627 TupleDesc tupDesc = onerel->rd_att;
/* one pass per analyzed attribute */
1631 for (i = 0; i < attr_cnt; i++)
1633 VacAttrStats *stats = &vacattrstats[i];
1634 bool value_hit = true;
1636 value = heap_getattr(htup,
1637 stats->attr->attnum, tupDesc, &isnull);
/* VacAttrStatsEqValid: the equality function (f_cmpeq) has been looked up */
1639 if (!VacAttrStatsEqValid(stats))
1646 stats->nonnull_cnt++;
/* first time through for this attribute: seed every bucket with the value */
1647 if (stats->initialized == false)
1649 vc_bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
1650 /* best_cnt gets incremented later */
1651 vc_bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
1652 stats->guess1_cnt = stats->guess1_hits = 1;
1653 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1654 stats->guess2_hits = 1;
1655 if (VacAttrStatsLtGtValid(stats))
1657 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
1658 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1660 stats->initialized = true;
/* min/max tracking needs both ordering functions and a valid output function */
1662 if (VacAttrStatsLtGtValid(stats))
1664 if ((*fmgr_faddr(&stats->f_cmplt)) (value, stats->min))
1666 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1669 if ((*fmgr_faddr(&stats->f_cmpgt)) (value, stats->max))
1671 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
1674 if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->min))
1676 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->max))
/* compare the value with the best, guess1 and guess2 buckets, in that order */
1679 if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->best))
1681 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess1))
1683 stats->guess1_cnt++;
1684 stats->guess1_hits++;
1686 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess2))
1687 stats->guess2_hits++;
/* guess2 takes over the guess1 slot once it has collected more hits */
1691 if (stats->guess2_hits > stats->guess1_hits)
1693 swapDatum(stats->guess1, stats->guess2);
1694 swapInt(stats->guess1_len, stats->guess2_len);
1695 stats->guess1_cnt = stats->guess2_hits;
1696 swapLong(stats->guess1_hits, stats->guess2_hits);
/* guess1 and best trade places once guess1's total count exceeds best's */
1698 if (stats->guess1_cnt > stats->best_cnt)
1700 swapDatum(stats->best, stats->guess1);
1701 swapInt(stats->best_len, stats->guess1_len);
1702 swapLong(stats->best_cnt, stats->guess1_cnt);
1703 stats->guess1_hits = 1;
1704 stats->guess2_hits = 1;
/* store the current value in guess2 and reset both hit counters to 1 */
1708 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1709 stats->guess1_hits = 1;
1710 stats->guess2_hits = 1;
1718 * vc_bucketcpy() -- copy an attribute value into a statistics bucket
 *
 * attr: attribute description; attbyval and attlen decide how the
 *		value is copied.
 * value: the datum to store.
 * bucket: destination datum.
 * bucket_len: length associated with *bucket; compared with the needed
 *		length to decide whether the bucket must be reallocated.
1722 vc_bucketcpy(AttributeTupleForm attr, Datum value, Datum *bucket, int16 *bucket_len)
/* fixed-length pass-by-value attributes are tested for first */
1724 if (attr->attbyval && attr->attlen != -1)
/* attlen == -1: the length is taken from the value itself via VARSIZE() */
1728 int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
/* reallocate only when the value does not fit in the current bucket */
1730 if (len > *bucket_len)
/* a zero *bucket_len means there is no earlier allocation to release */
1732 if (*bucket_len != 0)
1733 pfree(DatumGetPointer(*bucket));
1734 *bucket = PointerGetDatum(palloc(len));
1737 memmove(DatumGetPointer(*bucket), DatumGetPointer(value), len);
1742 * vc_updstats() -- update pg_class statistics for one relation
1744 * This routine works for both index and heap relation entries in
1745 * pg_class. We violate no-overwrite semantics here by storing new
1746 * values for ntups, npages, and hasindex directly in the pg_class
1747 * tuple that's already on the page. The reason for this is that if
1748 * we updated these tuples in the usual way, then every tuple in pg_class
1749 * would be replaced every day. This would make planning and executing
1750 * historical queries very expensive.
 *
 * relid: oid of the relation whose pg_class row is updated.
 * npages, ntups, hasindex: stored into relpages, reltuples and
 *		relhasindex.
 * vacrelstats: optional per-attribute statistics; when non-NULL with
 *		va_natts > 0, attdisbursion is overwritten in pg_attribute
 *		and rows are inserted into pg_statistic.
1753 vc_updstats(Oid relid, int npages, int ntups, bool hasindex, VRelStats *vacrelstats)
1758 HeapScanDesc rsdesc,
1766 Form_pg_class pgcform;
1769 AttributeTupleForm attp;
1772 * update number of tuples and number of pages in pg_class
1774 ScanKeyEntryInitialize(&rskey, 0x0, ObjectIdAttributeNumber,
1776 ObjectIdGetDatum(relid));
/* locate relid's row with a one-key heap scan on the oid attribute */
1778 rd = heap_openr(RelationRelationName);
1779 rsdesc = heap_beginscan(rd, false, false, 1, &rskey);
/*
 * NOTE(review): the message below formats the relid with "%d", while the
 * index-open notice in vc_getindices prints an oid with "%u" -- confirm
 * the argument type and align the conversion.
 */
1781 if (!HeapTupleIsValid(rtup = heap_getnext(rsdesc, 0, &rbuf)))
1782 elog(ERROR, "pg_class entry for relid %d vanished during vacuuming",
1785 /* overwrite the existing statistics in the tuple */
1786 vc_setpagelock(rd, BufferGetBlockNumber(rbuf));
1787 pgcform = (Form_pg_class) GETSTRUCT(rtup);
1788 pgcform->reltuples = ntups;
1789 pgcform->relpages = npages;
1790 pgcform->relhasindex = hasindex;
/* per-attribute statistics are written only when the caller supplied some */
1792 if (vacrelstats != NULL && vacrelstats->va_natts > 0)
1794 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1795 int natts = vacrelstats->va_natts;
1797 ad = heap_openr(AttributeRelationName);
1798 sd = heap_openr(StatisticRelationName);
1799 ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
1802 asdesc = heap_beginscan(ad, false, false, 1, &askey);
1804 while (HeapTupleIsValid(atup = heap_getnext(asdesc, 0, &abuf)))
1807 float32data selratio; /* average ratio of rows selected
1808 * for a random constant */
1809 VacAttrStats *stats;
1810 Datum values[Natts_pg_statistic];
1811 char nulls[Natts_pg_statistic];
1813 attp = (AttributeTupleForm) GETSTRUCT(atup);
1814 if (attp->attnum <= 0) /* skip system attributes for now, */
1815 /* they are unique anyway */
/* look for the VacAttrStats entry collected for this attribute number */
1818 for (i = 0; i < natts; i++)
1820 if (attp->attnum == vacattrstats[i].attr->attnum)
1825 stats = &(vacattrstats[i]);
1827 /* overwrite the existing statistics in the tuple */
1828 if (VacAttrStatsEqValid(stats))
1831 vc_setpagelock(ad, BufferGetBlockNumber(abuf));
/*
 * Three cases are distinguished: (1) no rows counted, or at most one
 * null together with a best_cnt of 1; (2) min_cnt + max_cnt accounts
 * for every non-null value; (3) everything else.
 */
1833 if (stats->nonnull_cnt + stats->null_cnt == 0 ||
1834 (stats->null_cnt <= 1 && stats->best_cnt == 1))
1836 else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
1838 double min_cnt_d = stats->min_cnt,
1839 max_cnt_d = stats->max_cnt,
1840 null_cnt_d = stats->null_cnt,
1841 nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */
1843 selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) /
1844 (nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d);
1848 double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
1849 double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
1852 * we assume count of other values are 20% of best
1855 selratio = (most * most + 0.20 * most * (total - most)) / total / total;
1859 attp->attdisbursion = selratio;
1860 WriteNoReleaseBuffer(abuf);
1862 /* DO PG_STATISTIC INSERTS */
1865 * doing system relations, especially pg_statistic is a
1868 if (VacAttrStatsLtGtValid(stats) && stats->initialized /* &&
1869 * !IsSystemRelationName(
1871 pgcform->relname.data) */ )
1873 FmgrInfo out_function;
1876 for (i = 0; i < Natts_pg_statistic; ++i)
1880 * initialize values[]
1884 values[i++] = (Datum) relid; /* 1 */
1885 values[i++] = (Datum) attp->attnum; /* 2 */
1886 values[i++] = (Datum) InvalidOid; /* 3 */
1887 fmgr_info(stats->outfunc, &out_function);
1888 out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid);
1889 values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
1891 out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid);
1892 values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
1897 stup = heap_formtuple(sdesc, values, nulls);
1900 * insert the tuple in the relation and get the tuple's oid.
1903 heap_insert(sd, stup);
/* presumably the two text datums built by F_TEXTIN above -- confirm that i restarts at 0 */
1904 pfree(DatumGetPointer(values[3]));
1905 pfree(DatumGetPointer(values[4]));
1910 heap_endscan(asdesc);
1915 /* XXX -- after write, should invalidate relcache in other backends */
1916 WriteNoReleaseBuffer(rbuf); /* heap_endscan release scan' buffers ? */
1919 * invalidating system relations confuses the function cache of
1920 * pg_operator and pg_opclass
1922 if (!IsSystemRelationName(pgcform->relname.data))
1923 RelationInvalidateHeapTuple(rd, rtup);
1925 /* that's all, folks */
1926 heap_endscan(rsdesc);
1931 * vc_delhilowstats() -- delete pg_statistic rows
 *
 * relid: when not InvalidOid, only rows whose starelid equals relid
 *		are scanned; otherwise the whole relation is scanned.
 * attcnt, attnums: attnums[0 .. attcnt-1] are compared, after adding
 *		1, with each row's staattnum.
1935 vc_delhilowstats(Oid relid, int attcnt, int *attnums)
1937 Relation pgstatistic;
1938 HeapScanDesc pgsscan;
1942 pgstatistic = heap_openr(StatisticRelationName);
/* keyed scan for one relation, unkeyed scan otherwise */
1944 if (relid != InvalidOid)
1946 ScanKeyEntryInitialize(&pgskey, 0x0, Anum_pg_statistic_starelid,
1948 ObjectIdGetDatum(relid));
1949 pgsscan = heap_beginscan(pgstatistic, false, false, 1, &pgskey);
1952 pgsscan = heap_beginscan(pgstatistic, false, false, 0, NULL);
1954 while (HeapTupleIsValid(pgstup = heap_getnext(pgsscan, 0, NULL)))
1958 Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(pgstup);
/* NOTE(review): the "+ 1" suggests attnums[] holds 0-based attribute indexes -- confirm with callers */
1961 for (i = 0; i < attcnt; i++)
1963 if (pgs->staattnum == attnums[i] + 1)
1967 continue; /* don't delete it */
1969 heap_delete(pgstatistic, &pgstup->t_ctid);
1972 heap_endscan(pgsscan);
1973 heap_close(pgstatistic);
1977 vc_setpagelock(Relation rel, BlockNumber blkno)
/*
 * Requests a write-page lock on block blkno of rel by handing
 * RelationSetLockForWritePage() an item pointer built for that block.
 */
1979 ItemPointerData itm;
/* offset 1 is used as a placeholder; presumably only the block number matters here -- confirm */
1981 ItemPointerSet(&itm, blkno, 1);
1983 RelationSetLockForWritePage(rel, &itm);
1987 * vc_reappage() -- save a page on the array of reapped pages.
1989 * As a side effect of the way that the vacuuming loop for a given
1990 * relation works, higher pages come after lower pages in the array
1991 * (and highest tid on a page is last).
 *
 * vpl: list that receives the new entry.
 * vpc: descriptor to copy; the copy, not vpc itself, is inserted.
1994 vc_reappage(VPageList vpl, VPageDescr vpc)
1998 /* allocate a VPageDescrData entry */
/* the allocation adds room for vpd_noff offset numbers after the struct */
1999 newvpd = (VPageDescr) palloc(sizeof(VPageDescrData) + vpc->vpd_noff * sizeof(OffsetNumber));
2002 if (vpc->vpd_noff > 0)
2003 memmove(newvpd->vpd_voff, vpc->vpd_voff, vpc->vpd_noff * sizeof(OffsetNumber));
/* copy the scalar fields one by one */
2004 newvpd->vpd_blkno = vpc->vpd_blkno;
2005 newvpd->vpd_free = vpc->vpd_free;
2006 newvpd->vpd_nusd = vpc->vpd_nusd;
2007 newvpd->vpd_noff = vpc->vpd_noff;
2009 /* insert this page into vpl list */
2010 vc_vpinsert(vpl, newvpd);
2015 vc_vpinsert(VPageList vpl, VPageDescr vpnew)
/*
 * Appends vpnew to the end of vpl->vpl_pgdesc and increments
 * vpl->vpl_npages. The pointer itself is stored; nothing is copied.
 */
2018 /* allocate a VPageDescr entry if needed */
/* the array starts with 100 slots and gains 100 more whenever the count reaches a multiple of 100 */
2019 if (vpl->vpl_npages == 0)
2020 vpl->vpl_pgdesc = (VPageDescr *) palloc(100 * sizeof(VPageDescr));
2021 else if (vpl->vpl_npages % 100 == 0)
2022 vpl->vpl_pgdesc = (VPageDescr *) repalloc(vpl->vpl_pgdesc, (vpl->vpl_npages + 100) * sizeof(VPageDescr));
2023 vpl->vpl_pgdesc[vpl->vpl_npages] = vpnew;
2024 (vpl->vpl_npages)++;
2029 vc_free(VRelList vrl)
/*
 * Walks the relation list starting at vrl, following vrl_next, while
 * the variable memory context of vc_portal is the current context.
 */
2033 PortalVariableMemory pmem;
/* switch contexts for the duration of the walk; old is restored at the end */
2035 pmem = PortalGetVariableMemory(vc_portal);
2036 old = MemoryContextSwitchTo((MemoryContext) pmem);
2038 while (vrl != (VRelList) NULL)
2041 /* free rel list entry */
2043 vrl = vrl->vrl_next;
2047 MemoryContextSwitchTo(old);
2051 vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *))
/*
 * Searches an array for an element matching elm.
 *
 * bot: address of the first element.
 * nelem: number of elements.
 * size: size of one element in bytes.
 * elm: address of the key to look for.
 * compar: comparison callback applied to the key and an element.
 *
 * The visible return statements hand back the address of an element of
 * the array; callers in this file test the result against NULL.
 */
2054 int last = nelem - 1;
2055 int celm = nelem / 2;
2059 last_move = first_move = true;
/* the first element is compared only after the lower bound has moved */
2062 if (first_move == true)
/* note the argument order here: (element, key), unlike the calls below */
2064 res = compar(bot, elm);
/* likewise the last element is compared only after the upper bound has moved */
2071 if (last_move == true)
2073 res = compar(elm, bot + last * size);
2077 return (bot + last * size);
/* probe the middle element of the current range */
2080 res = compar(elm, bot + celm * size);
2082 return (bot + celm * size);
/* continue in the part after the middle element: rebase bot and recount */
2096 last = last - celm - 1;
2097 bot = bot + (celm + 1) * size;
2098 celm = (last + 1) / 2;
2105 vc_cmp_blk(char *left, char *right)
/*
 * Comparison callback on block numbers. left and right each point to a
 * VPageDescr (a pointer to a page descriptor), so one extra dereference
 * is needed to reach vpd_blkno.
 */
2110 lblk = (*((VPageDescr *) left))->vpd_blkno;
2111 rblk = (*((VPageDescr *) right))->vpd_blkno;
2122 vc_cmp_offno(char *left, char *right)
/*
 * Comparison callback on offset numbers. left and right each point
 * directly to an OffsetNumber; "less than" is tested first, then
 * equality.
 */
2125 if (*(OffsetNumber *) left < *(OffsetNumber *) right)
2127 if (*(OffsetNumber *) left == *(OffsetNumber *) right)
2131 } /* vc_cmp_offno */
2135 vc_getindices(Oid relid, int *nindices, Relation **Irel)
/*
 * Collects the indices defined on relation relid.
 *
 * relid: oid matched against indrelid in pg_index.
 * nindices: output count; when it is 0 at the end and Irel was given,
 *		*Irel is reset to NULL.
 * Irel: optional output array of opened index relations; when NULL, no
 *		array is allocated.
 */
2141 HeapScanDesc pgiscan;
/* ioid collects the index oids; it starts with room for 10 entries */
2151 ioid = (Oid *) palloc(10 * sizeof(Oid));
2153 /* prepare a heap scan on the pg_index relation */
2154 pgindex = heap_openr(IndexRelationName);
2155 pgidesc = RelationGetTupleDescriptor(pgindex);
2157 ScanKeyEntryInitialize(&pgikey, 0x0, Anum_pg_index_indrelid,
2159 ObjectIdGetDatum(relid));
2161 pgiscan = heap_beginscan(pgindex, false, false, 1, &pgikey);
2163 while (HeapTupleIsValid(pgitup = heap_getnext(pgiscan, 0, NULL)))
2165 d = heap_getattr(pgitup, Anum_pg_index_indexrelid,
/* the array is regrown to i + 10 entries; the new oid goes into slot i - 1 */
2169 ioid = (Oid *) repalloc(ioid, (i + 10) * sizeof(Oid));
2170 ioid[i - 1] = DatumGetObjectId(d);
2173 heap_endscan(pgiscan);
2174 heap_close(pgindex);
2177 { /* No one index found */
2182 if (Irel != (Relation **) NULL)
2183 *Irel = (Relation *) palloc(i * sizeof(Relation));
/* the collected oids are opened from the last one backwards */
2187 irel = index_open(ioid[--i]);
2188 if (irel != (Relation) NULL)
2190 if (Irel != (Relation **) NULL)
2197 elog(NOTICE, "CAN't OPEN INDEX %u - SKIP IT", ioid[i]);
2202 if (Irel != (Relation **) NULL && *nindices == 0)
2205 *Irel = (Relation *) NULL;
2208 } /* vc_getindices */
2212 vc_clsindices(int nindices, Relation *Irel)
/*
 * Closes index relations held in Irel with index_close(). A NULL Irel
 * is tested for before anything else.
 */
2215 if (Irel == (Relation *) NULL)
2219 index_close(Irel[nindices]);
2222 } /* vc_clsindices */
2226 vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc)
/*
 * Builds one IndDesc per index in Irel.
 *
 * onerel: the indexed relation (not referenced in the lines shown here).
 * nindices: number of entries in Irel.
 * Irel: the index relations to describe.
 * Idesc: output; receives a palloc'd array of nindices descriptors.
 */
2229 HeapTuple pgIndexTup;
2230 AttrNumber *attnumP;
2234 *Idesc = (IndDesc *) palloc(nindices * sizeof(IndDesc));
2236 for (i = 0, idcur = *Idesc; i < nindices; i++, idcur++)
2239 SearchSysCacheTuple(INDEXRELID,
2240 ObjectIdGetDatum(Irel[i]->rd_id),
2243 idcur->tform = (IndexTupleForm) GETSTRUCT(pgIndexTup);
/*
 * Count the key attributes. The bound is tested before *attnumP is
 * read, so a full key array is never read one element past
 * INDEX_MAX_KEYS entries.
 */
2244 for (attnumP = &(idcur->tform->indkey[0]), natts = 0;
2245 natts != INDEX_MAX_KEYS && *attnumP != InvalidAttrNumber;
2246 attnumP++, natts++);
/* a valid indproc marks a functional index: fill in its function info */
2247 if (idcur->tform->indproc != InvalidOid)
2249 idcur->finfoP = &(idcur->finfo);
2250 FIgetnArgs(idcur->finfoP) = natts;
2252 FIgetProcOid(idcur->finfoP) = idcur->tform->indproc;
2253 *(FIgetname(idcur->finfoP)) = '\0';
2256 idcur->finfoP = (FuncIndexInfo *) NULL;
2258 idcur->natts = natts;
2265 vc_enough_space(VPageDescr vpd, Size len)
/*
 * Decides whether an item of len bytes fits on the page described by vpd.
 *
 * vpd: page descriptor; vpd_free is the free space, while vpd_nusd and
 *		vpd_noff are compared to see whether a free item id exists.
 * len: item length in bytes; it is double-aligned before any comparison.
 */
2268 len = DOUBLEALIGN(len);
2270 if (len > vpd->vpd_free)
2273 if (vpd->vpd_nusd < vpd->vpd_noff) /* there are free itemid(s) */
2274 return (true); /* and len <= free_space */
2276 /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
/*
 * Add on the left instead of subtracting on the right: with fewer free
 * bytes than one ItemIdData the unsigned subtraction would wrap around
 * and the test would succeed wrongly.
 */
2277 if (len + sizeof(ItemIdData) <= vpd->vpd_free)
2282 } /* vc_enough_space */