1 /*-------------------------------------------------------------------------
4 * the postgres vacuum cleaner
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.80 1998/09/01 04:28:05 momjian Exp $
12 *-------------------------------------------------------------------------
14 #include <sys/types.h>
23 #include "miscadmin.h"
24 #include "access/genam.h"
25 #include "access/heapam.h"
26 #include "access/transam.h"
27 #include "access/xact.h"
28 #include "catalog/catalog.h"
29 #include "catalog/catname.h"
30 #include "catalog/index.h"
31 #include "catalog/pg_class.h"
32 #include "catalog/pg_index.h"
33 #include "catalog/pg_operator.h"
34 #include "catalog/pg_statistic.h"
35 #include "catalog/pg_type.h"
36 #include "commands/vacuum.h"
38 #include "parser/parse_oper.h"
39 #include "storage/bufmgr.h"
40 #include "storage/bufpage.h"
41 #include "storage/shmem.h"
42 #include "storage/smgr.h"
43 #include "storage/itemptr.h"
44 #include "storage/lmgr.h"
45 #include "utils/builtins.h"
46 #include "utils/inval.h"
47 #include "utils/mcxt.h"
48 #include "utils/portal.h"
49 #include "utils/syscache.h"
51 #ifndef HAVE_GETRUSAGE
52 #include <rusagestub.h>
55 #include <sys/resource.h>
58 /* #include <port-protos.h> *//* Why? */
/* Buffer-manager routine used at truncate time; declared here for lack of a header. */
60 extern int BlowawayRelationBuffers(Relation rel, BlockNumber block);
/* True while a vacuum is in progress; consulted on transaction abort (see vc_abort). */
62 bool VacuumRunning = false;
/* Portal whose variable memory survives the per-relation transactions. */
64 static Portal vc_portal;
66 static int MESSAGE_LEVEL; /* message level: NOTICE when verbose, else DEBUG */
/* In-place swap helpers.  NOTE: arguments are evaluated more than once. */
68 #define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
69 #define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
70 #define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
/* A VacAttrStats entry is usable only if its comparison/output functions resolved. */
71 #define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
72 #define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
73 stats->f_cmpgt.fn_addr != NULL && \
74 RegProcedureIsValid(stats->outfunc) )
77 /* non-export function prototypes */
/* lifecycle and per-relation driver */
78 static void vc_init(void);
79 static void vc_shutdown(void);
80 static void vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols);
81 static VRelList vc_getrels(NameData *VacRelP);
82 static void vc_vacone(Oid relid, bool analyze, List *va_cols);
/* heap and index passes */
83 static void vc_scanheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages, VPageList fraged_pages);
84 static void vc_rpfheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages, VPageList fraged_pages, int nindices, Relation *Irel);
85 static void vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vpl);
86 static void vc_vacpage(Page page, VPageDescr vpd);
87 static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples);
88 static void vc_scanoneind(Relation indrel, int num_tuples);
/* statistics gathering and catalog updates */
89 static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple);
90 static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len);
91 static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
92 static void vc_delhilowstats(Oid relid, int attcnt, int *attnums);
/* page-list and helper utilities */
93 static void vc_setpagelock(Relation rel, BlockNumber blkno);
94 static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl);
95 static void vc_reappage(VPageList vpl, VPageDescr vpc);
96 static void vc_vpinsert(VPageList vpl, VPageDescr vpnew);
97 static void vc_free(VRelList vrl);
98 static void vc_getindices(Oid relid, int *nindices, Relation **Irel);
99 static void vc_clsindices(int nindices, Relation *Irel);
100 static void vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
101 static char *vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *));
102 static int vc_cmp_blk(char *left, char *right);
103 static int vc_cmp_offno(char *left, char *right);
104 static bool vc_enough_space(VPageDescr vpd, Size len);
/*
 * vacuum() -- entry point for the VACUUM command.
 *
 * Sets up the long-lived portal, copies the column list into portal
 * memory, then runs either a single-relation or whole-database pass.
 * NOTE(review): this listing is elided; intermediate lines are missing.
 */
107 vacuum(char *vacrel, bool verbose, bool analyze, List *va_spec)
111 PortalVariableMemory pmem;
117 * Create a portal for safe memory across transactions. We need to
118 * palloc the name space for it because our hash function expects the
119 * name to be on a longword boundary. CreatePortal copies the name to
120 * safe storage for us.
122 pname = (char *) palloc(strlen(VACPNAME) + 1);
123 strcpy(pname, VACPNAME);
124 vc_portal = CreatePortal(pname);
/* verbose => NOTICE-level progress messages, else DEBUG */
128 MESSAGE_LEVEL = NOTICE;
130 MESSAGE_LEVEL = DEBUG;
132 /* vacrel gets de-allocated on transaction commit */
134 strcpy(VacRel.data, vacrel);
/* copy the column list into portal memory so it outlives per-rel xacts */
136 pmem = PortalGetVariableMemory(vc_portal);
137 old = MemoryContextSwitchTo((MemoryContext) pmem);
139 if (va_spec != NIL && !analyze)
140 elog(ERROR, "Can't vacuum columns, only tables. You can 'vacuum analyze' columns.");
144 char *col = (char *) lfirst(le);
147 dest = (char *) palloc(strlen(col) + 1);
149 va_cols = lappend(va_cols, dest);
151 MemoryContextSwitchTo(old);
153 /* initialize vacuum cleaner */
156 /* vacuum the database */
158 vc_vacuum(&VacRel, analyze, va_cols);
160 vc_vacuum(NULL, analyze, NIL);
162 PortalDestroy(&vc_portal);
169 * vc_init(), vc_shutdown() -- start up and shut down the vacuum cleaner.
171 * We run exactly one vacuum cleaner at a time. We use the file system
172 * to guarantee an exclusive lock on vacuuming, since a single vacuum
173 * cleaner instantiation crosses transaction boundaries, and we'd lose
174 * postgres-style locks at the end of every transaction.
176 * The strangeness with committing and starting transactions in the
177 * init and shutdown routines is due to the fact that the vacuum cleaner
178 * is invoked via a sql command, and so is already executing inside
179 * a transaction. We need to leave ourselves in a predictable state
180 * on entry and exit to the vacuum cleaner. We commit the transaction
181 * started in PostgresMain() inside vc_init(), and start one in
182 * vc_shutdown() to match the commit waiting for us back in
/* O_CREAT|O_EXCL makes creation atomic: failure means another vacuum holds the lock. */
/* NOTE(review): fd does not appear to be closed in the visible lines - confirm upstream. */
190 if ((fd = open("pg_vlock", O_CREAT | O_EXCL, 0600)) < 0)
192 elog(ERROR, "Can't create lock file. Is another vacuum cleaner running?\n\
193 \tIf not, you may remove the pg_vlock file in the %s\n\
194 \tdirectory", DatabasePath);
199 * By here, exclusive open on the lock file succeeded. If we abort
200 * for any reason during vacuuming, we need to remove the lock file.
201 * This global variable is checked in the transaction manager on xact
202 * abort, and the routine vc_abort() is called if necessary.
205 VacuumRunning = true;
207 /* matches the StartTransaction in PostgresMain() */
208 CommitTransactionCommand();
/* vc_shutdown() -- release the filesystem lock and re-enter a transaction. */
214 /* on entry, not in a transaction */
215 if (unlink("pg_vlock") < 0)
216 elog(ERROR, "vacuum: can't destroy lock file!")
/* vc_abort() -- xact-abort hook: drop the lock file and clear the running flag. */
229 /* on abort, remove the vacuum cleaner lock file */
232 VacuumRunning = false;
236 * vc_vacuum() -- vacuum the database.
238 * This routine builds a list of relations to vacuum, and then calls
239 * code that vacuums them one at a time. We are careful to vacuum each
240 * relation in a separate transaction in order to avoid holding too many
244 vc_vacuum(NameData *VacRelP, bool analyze, List *va_cols)
249 /* get list of relations */
250 vrl = vc_getrels(VacRelP);
/* whole-database analyze: wipe all hi/lo stats once up front */
252 if (analyze && VacRelP == NULL && vrl != NULL)
253 vc_delhilowstats(InvalidOid, 0, NULL);
255 /* vacuum each heap relation */
256 for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
257 vc_vacone(cur->vrl_relid, analyze, va_cols);
/*
 * vc_getrels() -- build the list of relations to vacuum.
 *
 * Scans pg_class in its own transaction; with a relation name the scan
 * key matches relname, otherwise it matches all relkind 'r' entries.
 * The result list is palloc'd in the portal's variable memory so it
 * survives the per-relation transactions that follow.
 */
263 vc_getrels(NameData *VacRelP)
269 PortalVariableMemory portalmem;
280 StartTransactionCommand();
284 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relname,
286 PointerGetDatum(VacRelP->data));
290 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
291 F_CHAREQ, CharGetDatum('r'));
294 portalmem = PortalGetVariableMemory(vc_portal);
295 vrl = cur = (VRelList) NULL;
297 rel = heap_openr(RelationRelationName);
298 tupdesc = RelationGetDescr(rel);
300 scan = heap_beginscan(rel, false, SnapshotNow, 1, &key);
302 while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
306 d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
310 * don't vacuum large objects for now - something breaks when we
/* large-object relations are named xinv#### / xinx#### - skip them */
313 if ((strlen(rname) >= 5) && rname[0] == 'x' &&
314 rname[1] == 'i' && rname[2] == 'n' &&
315 (rname[3] == 'v' || rname[3] == 'x') &&
316 rname[4] >= '0' && rname[4] <= '9')
318 elog(NOTICE, "Rel %s: can't vacuum LargeObjects now",
323 d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
325 rkind = DatumGetChar(d);
327 /* skip system relations */
330 elog(NOTICE, "Vacuum: can not process index and certain system tables")
334 /* get a relation list entry for this guy */
335 old = MemoryContextSwitchTo((MemoryContext) portalmem);
336 if (vrl == (VRelList) NULL)
337 vrl = cur = (VRelList) palloc(sizeof(VRelListData));
340 cur->vrl_next = (VRelList) palloc(sizeof(VRelListData));
343 MemoryContextSwitchTo(old);
345 cur->vrl_relid = tuple->t_oid;
346 cur->vrl_next = (VRelList) NULL;
349 elog(NOTICE, "Vacuum: table not found")
355 CommitTransactionCommand();
361 * vc_vacone() -- vacuum one heap relation
363 * This routine vacuums a single heap, cleans out its indices, and
364 * updates its num_pages and num_tuples statistics.
366 * Doing one heap at a time incurs extra overhead, since we need to
367 * check that the heap exists again just before we vacuum it. The
368 * reason that we do this is so that vacuuming can be spread across
369 * many small transactions. Otherwise, two-phase locking would require
370 * us to lock the entire database during one pass of the vacuum cleaner.
373 vc_vacone(Oid relid, bool analyze, List *va_cols)
380 VPageListData vacuum_pages; /* List of pages to vacuum and/or clean
382 VPageListData fraged_pages; /* List of pages with space enough for
388 VRelStats *vacrelstats;
390 StartTransactionCommand();
392 rel = heap_openr(RelationRelationName);
393 tupdesc = RelationGetDescr(rel);
396 * Race condition -- if the pg_class tuple has gone away since the
397 * last time we saw it, we don't need to vacuum it.
399 tuple = SearchSysCacheTuple(RELOID,
400 ObjectIdGetDatum(relid),
402 if (!HeapTupleIsValid(tuple))
405 CommitTransactionCommand();
409 /* now open the class and vacuum it */
410 onerel = heap_open(relid);
412 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
413 vacrelstats->relid = relid;
414 vacrelstats->num_pages = vacrelstats->num_tuples = 0;
415 vacrelstats->hasindex = false;
/* ANALYZE prep: resolve per-column =, <, > operators and output functions */
416 if (analyze && !IsSystemRelationName((RelationGetRelationName(onerel))->data))
420 Form_pg_attribute *attr;
422 attr_cnt = onerel->rd_att->natts;
423 attr = onerel->rd_att->attrs;
430 if (length(va_cols) > attr_cnt)
431 elog(ERROR, "vacuum: too many attributes specified for relation %s",
432 (RelationGetRelationName(onerel))->data);
/* translate the user's column-name list into attribute numbers */
433 attnums = (int *) palloc(attr_cnt * sizeof(int));
436 char *col = (char *) lfirst(le);
438 for (i = 0; i < attr_cnt; i++)
440 if (namestrcmp(&(attr[i]->attname), col) == 0)
443 if (i < attr_cnt) /* found */
447 elog(ERROR, "vacuum: there is no attribute %s in %s",
448 col, (RelationGetRelationName(onerel))->data);
454 vacrelstats->vacattrstats =
455 (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
457 for (i = 0; i < attr_cnt; i++)
459 Operator func_operator;
460 Form_pg_operator pgopform;
463 stats = &vacrelstats->vacattrstats[i];
464 stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
465 memmove(stats->attr, attr[((attnums) ? attnums[i] : i)], ATTRIBUTE_TUPLE_SIZE);
466 stats->best = stats->guess1 = stats->guess2 = 0;
467 stats->max = stats->min = 0;
468 stats->best_len = stats->guess1_len = stats->guess2_len = 0;
469 stats->max_len = stats->min_len = 0;
470 stats->initialized = false;
471 stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
472 stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
/* a missing operator leaves fn_addr NULL; the VacAttrStats*Valid macros test that */
474 func_operator = oper("=", stats->attr->atttypid, stats->attr->atttypid, true);
475 if (func_operator != NULL)
477 pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
478 fmgr_info(pgopform->oprcode, &(stats->f_cmpeq));
481 stats->f_cmpeq.fn_addr = NULL;
483 func_operator = oper("<", stats->attr->atttypid, stats->attr->atttypid, true);
484 if (func_operator != NULL)
486 pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
487 fmgr_info(pgopform->oprcode, &(stats->f_cmplt));
490 stats->f_cmplt.fn_addr = NULL;
492 func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true);
493 if (func_operator != NULL)
495 pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
496 fmgr_info(pgopform->oprcode, &(stats->f_cmpgt));
499 stats->f_cmpgt.fn_addr = NULL;
501 typetuple = SearchSysCacheTuple(TYPOID,
502 ObjectIdGetDatum(stats->attr->atttypid),
504 if (HeapTupleIsValid(typetuple))
505 stats->outfunc = ((Form_pg_type) GETSTRUCT(typetuple))->typoutput;
507 stats->outfunc = InvalidOid;
509 vacrelstats->va_natts = attr_cnt;
/* drop the old hi/lo stats for the columns we're about to recompute */
510 vc_delhilowstats(relid, ((attnums) ? attr_cnt : 0), attnums);
516 vacrelstats->va_natts = 0;
517 vacrelstats->vacattrstats = (VacAttrStats *) NULL;
520 /* we require the relation to be locked until the indices are cleaned */
521 RelationSetLockForWrite(onerel);
524 vacuum_pages.vpl_num_pages = fraged_pages.vpl_num_pages = 0;
525 vc_scanheap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
527 /* Now open indices */
528 Irel = (Relation *) NULL;
529 vc_getindices(vacrelstats->relid, &nindices, &Irel);
532 vacrelstats->hasindex = true;
534 vacrelstats->hasindex = false;
536 /* Clean/scan index relation(s) */
537 if (Irel != (Relation *) NULL)
539 if (vacuum_pages.vpl_num_pages > 0)
541 for (i = 0; i < nindices; i++)
542 vc_vaconeind(&vacuum_pages, Irel[i], vacrelstats->num_tuples);
545 /* just scan indices to update statistic */
547 for (i = 0; i < nindices; i++)
548 vc_scanoneind(Irel[i], vacrelstats->num_tuples);
552 if (fraged_pages.vpl_num_pages > 0) /* Try to shrink heap */
553 vc_rpfheap(vacrelstats, onerel, &vacuum_pages, &fraged_pages, nindices, Irel);
556 if (Irel != (Relation *) NULL)
557 vc_clsindices(nindices, Irel);
558 if (vacuum_pages.vpl_num_pages > 0) /* Clean pages from
559 * vacuum_pages list */
560 vc_vacheap(vacrelstats, onerel, &vacuum_pages);
563 /* ok - free vacuum_pages list of reapped pages */
564 if (vacuum_pages.vpl_num_pages > 0)
566 vpp = vacuum_pages.vpl_pagedesc;
567 for (i = 0; i < vacuum_pages.vpl_num_pages; i++, vpp++)
569 pfree(vacuum_pages.vpl_pagedesc);
570 if (fraged_pages.vpl_num_pages > 0)
571 pfree(fraged_pages.vpl_pagedesc);
574 /* all done with this class */
578 /* update statistics in pg_class */
579 vc_updstats(vacrelstats->relid, vacrelstats->num_pages, vacrelstats->num_tuples,
580 vacrelstats->hasindex, vacrelstats);
582 /* next command frees attribute stats */
584 CommitTransactionCommand();
588 * vc_scanheap() -- scan an open heap relation
590 * This routine sets commit times, constructs vacuum_pages list of
591 * empty/uninitialized pages and pages with dead tuples and
592 * ~LP_USED line pointers, constructs fraged_pages list of pages
593 * appropriate for purposes of shrinking and maintains statistics
594 * on the number of live tuples in a heap.
597 vc_scanheap(VRelStats *vacrelstats, Relation onerel,
598 VPageList vacuum_pages, VPageList fraged_pages)
617 uint32 tups_vacuumed,
627 Size min_tlen = MAXTUPLEN;
632 bool do_shrinking = true;
634 getrusage(RUSAGE_SELF, &ru0);
636 tups_vacuumed = num_tuples = nunused = ncrash = empty_pages =
637 new_pages = changed_pages = empty_end_pages = 0;
638 free_size = usable_free_size = 0;
640 relname = (RelationGetRelationName(onerel))->data;
642 nblocks = RelationGetNumberOfBlocks(onerel);
644 vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
645 vpc->vpd_offsets_used = 0;
647 elog(MESSAGE_LEVEL, "--Relation %s--", relname);
649 for (blkno = 0; blkno < nblocks; blkno++)
651 buf = ReadBuffer(onerel, blkno);
652 page = BufferGetPage(buf);
653 vpc->vpd_blkno = blkno;
654 vpc->vpd_offsets_free = 0;
658 elog(NOTICE, "Rel %s: Uninitialized page %u - fixing",
660 PageInit(page, BufferGetPageSize(buf), 0);
661 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
662 free_size += (vpc->vpd_free - sizeof(ItemIdData));
665 vc_reappage(vacuum_pages, vpc);
670 if (PageIsEmpty(page))
672 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
673 free_size += (vpc->vpd_free - sizeof(ItemIdData));
676 vc_reappage(vacuum_pages, vpc);
683 maxoff = PageGetMaxOffsetNumber(page);
684 for (offnum = FirstOffsetNumber;
686 offnum = OffsetNumberNext(offnum))
688 itemid = PageGetItemId(page, offnum);
691 * Collect un-used items too - it's possible to have indices
692 * pointing here after crash.
694 if (!ItemIdIsUsed(itemid))
696 vpc->vpd_offsets[vpc->vpd_offsets_free++] = offnum;
701 tuple = (HeapTuple) PageGetItem(page, itemid);
704 if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
706 if (tuple->t_infomask & HEAP_XMIN_INVALID)
710 if (TransactionIdDidAbort(tuple->t_xmin))
712 else if (TransactionIdDidCommit(tuple->t_xmin))
714 tuple->t_infomask |= HEAP_XMIN_COMMITTED;
717 else if (!TransactionIdIsInProgress(tuple->t_xmin))
721 * Not Aborted, Not Committed, Not in Progress -
722 * so it's from crashed process. - vadim 11/26/96
729 elog(NOTICE, "Rel %s: TID %u/%u: InsertTransactionInProgress %u - can't shrink relation",
730 relname, blkno, offnum, tuple->t_xmin);
731 do_shrinking = false;
737 * here we are concerned about tuples with xmin committed and
738 * xmax unknown or committed
740 if (tuple->t_infomask & HEAP_XMIN_COMMITTED &&
741 !(tuple->t_infomask & HEAP_XMAX_INVALID))
743 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
745 else if (TransactionIdDidAbort(tuple->t_xmax))
747 tuple->t_infomask |= HEAP_XMAX_INVALID;
750 else if (TransactionIdDidCommit(tuple->t_xmax))
752 else if (!TransactionIdIsInProgress(tuple->t_xmax))
756 * Not Aborted, Not Committed, Not in Progress - so it
757 * from crashed process. - vadim 06/02/97
759 tuple->t_infomask |= HEAP_XMAX_INVALID;;
764 elog(NOTICE, "Rel %s: TID %u/%u: DeleteTransactionInProgress %u - can't shrink relation",
765 relname, blkno, offnum, tuple->t_xmax);
766 do_shrinking = false;
771 * It's possibly! But from where it comes ? And should we fix
772 * it ? - vadim 11/28/96
774 itemptr = &(tuple->t_ctid);
775 if (!ItemPointerIsValid(itemptr) ||
776 BlockIdGetBlockNumber(&(itemptr->ip_blkid)) != blkno)
778 elog(NOTICE, "Rel %s: TID %u/%u: TID IN TUPLEHEADER %u/%u IS NOT THE SAME. TUPGONE %d.",
779 relname, blkno, offnum,
780 BlockIdGetBlockNumber(&(itemptr->ip_blkid)),
781 itemptr->ip_posid, tupgone);
787 if (tuple->t_len != itemid->lp_len)
789 elog(NOTICE, "Rel %s: TID %u/%u: TUPLE_LEN IN PAGEHEADER %u IS NOT THE SAME AS IN TUPLEHEADER %u. TUPGONE %d.",
790 relname, blkno, offnum,
791 itemid->lp_len, tuple->t_len, tupgone);
793 if (!OidIsValid(tuple->t_oid))
795 elog(NOTICE, "Rel %s: TID %u/%u: OID IS INVALID. TUPGONE %d.",
796 relname, blkno, offnum, tupgone);
803 if (tempPage == (Page) NULL)
807 pageSize = PageGetPageSize(page);
808 tempPage = (Page) palloc(pageSize);
809 memmove(tempPage, page, pageSize);
812 lpp = &(((PageHeader) tempPage)->pd_linp[offnum - 1]);
815 lpp->lp_flags &= ~LP_USED;
817 vpc->vpd_offsets[vpc->vpd_offsets_free++] = offnum;
825 if (tuple->t_len < min_tlen)
826 min_tlen = tuple->t_len;
827 if (tuple->t_len > max_tlen)
828 max_tlen = tuple->t_len;
829 vc_attrstats(onerel, vacrelstats, tuple);
841 if (tempPage != (Page) NULL)
842 { /* Some tuples are gone */
843 PageRepairFragmentation(tempPage);
844 vpc->vpd_free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
845 free_size += vpc->vpd_free;
846 vc_reappage(vacuum_pages, vpc);
848 tempPage = (Page) NULL;
850 else if (vpc->vpd_offsets_free > 0)
851 { /* there are only ~LP_USED line pointers */
852 vpc->vpd_free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
853 free_size += vpc->vpd_free;
854 vc_reappage(vacuum_pages, vpc);
866 /* save stats in the rel list for use later */
867 vacrelstats->num_tuples = num_tuples;
868 vacrelstats->num_pages = nblocks;
869 /* vacrelstats->natts = attr_cnt;*/
871 min_tlen = max_tlen = 0;
872 vacrelstats->min_tlen = min_tlen;
873 vacrelstats->max_tlen = max_tlen;
875 vacuum_pages->vpl_empty_end_pages = empty_end_pages;
876 fraged_pages->vpl_empty_end_pages = empty_end_pages;
879 * Try to make fraged_pages keeping in mind that we can't use free
880 * space of "empty" end-pages and last page if it reapped.
882 if (do_shrinking && vacuum_pages->vpl_num_pages - empty_end_pages > 0)
884 int nusf; /* blocks usefull for re-using */
886 nusf = vacuum_pages->vpl_num_pages - empty_end_pages;
887 if ((vacuum_pages->vpl_pagedesc[nusf - 1])->vpd_blkno == nblocks - empty_end_pages - 1)
890 for (i = 0; i < nusf; i++)
892 vp = vacuum_pages->vpl_pagedesc[i];
893 if (vc_enough_space(vp, min_tlen))
895 vc_vpinsert(fraged_pages, vp);
896 usable_free_size += vp->vpd_free;
901 getrusage(RUSAGE_SELF, &ru1);
903 elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
904 Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.",
905 nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages, new_pages,
906 num_tuples, tups_vacuumed, ncrash, nunused, min_tlen, max_tlen,
907 free_size, usable_free_size, empty_end_pages, fraged_pages->vpl_num_pages,
908 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
909 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
915 * vc_rpfheap() -- try to repair relation's fragmentation
917 * This routine marks dead tuples as unused and tries re-use dead space
918 * by moving tuples (and inserting indices if needed). It constructs
919 * Nvpl list of free-ed pages (moved tuples) and clean indices
920 * for them after committing (in hack-manner - without losing locks
921 * and freeing memory!) current transaction. It truncates relation
922 * if some end-blocks are gone away.
925 vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
926 VPageList vacuum_pages, VPageList fraged_pages, int nindices, Relation *Irel)
936 OffsetNumber offnum = 0,
944 TupleDesc tupdesc = NULL;
945 Datum *idatum = NULL;
947 InsertIndexResult iresult;
949 VPageDescr cur_page = NULL,
957 int last_fraged_block,
971 getrusage(RUSAGE_SELF, &ru0);
973 myXID = GetCurrentTransactionId();
974 myCID = GetCurrentCommandId();
976 if (Irel != (Relation *) NULL) /* preparation for index' inserts */
978 vc_mkindesc(onerel, nindices, Irel, &Idesc);
979 tupdesc = RelationGetDescr(onerel);
980 idatum = (Datum *) palloc(INDEX_MAX_KEYS * sizeof(*idatum));
981 inulls = (char *) palloc(INDEX_MAX_KEYS * sizeof(*inulls));
984 Nvpl.vpl_num_pages = 0;
985 num_fraged_pages = fraged_pages->vpl_num_pages;
986 last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
987 last_fraged_block = last_fraged_page->vpd_blkno;
988 Assert(vacuum_pages->vpl_num_pages > vacuum_pages->vpl_empty_end_pages);
989 vacuumed_pages = vacuum_pages->vpl_num_pages - vacuum_pages->vpl_empty_end_pages;
990 last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
991 last_vacuum_block = last_vacuum_page->vpd_blkno;
992 Assert(last_vacuum_block >= last_fraged_block);
993 cur_buffer = InvalidBuffer;
996 vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
997 vpc->vpd_offsets_used = vpc->vpd_offsets_free = 0;
/* walk the relation backward, moving end-tuples into earlier fraged pages */
999 nblocks = vacrelstats->num_pages;
1000 for (blkno = nblocks - vacuum_pages->vpl_empty_end_pages - 1;; blkno--)
1002 /* if it's reapped page and it was used by me - quit */
1003 if (blkno == last_fraged_block && last_fraged_page->vpd_offsets_used > 0)
1006 buf = ReadBuffer(onerel, blkno);
1007 page = BufferGetPage(buf);
1009 vpc->vpd_offsets_free = 0;
1011 isempty = PageIsEmpty(page);
1014 if (blkno == last_vacuum_block) /* it's reapped page */
1016 if (last_vacuum_page->vpd_offsets_free > 0) /* there are dead tuples */
1017 { /* on this page - clean */
1019 vc_vacpage(page, last_vacuum_page);
1025 Assert(vacuumed_pages > 0);
1026 /* get prev reapped page from vacuum_pages */
1027 last_vacuum_page = vacuum_pages->vpl_pagedesc[vacuumed_pages - 1];
1028 last_vacuum_block = last_vacuum_page->vpd_blkno;
1029 if (blkno == last_fraged_block) /* this page in
1030 * fraged_pages too */
1033 Assert(num_fraged_pages > 0);
1034 Assert(last_fraged_page->vpd_offsets_used == 0);
1035 /* get prev reapped page from fraged_pages */
1036 last_fraged_page = fraged_pages->vpl_pagedesc[num_fraged_pages - 1];
1037 last_fraged_block = last_fraged_page->vpd_blkno;
1039 Assert(last_fraged_block <= last_vacuum_block);
1049 vpc->vpd_blkno = blkno;
1050 maxoff = PageGetMaxOffsetNumber(page);
1051 for (offnum = FirstOffsetNumber;
1053 offnum = OffsetNumberNext(offnum))
1055 itemid = PageGetItemId(page, offnum);
1057 if (!ItemIdIsUsed(itemid))
1060 tuple = (HeapTuple) PageGetItem(page, itemid);
1061 tuple_len = tuple->t_len;
1063 /* try to find new page for this tuple */
1064 if (cur_buffer == InvalidBuffer ||
1065 !vc_enough_space(cur_page, tuple_len))
1067 if (cur_buffer != InvalidBuffer)
1069 WriteBuffer(cur_buffer);
1070 cur_buffer = InvalidBuffer;
1073 * If no one tuple can't be added to this page -
1074 * remove page from fraged_pages. - vadim 11/27/96
1076 * But we can't remove last page - this is our
1077 * "show-stopper" !!! - vadim 02/25/98
1079 if (cur_page != last_fraged_page &&
1080 !vc_enough_space(cur_page, vacrelstats->min_tlen))
1082 Assert(num_fraged_pages > cur_item + 1);
1083 memmove(fraged_pages->vpl_pagedesc + cur_item,
1084 fraged_pages->vpl_pagedesc + cur_item + 1,
1085 sizeof(VPageDescr *) * (num_fraged_pages - cur_item - 1));
1087 Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
/* linear search for the first fraged page with room for this tuple */
1090 for (i = 0; i < num_fraged_pages; i++)
1092 if (vc_enough_space(fraged_pages->vpl_pagedesc[i], tuple_len))
1095 if (i == num_fraged_pages)
1096 break; /* can't move item anywhere */
1098 cur_page = fraged_pages->vpl_pagedesc[cur_item];
1099 cur_buffer = ReadBuffer(onerel, cur_page->vpd_blkno);
1100 ToPage = BufferGetPage(cur_buffer);
1101 /* if this page was not used before - clean it */
1102 if (!PageIsEmpty(ToPage) && cur_page->vpd_offsets_used == 0)
1103 vc_vacpage(ToPage, cur_page);
/* copy the tuple and stamp it as inserted by this transaction */
1107 newtup = (HeapTuple) palloc(tuple_len);
1108 memmove((char *) newtup, (char *) tuple, tuple_len);
1110 /* store transaction information */
1111 TransactionIdStore(myXID, &(newtup->t_xmin));
1112 newtup->t_cmin = myCID;
1113 StoreInvalidTransactionId(&(newtup->t_xmax));
1114 /* set xmin to unknown and xmax to invalid */
1115 newtup->t_infomask &= ~(HEAP_XACT_MASK);
1116 newtup->t_infomask |= HEAP_XMAX_INVALID;
1118 /* add tuple to the page */
1119 newoff = PageAddItem(ToPage, (Item) newtup, tuple_len,
1120 InvalidOffsetNumber, LP_USED);
1121 if (newoff == InvalidOffsetNumber)
1124 failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
1125 tuple_len, cur_page->vpd_blkno, cur_page->vpd_free,
1126 cur_page->vpd_offsets_used, cur_page->vpd_offsets_free);
1128 newitemid = PageGetItemId(ToPage, newoff);
1130 newtup = (HeapTuple) PageGetItem(ToPage, newitemid);
1131 ItemPointerSet(&(newtup->t_ctid), cur_page->vpd_blkno, newoff);
1133 /* now logically delete end-tuple */
1134 TransactionIdStore(myXID, &(tuple->t_xmax));
1135 tuple->t_cmax = myCID;
1136 /* set xmax to unknown */
1137 tuple->t_infomask &= ~(HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED);
1139 cur_page->vpd_offsets_used++;
1141 cur_page->vpd_free = ((PageHeader) ToPage)->pd_upper - ((PageHeader) ToPage)->pd_lower;
1142 vpc->vpd_offsets[vpc->vpd_offsets_free++] = offnum;
1144 /* insert index' tuples if needed */
1145 if (Irel != (Relation *) NULL)
1147 for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
1149 FormIndexDatum(idcur->natts,
1150 (AttrNumber *) &(idcur->tform->indkey[0]),
1156 iresult = index_insert(Irel[i],
1166 } /* walk along page */
1168 if (vpc->vpd_offsets_free > 0) /* some tuples were moved */
1170 vc_reappage(&Nvpl, vpc);
1178 if (offnum <= maxoff)
1179 break; /* some item(s) left */
1181 } /* walk along relation */
1183 blkno++; /* new number of blocks */
1185 if (cur_buffer != InvalidBuffer)
1187 Assert(num_moved > 0);
1188 WriteBuffer(cur_buffer);
1195 * We have to commit our tuple' movings before we'll truncate
1196 * relation, but we shouldn't lose our locks. And so - quick hack:
1197 * flush buffers and record status of current transaction as
1198 * committed, and continue. - vadim 11/13/96
1200 FlushBufferPool(!TransactionFlushEnabled());
1201 TransactionIdCommit(myXID);
1202 FlushBufferPool(!TransactionFlushEnabled());
1206 * Clean uncleaned reapped pages from vacuum_pages list and set xmin
1207 * committed for inserted tuples
1210 for (i = 0, vpp = vacuum_pages->vpl_pagedesc; i < vacuumed_pages; i++, vpp++)
1212 Assert((*vpp)->vpd_blkno < blkno);
1213 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1214 page = BufferGetPage(buf);
1215 if ((*vpp)->vpd_offsets_used == 0) /* this page was not used */
1219 * noff == 0 in empty pages only - such pages should be
1222 Assert((*vpp)->vpd_offsets_free > 0);
1223 vc_vacpage(page, *vpp);
1226 /* this page was used */
1229 max_offset = PageGetMaxOffsetNumber(page);
1230 for (newoff = FirstOffsetNumber;
1231 newoff <= max_offset;
1232 newoff = OffsetNumberNext(newoff))
1234 itemid = PageGetItemId(page, newoff);
1235 if (!ItemIdIsUsed(itemid))
1237 tuple = (HeapTuple) PageGetItem(page, itemid);
1238 if (TransactionIdEquals((TransactionId) tuple->t_xmin, myXID))
1240 tuple->t_infomask |= HEAP_XMIN_COMMITTED;
1244 Assert((*vpp)->vpd_offsets_used == num_tuples);
1245 checked_moved += num_tuples;
1249 Assert(num_moved == checked_moved);
1251 getrusage(RUSAGE_SELF, &ru1);
1253 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. \
1254 Elapsed %u/%u sec.",
1255 (RelationGetRelationName(onerel))->data,
1256 nblocks, blkno, num_moved,
1257 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1258 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
1260 if (Nvpl.vpl_num_pages > 0)
1262 /* vacuum indices again if needed */
1263 if (Irel != (Relation *) NULL)
1269 /* re-sort Nvpl.vpl_pagedesc */
1270 for (vpleft = Nvpl.vpl_pagedesc,
1271 vpright = Nvpl.vpl_pagedesc + Nvpl.vpl_num_pages - 1;
1272 vpleft < vpright; vpleft++, vpright--)
1278 for (i = 0; i < nindices; i++)
1279 vc_vaconeind(&Nvpl, Irel[i], vacrelstats->num_tuples);
1283 * clean moved tuples from last page in Nvpl list if some tuples
1286 if (vpc->vpd_offsets_free > 0 && offnum <= maxoff)
1288 Assert(vpc->vpd_blkno == blkno - 1);
1289 buf = ReadBuffer(onerel, vpc->vpd_blkno);
1290 page = BufferGetPage(buf);
1293 for (offnum = FirstOffsetNumber;
1295 offnum = OffsetNumberNext(offnum))
1297 itemid = PageGetItemId(page, offnum);
1298 if (!ItemIdIsUsed(itemid))
1300 tuple = (HeapTuple) PageGetItem(page, itemid);
1301 Assert(TransactionIdEquals((TransactionId) tuple->t_xmax, myXID));
1302 itemid->lp_flags &= ~LP_USED;
1305 Assert(vpc->vpd_offsets_free == num_tuples);
1306 PageRepairFragmentation(page);
1310 /* now - free new list of reapped pages */
1311 vpp = Nvpl.vpl_pagedesc;
1312 for (i = 0; i < Nvpl.vpl_num_pages; i++, vpp++)
1314 pfree(Nvpl.vpl_pagedesc);
1317 /* truncate relation */
1318 if (blkno < nblocks)
1320 i = BlowawayRelationBuffers(onerel, blkno);
1322 elog(FATAL, "VACUUM (vc_rpfheap): BlowawayRelationBuffers returned %d", i);
1323 blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
1325 vacrelstats->num_pages = blkno; /* set new number of blocks */
1328 if (Irel != (Relation *) NULL) /* pfree index' allocations */
1333 vc_clsindices(nindices, Irel);
1341 * vc_vacheap() -- free dead tuples
1343 * This routine marks dead tuples as unused and truncates relation
1344 * if there are "empty" end-blocks.
1347 vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages)
1355 nblocks = vacuum_pages->vpl_num_pages;
1356 nblocks -= vacuum_pages->vpl_empty_end_pages; /* nothing to do with
/* clean every reapped page that still carries dead line pointers */
1359 for (i = 0, vpp = vacuum_pages->vpl_pagedesc; i < nblocks; i++, vpp++)
1361 if ((*vpp)->vpd_offsets_free > 0)
1363 buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
1364 page = BufferGetPage(buf);
1365 vc_vacpage(page, *vpp);
1370 /* truncate relation if there are some empty end-pages */
1371 if (vacuum_pages->vpl_empty_end_pages > 0)
1373 Assert(vacrelstats->num_pages >= vacuum_pages->vpl_empty_end_pages);
1374 nblocks = vacrelstats->num_pages - vacuum_pages->vpl_empty_end_pages;
1375 elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
1376 (RelationGetRelationName(onerel))->data,
1377 vacrelstats->num_pages, nblocks);
1380 * we have to flush "empty" end-pages (if changed, but who knows
1381 * it) before truncation
1383 FlushBufferPool(!TransactionFlushEnabled());
1385 i = BlowawayRelationBuffers(onerel, nblocks);
1387 elog(FATAL, "VACUUM (vc_vacheap): BlowawayRelationBuffers returned %d", i);
1389 nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
1390 Assert(nblocks >= 0);
1391 vacrelstats->num_pages = nblocks; /* set new number of
1398  * vc_vacpage() -- free dead tuples on a page
1399  * and repair its fragmentation.
1402 vc_vacpage(Page page, VPageDescr vpd)
/* vpd must describe only dead (free) offsets: none may still be in use. */
1407 Assert(vpd->vpd_offsets_used == 0);
1408 for (i = 0; i < vpd->vpd_offsets_free; i++)
/* OffsetNumbers are 1-based, hence the "- 1" when indexing pd_linp[]. */
1410 itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_offsets[i] - 1]);
/* Clearing LP_USED marks the line pointer as no longer in use. */
1411 itemid->lp_flags &= ~LP_USED;
1413 PageRepairFragmentation(page);
1418  * vc_scanoneind() -- scan one index relation to update statistics.
/*
 * Walks the whole index (no scan keys), counts tuples and pages, and
 * stores the counts in the index's pg_class row via vc_updstats().
 * NOTE(review): excerpt is elided; loop-body statements are missing.
 */
1422 vc_scanoneind(Relation indrel, int num_tuples)
1424 RetrieveIndexResult res;
1425 IndexScanDesc iscan;
/* ru0/ru1 bracket the scan so elapsed system/user time can be reported. */
1431 getrusage(RUSAGE_SELF, &ru0);
1433 /* walk through the entire index */
1434 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1437 while ((res = index_getnext(iscan, ForwardScanDirection))
1438 != (RetrieveIndexResult) NULL)
1444 index_endscan(iscan);
1446 /* now update statistics in pg_class */
1447 nipages = RelationGetNumberOfBlocks(indrel);
1448 vc_updstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
1450 getrusage(RUSAGE_SELF, &ru1);
1452 elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
1453 indrel->rd_rel->relname.data, nipages, nitups,
1454 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1455 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
/* Warn when the index tuple count disagrees with the heap's count. */
1457 if (nitups != num_tuples)
1458 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1459 indrel->rd_rel->relname.data, nitups, num_tuples);
1461 } /* vc_scanoneind */
1464  * vc_vaconeind() -- vacuum one index relation.
1466  * Vpl is the VPageList of the heap we're currently vacuuming.
1467  * It's locked. Indrel is an index relation on the vacuumed heap.
1468  * We don't set locks on the index relation here, since the indexed
1469  * access methods support locking at different granularities.
1470  * We let them handle it.
1472  * Finally, we arrange to update the index relation's statistics in
/*
 * NOTE(review): excerpt is elided; some statements (counter updates,
 * braces) are not visible here.
 */
1476 vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples)
1478 RetrieveIndexResult res;
1479 IndexScanDesc iscan;
1480 ItemPointer heapptr;
1482 int num_index_tuples;
1488 getrusage(RUSAGE_SELF, &ru0);
1490 /* walk through the entire index */
1491 iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
1493 num_index_tuples = 0;
1495 while ((res = index_getnext(iscan, ForwardScanDirection))
1496 != (RetrieveIndexResult) NULL)
1498 heapptr = &res->heap_iptr;
/* Delete index entries whose heap tid was reaped by the heap pass. */
1500 if ((vp = vc_tidreapped(heapptr, vpl)) != (VPageDescr) NULL)
1503 elog(DEBUG, "<%x,%x> -> <%x,%x>",
1504 ItemPointerGetBlockNumber(&(res->index_iptr)),
1505 ItemPointerGetOffsetNumber(&(res->index_iptr)),
1506 ItemPointerGetBlockNumber(&(res->heap_iptr)),
1507 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
/* vpd_offsets_free == 0 flags a wholly-empty reaped page (see vc_tidreapped). */
1509 if (vp->vpd_offsets_free == 0)
1510 { /* this is EmptyPage !!! */
1511 elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
1512 indrel->rd_rel->relname.data,
1513 vp->vpd_blkno, ItemPointerGetOffsetNumber(heapptr));
1516 index_delete(indrel, &res->index_iptr);
1524 index_endscan(iscan);
1526 /* now update statistics in pg_class */
1527 num_pages = RelationGetNumberOfBlocks(indrel);
1528 vc_updstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
1530 getrusage(RUSAGE_SELF, &ru1);
1532 elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
1533 indrel->rd_rel->relname.data, num_pages, num_index_tuples, tups_vacuumed,
1534 ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
1535 ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
/* Surviving index tuples should match the heap's surviving tuple count. */
1537 if (num_index_tuples != num_tuples)
1538 elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
1539 indrel->rd_rel->relname.data, num_index_tuples, num_tuples);
1541 } /* vc_vaconeind */
1544  * vc_tidreapped() -- is a particular tid reapped?
1546  * vpl->VPageDescr_array is sorted in the right order.
/*
 * Returns the matching page's VPageDescr when itemptr refers to a reaped
 * tuple (and, per the caller in vc_vaconeind, to any tuple on an
 * entirely-empty reaped page), else NULL.
 * NOTE(review): excerpt is elided; return statements for some paths are
 * not visible here.
 */
1549 vc_tidreapped(ItemPointer itemptr, VPageList vpl)
1551 OffsetNumber ioffno;
1557 vpd.vpd_blkno = ItemPointerGetBlockNumber(itemptr);
1558 ioffno = ItemPointerGetOffsetNumber(itemptr);
/* First, binary-search the sorted page-descriptor array for the tid's block. */
1561 vpp = (VPageDescr *) vc_find_eq((char *) (vpl->vpl_pagedesc),
1562 vpl->vpl_num_pages, sizeof(VPageDescr), (char *) &vp,
1565 if (vpp == (VPageDescr *) NULL)
1566 return (VPageDescr) NULL;
1569 /* ok - we are on true page */
1571 if (vp->vpd_offsets_free == 0)
1572 { /* this is EmptyPage !!! */
/* Then, binary-search the page's sorted offset list for the tid's offset. */
1576 voff = (OffsetNumber *) vc_find_eq((char *) (vp->vpd_offsets),
1577 vp->vpd_offsets_free, sizeof(OffsetNumber), (char *) &ioffno,
1580 if (voff == (OffsetNumber *) NULL)
1581 return (VPageDescr) NULL;
1585 } /* vc_tidreapped */
1588  * vc_attrstats() -- compute column statistics used by the optimizer
1590  * We compute the column min, max, null and non-null counts.
1591  * Plus we attempt to find the count of the value that occurs most
1592  * frequently in each column
1593  * These figures are used to compute the selectivity of the column
1595  * We use a three-bucket cache to get the most frequent item
1596  * The 'guess' buckets count hits. A cache miss causes guess1
1597  * to get the most hit 'guess' item in the most recent cycle, and
1598  * the new item goes into guess2. Whenever the total count of hits
1599  * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
1601  * This method works perfectly for columns with unique values, and columns
1602  * with only two unique values, plus nulls.
1604  * It becomes less perfect as the number of unique values increases and
1605  * their distribution in the table becomes more random.
/*
 * NOTE(review): excerpt is elided; several statements (null handling,
 * counter increments, braces) are not visible here.
 */
1609 vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
1612 attr_cnt = vacrelstats->va_natts;
1613 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1614 TupleDesc tupDesc = onerel->rd_att;
/* Fold this tuple's value for each tracked attribute into its stats. */
1618 for (i = 0; i < attr_cnt; i++)
1620 VacAttrStats *stats = &vacattrstats[i];
1621 bool value_hit = true;
1623 value = heap_getattr(tuple,
1624 stats->attr->attnum, tupDesc, &isnull);
/* Without a usable "=" operator we cannot maintain any of the buckets. */
1626 if (!VacAttrStatsEqValid(stats))
1633 stats->nonnull_cnt++;
/* First non-null value seen: seed best/guess1/guess2 (and min/max) with it. */
1634 if (stats->initialized == false)
1636 vc_bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
1637 /* best_cnt gets incremented later */
1638 vc_bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
1639 stats->guess1_cnt = stats->guess1_hits = 1;
1640 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1641 stats->guess2_hits = 1;
1642 if (VacAttrStatsLtGtValid(stats))
1644 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
1645 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1647 stats->initialized = true;
/* Maintain running min/max via the type's "<" and ">" operators. */
1649 if (VacAttrStatsLtGtValid(stats))
1651 if ((*fmgr_faddr(&stats->f_cmplt)) (value, stats->min))
1653 vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
1656 if ((*fmgr_faddr(&stats->f_cmpgt)) (value, stats->max))
1658 vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
1661 if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->min))
1663 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->max))
/* Three-bucket cache: hit on best/guess1/guess2 bumps its counters... */
1666 if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->best))
1668 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess1))
1670 stats->guess1_cnt++;
1671 stats->guess1_hits++;
1673 else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess2))
1674 stats->guess2_hits++;
/* ...a miss promotes the hotter guess and recycles guess2 for the new value. */
1678 if (stats->guess2_hits > stats->guess1_hits)
1680 swapDatum(stats->guess1, stats->guess2);
1681 swapInt(stats->guess1_len, stats->guess2_len);
1682 stats->guess1_cnt = stats->guess2_hits;
1683 swapLong(stats->guess1_hits, stats->guess2_hits);
1685 if (stats->guess1_cnt > stats->best_cnt)
1687 swapDatum(stats->best, stats->guess1);
1688 swapInt(stats->best_len, stats->guess1_len);
1689 swapLong(stats->best_cnt, stats->guess1_cnt);
1690 stats->guess1_hits = 1;
1691 stats->guess2_hits = 1;
1695 vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
1696 stats->guess1_hits = 1;
1697 stats->guess2_hits = 1;
1705  * vc_bucketcpy() -- copy a datum value into a statistics bucket,
/*
 * growing the bucket's allocation when the value is larger than what the
 * bucket currently holds.  (The old header comment was a copy-paste from
 * vc_updstats.)  For pass-by-value fixed-length attributes the datum is
 * presumably stored directly (that branch's body is elided here).
 */
1709 vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len)
1711 if (attr->attbyval && attr->attlen != -1)
/* attlen == -1 means varlena: take the size from the value's header. */
1715 int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
1717 if (len > *bucket_len)
1719 if (*bucket_len != 0)
1720 pfree(DatumGetPointer(*bucket));
1721 *bucket = PointerGetDatum(palloc(len));
1724 memmove(DatumGetPointer(*bucket), DatumGetPointer(value), len);
1729  * vc_updstats() -- update pg_class statistics for one relation
1731  * This routine works for both index and heap relation entries in
1732  * pg_class. We violate no-overwrite semantics here by storing new
1733  * values for num_tuples, num_pages, and hasindex directly in the pg_class
1734  * tuple that's already on the page. The reason for this is that if
1735  * we updated these tuples in the usual way, then every tuple in pg_class
1736  * would be replaced every day. This would make planning and executing
1737  * historical queries very expensive.
/*
 * NOTE(review): excerpt is elided; declarations, braces and several
 * statements between the visible lines are missing.
 */
1740 vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats)
1749 Form_pg_class pgcform;
1751 Form_pg_attribute attp;
1755 * update number of tuples and number of pages in pg_class
1757 rtup = SearchSysCacheTuple(RELOID,
1758 ObjectIdGetDatum(relid),
1760 if (!HeapTupleIsValid(rtup))
1761 elog(ERROR, "pg_class entry for relid %d vanished during vacuuming",
1764 rd = heap_openr(RelationRelationName);
1766 /* get the buffer cache tuple */
/* Re-fetch by t_ctid so we can scribble on the in-buffer tuple directly. */
1767 rtup = heap_fetch(rd, SnapshotNow, &rtup->t_ctid, &buffer);
1769 /* overwrite the existing statistics in the tuple */
1770 vc_setpagelock(rd, ItemPointerGetBlockNumber(&rtup->t_ctid));
1771 pgcform = (Form_pg_class) GETSTRUCT(rtup);
1772 pgcform->reltuples = num_tuples;
1773 pgcform->relpages = num_pages;
1774 pgcform->relhasindex = hasindex;
/* When per-column stats were collected, also update pg_attribute/pg_statistic. */
1776 if (vacrelstats != NULL && vacrelstats->va_natts > 0)
1778 VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
1779 int natts = vacrelstats->va_natts;
1781 ad = heap_openr(AttributeRelationName);
1782 sd = heap_openr(StatisticRelationName);
1783 ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
1786 scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
1788 while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
1791 float32data selratio; /* average ratio of rows selected
1792 * for a random constant */
1793 VacAttrStats *stats;
1794 Datum values[Natts_pg_statistic];
1795 char nulls[Natts_pg_statistic];
1797 attp = (Form_pg_attribute) GETSTRUCT(atup);
1798 if (attp->attnum <= 0) /* skip system attributes for now, */
1799 /* they are unique anyway */
/* Match this pg_attribute row to the collected per-column stats entry. */
1802 for (i = 0; i < natts; i++)
1804 if (attp->attnum == vacattrstats[i].attr->attnum)
1809 stats = &(vacattrstats[i]);
1811 /* overwrite the existing statistics in the tuple */
1812 if (VacAttrStatsEqValid(stats))
1815 vc_setpagelock(ad, ItemPointerGetBlockNumber(&atup->t_ctid));
/* Selectivity estimate: special cases first (empty / all-distinct column). */
1817 if (stats->nonnull_cnt + stats->null_cnt == 0 ||
1818 (stats->null_cnt <= 1 && stats->best_cnt == 1))
1820 else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
1822 double min_cnt_d = stats->min_cnt,
1823 max_cnt_d = stats->max_cnt,
1824 null_cnt_d = stats->null_cnt,
1825 nonnullcnt_d = stats->nonnull_cnt; /* prevent overflow */
1827 selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) /
1828 (nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d);
1832 double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
1833 double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
1836 * we assume count of other values are 20% of best
1839 selratio = (most * most + 0.20 * most * (total - most)) / total / total;
/* Overwrite disbursion in place and force the page out (no WAL in this era). */
1843 attp->attdisbursion = selratio;
1844 WriteNoReleaseBuffer(ItemPointerGetBlockNumber(&atup->t_ctid));
1846 /* DO PG_STATISTIC INSERTS */
1849 * doing system relations, especially pg_statistic is a
1852 if (VacAttrStatsLtGtValid(stats) && stats->initialized /* &&
1853 * !IsSystemRelationName(
1855 pgcform->relname.data) */ )
1857 FmgrInfo out_function;
1860 for (i = 0; i < Natts_pg_statistic; ++i)
1864 * initialize values[]
1868 values[i++] = (Datum) relid; /* 1 */
1869 values[i++] = (Datum) attp->attnum; /* 2 */
1870 values[i++] = (Datum) InvalidOid; /* 3 */
/* min/max are stored as text: run the type's output proc, then textin(). */
1871 fmgr_info(stats->outfunc, &out_function);
1872 out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid);
1873 values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
1875 out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid);
1876 values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
1879 stup = heap_formtuple(sd->rd_att, values, nulls);
1882 * insert the tuple in the relation and get the tuple's oid.
1885 heap_insert(sd, stup);
/* Free the palloc'd text datums built above (slots 4 and 5, 0-based 3 and 4). */
1886 pfree(DatumGetPointer(values[3]));
1887 pfree(DatumGetPointer(values[4]));
1897 /* XXX -- after write, should invalidate relcache in other backends */
1898 WriteNoReleaseBuffer(ItemPointerGetBlockNumber(&rtup->t_ctid));
1901 * invalidating system relations confuses the function cache of
1902 * pg_operator and pg_opclass, bjm
1904 if (!IsSystemRelationName(pgcform->relname.data))
1905 RelationInvalidateHeapTuple(rd, rtup);
1907 ReleaseBuffer(buffer);
1912  * vc_delhilowstats() -- delete pg_statistics rows
/*
 * Deletes pg_statistic rows for relid, or for all relations when relid is
 * InvalidOid.  When attcnt > 0, rows are kept unless their staattnum
 * matches one of attnums[] (note the +1: attnums[] appears to be 0-based
 * -- TODO confirm against callers).
 */
1916 vc_delhilowstats(Oid relid, int attcnt, int *attnums)
1918 Relation pgstatistic;
1923 pgstatistic = heap_openr(StatisticRelationName);
/* Scan either just this relation's rows or the whole catalog. */
1925 if (relid != InvalidOid)
1927 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
1929 ObjectIdGetDatum(relid));
1930 scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
1933 scan = heap_beginscan(pgstatistic, false, SnapshotNow, 0, NULL);
1935 while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
1939 Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
1942 for (i = 0; i < attcnt; i++)
1944 if (pgs->staattnum == attnums[i] + 1)
1948 continue; /* don't delete it */
1950 heap_delete(pgstatistic, &tuple->t_ctid);
1954 heap_close(pgstatistic);
/*
 * vc_setpagelock() -- take a write lock on one page of rel.
 */
1958 vc_setpagelock(Relation rel, BlockNumber blkno)
1960 ItemPointerData itm;
/* Build a TID on the target block; the page-level lock ignores the offset
 * (offset 1 is just a valid placeholder -- NOTE(review): confirm). */
1962 ItemPointerSet(&itm, blkno, 1);
1964 RelationSetLockForWritePage(rel, &itm);
1968  * vc_reappage() -- save a page on the array of reapped pages.
1970  * As a side effect of the way that the vacuuming loop for a given
1971  * relation works, higher pages come after lower pages in the array
1972  * (and highest tid on a page is last).
1975 vc_reappage(VPageList vpl, VPageDescr vpc)
1979 /* allocate a VPageDescrData entry */
/* Room for the header plus the trailing variable-length offsets array. */
1980 newvpd = (VPageDescr) palloc(sizeof(VPageDescrData) + vpc->vpd_offsets_free * sizeof(OffsetNumber));
/* Deep-copy vpc: the caller reuses its descriptor for the next page. */
1983 if (vpc->vpd_offsets_free > 0)
1984 memmove(newvpd->vpd_offsets, vpc->vpd_offsets, vpc->vpd_offsets_free * sizeof(OffsetNumber));
1985 newvpd->vpd_blkno = vpc->vpd_blkno;
1986 newvpd->vpd_free = vpc->vpd_free;
1987 newvpd->vpd_offsets_used = vpc->vpd_offsets_used;
1988 newvpd->vpd_offsets_free = vpc->vpd_offsets_free;
1990 /* insert this page into vpl list */
1991 vc_vpinsert(vpl, newvpd);
/*
 * vc_vpinsert() -- append vpnew to vpl, growing the descriptor array
 * in chunks of 100 entries.
 */
1996 vc_vpinsert(VPageList vpl, VPageDescr vpnew)
1999 /* allocate a VPageDescr entry if needed */
2000 if (vpl->vpl_num_pages == 0)
2001 vpl->vpl_pagedesc = (VPageDescr *) palloc(100 * sizeof(VPageDescr))
2002 else if (vpl->vpl_num_pages % 100 == 0)
2003 vpl->vpl_pagedesc = (VPageDescr *) repalloc(vpl->vpl_pagedesc, (vpl->vpl_num_pages + 100) * sizeof(VPageDescr));
2004 vpl->vpl_pagedesc[vpl->vpl_num_pages] = vpnew;
2005 (vpl->vpl_num_pages)++;
/*
 * vc_free() -- free a VRelList.  The entries were allocated in the
 * vacuum portal's variable memory context, so switch there before
 * pfree'ing.  (The pfree of each entry is elided from this excerpt.)
 */
2010 vc_free(VRelList vrl)
2014 PortalVariableMemory pmem;
2016 pmem = PortalGetVariableMemory(vc_portal);
2017 old = MemoryContextSwitchTo((MemoryContext) pmem);
2019 while (vrl != (VRelList) NULL)
2022 /* free rel list entry */
2024 vrl = vrl->vrl_next;
/* Restore the caller's memory context. */
2028 MemoryContextSwitchTo(old);
/*
 * vc_find_eq() -- binary-search a sorted array of nelem elements of
 * width size for one equal to *elm, using compar(); returns a pointer
 * to the match or NULL.
 * NOTE(review): excerpt is elided -- several branches (boundary returns,
 * the descend-left case) are not visible here.
 */
2032 vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *))
2035 int last = nelem - 1;
2036 int celm = nelem / 2;
/* first_move/last_move: re-test a boundary only after it has moved. */
2040 last_move = first_move = true;
2043 if (first_move == true)
2045 res = compar(bot, elm);
2052 if (last_move == true)
2054 res = compar(elm, bot + last * size);
2058 return bot + last * size;
/* Probe the middle element. */
2061 res = compar(elm, bot + celm * size);
2063 return bot + celm * size;
/* Descend into the upper half: shrink the window above the probe. */
2077 last = last - celm - 1;
2078 bot = bot + (celm + 1) * size;
2079 celm = (last + 1) / 2;
/*
 * vc_cmp_blk() -- vc_find_eq comparator: order two VPageDescr pointers
 * by block number.  (The comparison/return statements are elided from
 * this excerpt.)
 */
2086 vc_cmp_blk(char *left, char *right)
2091 lblk = (*((VPageDescr *) left))->vpd_blkno;
2092 rblk = (*((VPageDescr *) right))->vpd_blkno;
/*
 * vc_cmp_offno() -- vc_find_eq comparator for OffsetNumber values.
 * (The return statements are elided from this excerpt.)
 */
2103 vc_cmp_offno(char *left, char *right)
2106 if (*(OffsetNumber *) left < *(OffsetNumber *) right)
2108 if (*(OffsetNumber *) left == *(OffsetNumber *) right)
2112 } /* vc_cmp_offno */
/*
 * vc_getindices() -- collect the indices defined on relation relid.
 * Sets *nindices to the count; when Irel is non-NULL, also returns a
 * palloc'd array of opened index relations in *Irel.
 * NOTE(review): excerpt is elided; some statements (loop control,
 * counter updates) are not visible here.
 */
2116 vc_getindices(Oid relid, int *nindices, Relation **Irel)
/* Start with room for 10 oids; regrown below in chunks of 10. */
2132 ioid = (Oid *) palloc(10 * sizeof(Oid));
2134 /* prepare a heap scan on the pg_index relation */
2135 pgindex = heap_openr(IndexRelationName);
2136 tupdesc = RelationGetDescr(pgindex);
2138 ScanKeyEntryInitialize(&key, 0x0, Anum_pg_index_indrelid,
2140 ObjectIdGetDatum(relid));
2142 scan = heap_beginscan(pgindex, false, SnapshotNow, 1, &key);
2144 while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
2146 d = heap_getattr(tuple, Anum_pg_index_indexrelid,
2150 ioid = (Oid *) repalloc(ioid, (i + 10) * sizeof(Oid));
2151 ioid[i - 1] = DatumGetObjectId(d);
2155 heap_close(pgindex);
2158 { /* No one index found */
2163 if (Irel != (Relation **) NULL)
2164 *Irel = (Relation *) palloc(i * sizeof(Relation));
/* Open each collected index; unopenable ones are skipped with a NOTICE. */
2168 irel = index_open(ioid[--i]);
2169 if (irel != (Relation) NULL)
2171 if (Irel != (Relation **) NULL)
2178 elog(NOTICE, "CAN't OPEN INDEX %u - SKIP IT", ioid[i]);
/* If nothing could be opened, free/clear the array so callers see NULL. */
2183 if (Irel != (Relation **) NULL && *nindices == 0)
2186 *Irel = (Relation *) NULL;
2189 } /* vc_getindices */
/*
 * vc_clsindices() -- close every index relation in Irel.
 * (The loop control around index_close is elided from this excerpt.)
 */
2193 vc_clsindices(int nindices, Relation *Irel)
2196 if (Irel == (Relation *) NULL)
2200 index_close(Irel[nindices]);
2203 } /* vc_clsindices */
/*
 * vc_mkindesc() -- build an IndDesc (pg_index tuple, functional-index
 * info, key count) for each of the nindices indices in Irel, returning
 * the palloc'd array in *Idesc.
 */
2207 vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc)
2210 HeapTuple cachetuple;
2211 AttrNumber *attnumP;
2215 *Idesc = (IndDesc *) palloc(nindices * sizeof(IndDesc));
2217 for (i = 0, idcur = *Idesc; i < nindices; i++, idcur++)
2219 cachetuple = SearchSysCacheTupleCopy(INDEXRELID,
2220 ObjectIdGetDatum(RelationGetRelid(Irel[i])),
2225 * we never free the copy we make, because Idesc needs it for
2228 idcur->tform = (Form_pg_index) GETSTRUCT(cachetuple);
/* Count index keys: indkey[] is terminated by InvalidAttrNumber
 * (or capped at INDEX_MAX_KEYS).  Note the empty loop body. */
2229 for (attnumP = &(idcur->tform->indkey[0]), natts = 0;
2230 *attnumP != InvalidAttrNumber && natts != INDEX_MAX_KEYS;
2231 attnumP++, natts++);
/* Functional index: fill in the FuncIndexInfo used for key computation. */
2232 if (idcur->tform->indproc != InvalidOid)
2234 idcur->finfoP = &(idcur->finfo);
2235 FIgetnArgs(idcur->finfoP) = natts;
2237 FIgetProcOid(idcur->finfoP) = idcur->tform->indproc;
2238 *(FIgetname(idcur->finfoP)) = '\0';
2241 idcur->finfoP = (FuncIndexInfo *) NULL;
2243 idcur->natts = natts;
/*
 * vc_enough_space() -- does page vpd have room for a tuple of size len?
 */
2250 vc_enough_space(VPageDescr vpd, Size len)
/* Tuples are stored double-aligned; compare the aligned size. */
2253 len = DOUBLEALIGN(len);
2255 if (len > vpd->vpd_free)
/* A freed line pointer can be recycled, so no extra ItemIdData is needed. */
2258 if (vpd->vpd_offsets_used < vpd->vpd_offsets_free) /* there are free
2260 return true; /* and len <= free_space */
2262 /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
2263 if (len <= vpd->vpd_free - sizeof(ItemIdData))
2268 } /* vc_enough_space */