*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.9 2000/11/16 22:30:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.10 2000/12/02 19:38:34 tgl Exp $
*
-
*-------------------------------------------------------------------------
*/
+#include "postgres.h"
+
#include <sys/types.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
-#include "postgres.h"
-
#include "access/heapam.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"
stats = &vacattrstats[i];
stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
- memmove(stats->attr, attr[((attnums) ? attnums[i] : i)], ATTRIBUTE_TUPLE_SIZE);
+ memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
+ ATTRIBUTE_TUPLE_SIZE);
stats->best = stats->guess1 = stats->guess2 = 0;
stats->max = stats->min = 0;
stats->best_len = stats->guess1_len = stats->guess2_len = 0;
/* delete existing pg_statistic rows for relation */
del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
+ /* scan relation to gather statistics */
scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
}
/*
- * attr_stats() -- compute column statistics used by the optimzer
+ * attr_stats() -- compute column statistics used by the planner
*
* We compute the column min, max, null and non-null counts.
* Plus we attempt to find the count of the value that occurs most
for (i = 0; i < attr_cnt; i++)
{
VacAttrStats *stats = &vacattrstats[i];
+ Datum origvalue;
Datum value;
bool isnull;
bool value_hit;
continue;
#endif /* _DROP_COLUMN_HACK__ */
- value = heap_getattr(tuple,
- stats->attr->attnum, tupDesc, &isnull);
+ origvalue = heap_getattr(tuple, stats->attr->attnum,
+ tupDesc, &isnull);
if (isnull)
{
stats->null_cnt++;
continue;
}
-
stats->nonnull_cnt++;
+
+ /*
+ * If the value is toasted, detoast it to avoid repeated detoastings
+ * and resultant memory leakage inside the comparison routines.
+ */
+ if (!stats->attr->attbyval && stats->attr->attlen == -1)
+ value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
+ else
+ value = origvalue;
+
if (! stats->initialized)
{
bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
stats->guess1_hits = 1;
stats->guess2_hits = 1;
}
+
+ /* Clean up detoasted copy, if any */
+ if (value != origvalue)
+ pfree(DatumGetPointer(value));
}
}
/*
* bucketcpy() -- copy a new value into one of the statistics buckets
- *
*/
static void
bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
{
- if (attr->attbyval && attr->attlen != -1)
+ if (attr->attbyval)
*bucket = value;
else
{
int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+ /* Avoid unnecessary palloc() traffic... */
if (len > *bucket_len)
{
if (*bucket_len != 0)
/*
* update_attstats() -- update attribute statistics for one relation
*
- * Updates of pg_attribute statistics are handled by over-write,
- * for reasons described above. pg_statistic rows are added normally.
+ * Statistics are stored in several places: the pg_class row for the
+ * relation has stats about the whole relation, the pg_attribute rows
+ * for each attribute store "dispersion", and there is a pg_statistic
+ * row for each (non-system) attribute. (Dispersion probably ought to
+ * be moved to pg_statistic, but it's not worth doing unless there's
+ * another reason to have to change pg_attribute.) The pg_class values
+ * are updated by VACUUM, not here.
+ *
+ * We violate no-overwrite semantics here by storing new values for
+ * the dispersion column directly into the pg_attribute tuple that's
+ * already on the page. The reason for this is that if we updated
+ * these tuples in the usual way, vacuuming pg_attribute itself
+ * wouldn't work very well --- by the time we got done with a vacuum
+ * cycle, most of the tuples in pg_attribute would've been obsoleted.
+ * Updating pg_attribute's own statistics would be especially tricky.
+ * Of course, this only works for fixed-size never-null columns, but
+ * dispersion is.
+ *
+ * pg_statistic rows are just added normally. This means that
+ * pg_statistic will probably contain some deleted rows at the
+ * completion of a vacuum cycle, unless it happens to get vacuumed last.
*
* To keep things simple, we punt for pg_statistic, and don't try
* to compute or store rows for pg_statistic itself in pg_statistic.
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.174 2000/11/30 08:46:22 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.175 2000/12/02 19:38:34 tgl Exp $
*
-
*-------------------------------------------------------------------------
*/
+#include "postgres.h"
+
#include <sys/types.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
-#include "postgres.h"
+#ifndef HAVE_GETRUSAGE
+#include "rusagestub.h"
+#else
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
#include "access/genam.h"
#include "access/heapam.h"
+#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/catname.h"
#include "catalog/index.h"
#include "utils/syscache.h"
#include "utils/temprel.h"
-#ifndef HAVE_GETRUSAGE
-#include "rusagestub.h"
-#else
-#include <sys/time.h>
-#include <sys/resource.h>
-#endif
-
-#include "access/xlog.h"
extern XLogRecPtr log_heap_move(Relation reln,
ItemPointerData from, HeapTuple newtup);
static void vacuum_shutdown(void);
static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
static VRelList getrels(NameData *VacRelP);
-static void vacuum_rel(Oid relid, bool analyze, bool is_toastrel);
+static void vacuum_rel(Oid relid, bool is_toastrel);
static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
/* vacuum each heap relation */
for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
{
- vacuum_rel(cur->vrl_relid, analyze, false);
+ vacuum_rel(cur->vrl_relid, false);
/* analyze separately so locking is minimized */
if (analyze)
analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
* us to lock the entire database during one pass of the vacuum cleaner.
*/
static void
-vacuum_rel(Oid relid, bool analyze, bool is_toastrel)
+vacuum_rel(Oid relid, bool is_toastrel)
{
Relation onerel;
VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean
* totally unimportant for toast relations
*/
if (toast_relid != InvalidOid)
- vacuum_rel(toast_relid, false, true);
+ vacuum_rel(toast_relid, true);
/* next command frees attribute stats */
if (!is_toastrel)
/*
* update_relstats() -- update statistics for one relation
*
- * Statistics are stored in several places: the pg_class row for the
- * relation has stats about the whole relation, the pg_attribute rows
- * for each attribute store "dispersion", and there is a pg_statistic
- * row for each (non-system) attribute. (Dispersion probably ought to
- * be moved to pg_statistic, but it's not worth doing unless there's
- * another reason to have to change pg_attribute.) Dispersion and
- * pg_statistic values are only updated by VACUUM ANALYZE, but we
- * always update the stats in pg_class.
+ * Update the whole-relation statistics that are kept in its pg_class
+ * row. There are additional stats that will be updated if we are
+ * doing VACUUM ANALYZE, but we always update these stats.
*
* This routine works for both index and heap relation entries in
* pg_class. We violate no-overwrite semantics here by storing new