*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
static double select_next_random_record(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b);
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
+static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static bool std_typanalyze(VacAttrStats *stats);
old_context = MemoryContextSwitchTo(col_context);
for (i = 0; i < attr_cnt; i++)
{
- (*vacattrstats[i]->compute_stats) (vacattrstats[i],
- vacattrstats[i]->tupattnum,
- onerel->rd_att,
- totalrows,
- rows,
- numrows);
+ VacAttrStats *stats = vacattrstats[i];
+
+ stats->rows = rows;
+ stats->tupDesc = onerel->rd_att;
+ (*stats->compute_stats) (stats,
+ std_fetch_func,
+ numrows,
+ totalrows);
MemoryContextResetAndDeleteChildren(col_context);
}
MemoryContextSwitchTo(old_context);
heap_close(sd, RowExclusiveLock);
}
+/*
+ * Standard fetch function for use by compute_stats subroutines.
+ *
+ * This exists to provide some insulation between compute_stats routines
+ * and the actual storage of the sample data.
+ *
+ * stats: per-column state; its tupattnum, rows and tupDesc fields are read
+ * rownum: index into stats->rows of the sample tuple to read; it is not
+ * range-checked here, so the caller must keep it within the sample
+ * isNull: passed straight through to heap_getattr, which reports the
+ * null flag through it
+ *
+ * Returns the Datum that heap_getattr produces for that column of that
+ * sample tuple.
+ */
+static Datum
+std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+ int attnum = stats->tupattnum; /* attribute number within the sample tuples */
+ HeapTuple tuple = stats->rows[rownum]; /* sample tuple chosen by the caller */
+ TupleDesc tupDesc = stats->tupDesc; /* descriptor stored alongside rows */
+
+ return heap_getattr(tuple, attnum, tupDesc, isNull);
+}
+
/*==========================================================================
*
static int *datumCmpTupnoLink;
-static void compute_minimal_stats(VacAttrStats *stats, int attnum,
- TupleDesc tupDesc, double totalrows,
- HeapTuple *rows, int numrows);
-static void compute_scalar_stats(VacAttrStats *stats, int attnum,
- TupleDesc tupDesc, double totalrows,
- HeapTuple *rows, int numrows);
+static void compute_minimal_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
+static void compute_scalar_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b);
* depend mainly on the length of the list we are willing to keep.
*/
static void
-compute_minimal_stats(VacAttrStats *stats, int attnum,
- TupleDesc tupDesc, double totalrows,
- HeapTuple *rows, int numrows)
+compute_minimal_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows)
{
int i;
int null_cnt = 0;
fmgr_info(mystats->eqfunc, &f_cmpeq);
- for (i = 0; i < numrows; i++)
+ for (i = 0; i < samplerows; i++)
{
- HeapTuple tuple = rows[i];
Datum value;
bool isnull;
bool match;
vacuum_delay_point();
- value = heap_getattr(tuple, attnum, tupDesc, &isnull);
+ value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */
if (isnull)
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
- stats->stanullfrac = (double) null_cnt / (double) numrows;
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt;
else
denom,
stadistinct;
- numer = (double) numrows *(double) d;
+ numer = (double) samplerows *(double) d;
- denom = (double) (numrows - f1) +
- (double) f1 *(double) numrows / totalrows;
+ denom = (double) (samplerows - f1) +
+ (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
if (ndistinct < 0)
ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */
- avgcount = (double) numrows / ndistinct;
+ avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */
mincount = avgcount * 1.25;
if (mincount < 2)
mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval,
stats->attr->attlen);
- mcv_freqs[i] = (double) track[i].count / (double) numrows;
+ mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
* data values into order.
*/
static void
-compute_scalar_stats(VacAttrStats *stats, int attnum,
- TupleDesc tupDesc, double totalrows,
- HeapTuple *rows, int numrows)
+compute_scalar_stats(VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows)
{
int i;
int null_cnt = 0;
int num_bins = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
- values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
- tupnoLink = (int *) palloc(numrows * sizeof(int));
+ values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
+ tupnoLink = (int *) palloc(samplerows * sizeof(int));
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
fmgr_info(cmpFn, &f_cmpfn);
/* Initial scan to find sortable values */
- for (i = 0; i < numrows; i++)
+ for (i = 0; i < samplerows; i++)
{
- HeapTuple tuple = rows[i];
Datum value;
bool isnull;
vacuum_delay_point();
- value = heap_getattr(tuple, attnum, tupDesc, &isnull);
+ value = fetchfunc(stats, i, &isnull);
/* Check for null/nonnull */
if (isnull)
stats->stats_valid = true;
/* Do the simple null-frac and width stats */
- stats->stanullfrac = (double) null_cnt / (double) numrows;
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
if (is_varwidth)
stats->stawidth = total_width / (double) nonnull_cnt;
else
denom,
stadistinct;
- numer = (double) numrows *(double) d;
+ numer = (double) samplerows *(double) d;
- denom = (double) (numrows - f1) +
- (double) f1 *(double) numrows / totalrows;
+ denom = (double) (samplerows - f1) +
+ (double) f1 *(double) samplerows / totalrows;
stadistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
if (ndistinct < 0)
ndistinct = -ndistinct * totalrows;
/* estimate # of occurrences in sample of a typical value */
- avgcount = (double) numrows / ndistinct;
+ avgcount = (double) samplerows / ndistinct;
/* set minimum threshold count to store a value */
mincount = avgcount * 1.25;
if (mincount < 2)
mincount = 2;
/* don't let threshold exceed 1/K, however */
- maxmincount = (double) numrows / (double) num_bins;
+ maxmincount = (double) samplerows / (double) num_bins;
if (mincount > maxmincount)
mincount = maxmincount;
if (num_mcv > track_cnt)
mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval,
stats->attr->attlen);
- mcv_freqs[i] = (double) track[i].count / (double) numrows;
+ mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.49 2004/02/12 23:41:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* and must return TRUE to continue analysis, FALSE to skip analysis of this
* column. In the TRUE case it must set the compute_stats and minrows fields,
* and can optionally set extra_data to pass additional info to compute_stats.
+ * minrows is its request for the minimum number of sample rows to be gathered
+ * (but note this request might not be honored, e.g. if there are fewer rows
+ * than that in the table).
*
* The compute_stats routine will be called after sample rows have been
* gathered. Aside from this struct, it is passed:
- * attnum: attribute number within the supplied tuples
- * tupDesc: tuple descriptor for the supplied tuples
+ * fetchfunc: a function for accessing the column values from the
+ * sample rows
+ * samplerows: the number of sample tuples
* totalrows: estimated total number of rows in relation
- * rows: an array of the sample tuples
- * numrows: the number of sample tuples
- * Note that the passed attnum and tupDesc could possibly be different from
- * what one would expect by looking at the pg_attribute row. It is important
- * to use these values for extracting attribute values from the given rows
- * (and not for any other purpose).
+ * The fetchfunc may be called with rownum running from 0 to samplerows-1.
+ * It returns a Datum and an isNull flag.
*
* compute_stats should set stats_valid TRUE if it is able to compute
* any useful statistics. If it does, the remainder of the struct holds
* be CurrentMemoryContext when compute_stats is called.
*----------
*/
+typedef struct VacAttrStats *VacAttrStatsP; /* declared ahead of the struct so the function-pointer types can name it */
+
+typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
+ bool *isNull); /* fetches one column value from sample row number rownum */
+
typedef struct VacAttrStats
{
/*
* These fields must be filled in by the typanalyze routine,
* unless it returns FALSE.
*/
- void (*compute_stats) (struct VacAttrStats *stats, int attnum,
- TupleDesc tupDesc, double totalrows,
- HeapTuple *rows, int numrows);
+ void (*compute_stats) (VacAttrStatsP stats,
+ AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows,
+ double totalrows);
int minrows; /* Minimum # of rows wanted for stats */
void *extra_data; /* for extra type-specific data */
* be looked at by type-specific functions.
*/
int tupattnum; /* attribute number within tuples */
+ HeapTuple *rows; /* access info for fetch function */
+ TupleDesc tupDesc;
} VacAttrStats;