* along with the relation's initial contents.
*
*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_statistic.h,v 1.13 2001/10/25 05:49:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_statistic.h,v 1.36 2008/07/14 00:51:45 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
#ifndef PG_STATISTIC_H
#define PG_STATISTIC_H
-/* ----------------
- * postgres.h contains the system type definintions and the
- * CATALOG(), BOOTSTRAP and DATA() sugar words so this file
- * can be read by both genbki.sh and the C compiler.
- * ----------------
+#include "catalog/genbki.h"
+
+/*
+ * The CATALOG definition has to refer to the type of stavaluesN as
+ * "anyarray" so that bootstrap mode recognizes it. There is no real
+ * typedef for that, however. Since the fields are potentially-null and
+ * therefore can't be accessed directly from C code, there is no particular
+ * need for the C struct definition to show a valid field type --- instead
+ * we just make it int.
*/
+#define anyarray int
/* ----------------
* pg_statistic definition. cpp turns this into
* typedef struct FormData_pg_statistic
* ----------------
*/
-CATALOG(pg_statistic) BKI_WITHOUT_OIDS
+#define StatisticRelationId 2619
+
+CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS
{
/* These fields form the unique key for the entry: */
Oid starelid; /* relation containing attribute */
/*
* stawidth is the average width in bytes of non-null entries. For
- * fixed-width datatypes this is of course the same as the typlen, but
- * for varlena types it is more useful. Note that this is the average
- * width of the data as actually stored, post-TOASTing (eg, for a
+ * fixed-width datatypes this is of course the same as the typlen, but for
+ * var-width types it is more useful. Note that this is the average width
+ * of the data as actually stored, post-TOASTing (eg, for a
* moved-out-of-line value, only the size of the pointer object is
- * counted). This is the appropriate definition for the primary use
- * of the statistic, which is to estimate sizes of in-memory hash
- * tables of tuples.
+ * counted). This is the appropriate definition for the primary use of
+ * the statistic, which is to estimate sizes of in-memory hash tables of
+ * tuples.
*/
int4 stawidth;
* kind integer code identifying kind of data
* op OID of associated operator, if needed
* numbers float4 array (for statistical values)
- * values text array (for representations of data values)
+ * values anyarray (for representations of data values)
* The ID and operator fields are never NULL; they are zeroes in an
* unused slot. The numbers and values fields are NULL in an unused
* slot, and might also be NULL in a used slot if the slot kind has
Oid staop4;
/*
- * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be
- * absent (NULL). They cannot be accessed as C struct entries; you
- * have to use the full field access machinery (heap_getattr) for
- * them. We declare them here for the catalog machinery.
+ * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ * (NULL). They cannot be accessed as C struct entries; you have to use
+ * the full field access machinery (heap_getattr) for them. We declare
+ * them here for the catalog machinery.
*/
float4 stanumbers1[1];
float4 stanumbers4[1];
/*
- * Values in these text arrays are external representations of values
- * of the column's data type. To re-create the actual Datum, do
- * datatypein(textout(arrayelement)).
+ * Values in these arrays are values of the column's data type. We
+ * presently have to cheat quite a bit to allow polymorphic arrays of this
+ * kind, but perhaps someday it'll be a less bogus facility.
*/
- text stavalues1[1];
- text stavalues2[1];
- text stavalues3[1];
- text stavalues4[1];
+ anyarray stavalues1;
+ anyarray stavalues2;
+ anyarray stavalues3;
+ anyarray stavalues4;
} FormData_pg_statistic;
#define STATISTIC_NUM_SLOTS 4
+#undef anyarray
+
+
/* ----------------
* Form_pg_statistic corresponds to a pointer to a tuple with
* the format of pg_statistic relation.
/*
* Currently, four statistical slot "kinds" are defined: most common values,
* histogram, correlation, and most common elements.
* Additional "kinds" will probably appear in
- * future to help cope with non-scalar datatypes.
+ * future to help cope with non-scalar datatypes. Also, custom data types
+ * can define their own "kind" codes by mutual agreement between a custom
+ * typanalyze routine and the selectivity estimation functions of the type's
+ * operators.
*
* Code reading the pg_statistic relation should not assume that a particular
* data "kind" will appear in any particular slot. Instead, search the
- * stakind fields to see if the desired data is available.
+ * stakind fields to see if the desired data is available. (The standard
+ * function get_attstatsslot() may be used for this.)
+ */
+
+/*
+ * The present allocation of "kind" codes is:
+ *
+ * 1-99: reserved for assignment by the core PostgreSQL project
+ * (values in this range will be documented in this file)
+ * 100-199: reserved for assignment by the PostGIS project
+ * (values to be documented in PostGIS documentation)
+ * 200-299: reserved for assignment by the ESRI ST_Geometry project
+ * (values to be documented in ESRI ST_Geometry documentation)
+ * 300-9999: reserved for future public assignments
+ *
+ * For private use you may choose a "kind" code at random in the range
+ * 10000-30000. However, for code that is to be widely disseminated it is
+ * better to obtain a publicly defined "kind" code by request from the
+ * PostgreSQL Global Development Group.
*/
/*
* their actual tuple positions. The coefficient ranges from +1 to -1.
*/
#define STATISTIC_KIND_CORRELATION 3
-#endif /* PG_STATISTIC_H */
+
+/*
+ * A "most common elements" slot is similar to a "most common values" slot,
+ * except that it stores the most common non-null *elements* of the column
+ * values. This is useful when the column datatype is an array or some other
+ * type with identifiable elements (for instance, tsvector). staop contains
+ * the equality operator appropriate to the element type. stavalues contains
+ * the most common element values, and stanumbers their frequencies, with the
+ * same rules as for MCV slots.
+ *
+ * Note: in current usage for tsvector columns, the stavalues elements are of
+ * type text, even though their representation within tsvector is not
+ * exactly text.
+ */
+#define STATISTIC_KIND_MCELEM 4
+
+#endif /* PG_STATISTIC_H */