From: Tom Lane Date: Sun, 15 Feb 2004 21:01:39 +0000 (+0000) Subject: First steps towards statistics on expressional (nee functional) indexes. X-Git-Tag: REL8_0_0BETA1~1163 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f0c9397f808531b4207ebe60ff3ba9b038812443;p=postgresql First steps towards statistics on expressional (nee functional) indexes. This commit teaches ANALYZE to store such stats in pg_statistic, but nothing is done yet about teaching the planner to use 'em. Also, repair longstanding oversight in separate ANALYZE command: it updated the pg_class.relpages and reltuples counts for the table proper, but not for indexes. --- diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 4baae556b3..196fdc6efd 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ @@ -3068,22 +3068,31 @@ - The catalog pg_statistic stores statistical data about - the contents of the database. Entries are created by + The catalog pg_statistic stores statistical data + about the contents of the database. Entries are created by ANALYZE and subsequently used by the query planner. There is one entry for each table column that has been analyzed. Note that all the statistical data is inherently approximate, even assuming that it is up-to-date. + + pg_statistic also stores statistical data about + the values of index expressions. These are described as if they were + actual data columns; in particular, starelid + references the index. No entry is made for an ordinary non-expression + index column, however, since it would be redundant with the entry + for the underlying table column. + + Since different kinds of statistics may be appropriate for different kinds of data, pg_statistic is designed not to assume very much about what sort of statistics it stores. Only extremely general statistics (such as nullness) are given dedicated columns in pg_statistic. Everything else - is stored in slots, which are groups of associated columns whose - content is identified by a code number in one of the slot's columns. + is stored in slots, which are groups of associated columns + whose content is identified by a code number in one of the slot's columns. For more information see src/include/catalog/pg_statistic.h. @@ -3117,7 +3126,7 @@ starelid oid pg_class.oid - The table that the described column belongs to + The table or index that the described column belongs to diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 63595962e4..1fcc8e071d 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.259 2004/02/12 23:41:02 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.260 2004/02/15 21:01:39 tgl Exp $ * * * INTERFACE ROUTINES @@ -76,7 +76,6 @@ static void StoreAttrDefault(Relation rel, AttrNumber attnum, char *adbin); static void StoreRelCheck(Relation rel, char *ccname, char *ccbin); static void StoreConstraints(Relation rel, TupleDesc tupdesc); static void SetRelationNumChecks(Relation rel, int numchecks); -static void RemoveStatistics(Relation rel, AttrNumber attnum); /* ---------------------------------------------------------------- @@ -1868,7 +1867,7 @@ RemoveRelConstraints(Relation rel, const char *constrName, * If attnum is zero, remove all entries for rel; else remove only the one * for that column. */ -static void +void RemoveStatistics(Relation rel, AttrNumber attnum) { Relation pgstatistic; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 9c92f21740..86076960a4 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.227 2004/02/10 01:55:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.228 2004/02/15 21:01:39 tgl Exp $ * * * INTERFACE ROUTINES @@ -153,7 +153,7 @@ ConstructTupleDescriptor(Relation heapRelation, */ to->attnum = i + 1; - to->attstattarget = 0; + to->attstattarget = -1; to->attcacheoff = -1; to->attnotnull = false; to->atthasdef = false; @@ -197,6 +197,7 @@ ConstructTupleDescriptor(Relation heapRelation, to->attbyval = typeTup->typbyval; to->attstorage = typeTup->typstorage; to->attalign = typeTup->typalign; + to->attstattarget = -1; to->attcacheoff = -1; to->atttypmod = -1; to->attislocal = true; @@ -753,6 +754,7 @@ index_drop(Oid indexId) Relation userIndexRelation; Relation indexRelation; HeapTuple tuple; + bool hasexprs; int i; Assert(OidIsValid(indexId)); @@ -786,7 +788,7 @@ index_drop(Oid indexId) DeleteAttributeTuples(indexId); /* - * fix INDEX relation + * fix INDEX relation, and check for expressional index */ indexRelation = heap_openr(IndexRelationName, RowExclusiveLock); @@ -796,11 +798,20 @@ index_drop(Oid indexId) if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for index %u", indexId); + hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs); + simple_heap_delete(indexRelation, &tuple->t_self); ReleaseSysCache(tuple); heap_close(indexRelation, RowExclusiveLock); + /* + * if it has any expression columns, we might have stored + * statistics about them. + */ + if (hasexprs) + RemoveStatistics(userIndexRelation, 0); + /* * flush buffer cache and physically remove the file */ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index aba4255595..bd82a96b65 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.69 2004/02/13 06:39:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.70 2004/02/15 21:01:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,11 +20,14 @@ #include "access/tuptoaster.h" #include "catalog/catalog.h" #include "catalog/catname.h" +#include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_operator.h" #include "commands/vacuum.h" +#include "executor/executor.h" #include "miscadmin.h" +#include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "utils/acl.h" @@ -36,6 +39,16 @@ #include "utils/tuplesort.h" +/* Per-index data for ANALYZE */ +typedef struct AnlIndexData +{ + IndexInfo *indexInfo; /* BuildIndexInfo result */ + double tupleFract; /* fraction of rows for partial index */ + VacAttrStats **vacattrstats; /* index attrs to analyze */ + int attr_cnt; +} AnlIndexData; + + /* Default statistics target (GUC parameter) */ int default_statistics_target = 10; @@ -44,6 +57,10 @@ static int elevel = -1; static MemoryContext anl_context = NULL; +static void compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context); static VacAttrStats *examine_attribute(Relation onerel, int attnum); static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double *totalrows); @@ -53,6 +70,7 @@ static double select_next_random_record(double t, int n, double *stateptr); static int compare_rows(const void *a, const void *b); static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); +static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static bool std_typanalyze(VacAttrStats *stats); @@ -66,8 +84,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) Relation onerel; int attr_cnt, tcnt, - i; + i, + ind; + Relation *Irel; + int nindexes; + bool hasindex; + bool analyzableindex; VacAttrStats **vacattrstats; + AnlIndexData *indexdata; int targrows, numrows; double totalrows; @@ -201,11 +225,78 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) attr_cnt = tcnt; } + /* + * Open all indexes of the relation, and see if there are any analyzable + * columns in the indexes. We do not analyze index columns if there was + * an explicit column list in the ANALYZE command, however. + */ + vac_open_indexes(onerel, &nindexes, &Irel); + hasindex = (nindexes > 0); + indexdata = NULL; + analyzableindex = false; + if (hasindex) + { + indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData)); + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo; + + thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]); + thisdata->tupleFract = 1.0; /* fix later if partial */ + if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL) + { + List *indexprs = indexInfo->ii_Expressions; + + thisdata->vacattrstats = (VacAttrStats **) + palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *)); + tcnt = 0; + for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + int keycol = indexInfo->ii_KeyAttrNumbers[i]; + + if (keycol == 0) + { + /* Found an index expression */ + Node *indexkey; + + if (indexprs == NIL) /* shouldn't happen */ + elog(ERROR, "too few entries in indexprs list"); + indexkey = (Node *) lfirst(indexprs); + indexprs = lnext(indexprs); + + /* + * Can't analyze if the opclass uses a storage type + * different from the expression result type. We'd + * get confused because the type shown in pg_attribute + * for the index column doesn't match what we are + * getting from the expression. Perhaps this can be + * fixed someday, but for now, punt. + */ + if (exprType(indexkey) != + Irel[ind]->rd_att->attrs[i]->atttypid) + continue; + + thisdata->vacattrstats[tcnt] = + examine_attribute(Irel[ind], i+1); + if (thisdata->vacattrstats[tcnt] != NULL) + { + tcnt++; + analyzableindex = true; + } + } + } + thisdata->attr_cnt = tcnt; + } + } + } + /* * Quit if no analyzable columns */ - if (attr_cnt <= 0) + if (attr_cnt <= 0 && !analyzableindex) { + vac_close_indexes(nindexes, Irel); relation_close(onerel, AccessShareLock); return; } @@ -221,6 +312,16 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) if (targrows < vacattrstats[i]->minrows) targrows = vacattrstats[i]->minrows; } + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + for (i = 0; i < thisdata->attr_cnt; i++) + { + if (targrows < thisdata->vacattrstats[i]->minrows) + targrows = thisdata->vacattrstats[i]->minrows; + } + } /* * Acquire the sample rows @@ -228,19 +329,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows); - /* - * If we are running a standalone ANALYZE, update pages/tuples stats - * in pg_class. We have the accurate page count from heap_beginscan, - * but only an approximate number of tuples; therefore, if we are part - * of VACUUM ANALYZE do *not* overwrite the accurate count already - * inserted by VACUUM. - */ - if (!vacstmt->vacuum) - vac_update_relstats(RelationGetRelid(onerel), - onerel->rd_nblocks, - totalrows, - RelationGetForm(onerel)->relhasindex); - /* * Compute the statistics. Temporary results during the calculations * for each column are stored in a child context. The calc routines @@ -258,6 +346,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); old_context = MemoryContextSwitchTo(col_context); + for (i = 0; i < attr_cnt; i++) { VacAttrStats *stats = vacattrstats[i]; @@ -270,6 +359,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) totalrows); MemoryContextResetAndDeleteChildren(col_context); } + + if (hasindex) + compute_index_stats(onerel, totalrows, + indexdata, nindexes, + rows, numrows, + col_context); + MemoryContextSwitchTo(old_context); MemoryContextDelete(col_context); @@ -280,8 +376,45 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) * them alone.) */ update_attstats(relid, attr_cnt, vacattrstats); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + + update_attstats(RelationGetRelid(Irel[ind]), + thisdata->attr_cnt, thisdata->vacattrstats); + } } + /* + * If we are running a standalone ANALYZE, update pages/tuples stats + * in pg_class. We have the accurate page count from heap_beginscan, + * but only an approximate number of tuples; therefore, if we are part + * of VACUUM ANALYZE do *not* overwrite the accurate count already + * inserted by VACUUM. The same consideration applies to indexes. + */ + if (!vacstmt->vacuum) + { + vac_update_relstats(RelationGetRelid(onerel), + onerel->rd_nblocks, + totalrows, + hasindex); + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + double totalindexrows; + + totalindexrows = ceil(thisdata->tupleFract * totalrows); + vac_update_relstats(RelationGetRelid(Irel[ind]), + RelationGetNumberOfBlocks(Irel[ind]), + totalindexrows, + false); + } + } + + /* Done with indexes */ + vac_close_indexes(nindexes, Irel); + /* * Close source relation now, but keep lock so that no one deletes it * before we commit. (If someone did, they'd fail to clean up the @@ -290,6 +423,160 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt) relation_close(onerel, NoLock); } +/* + * Compute statistics about indexes of a relation + */ +static void +compute_index_stats(Relation onerel, double totalrows, + AnlIndexData *indexdata, int nindexes, + HeapTuple *rows, int numrows, + MemoryContext col_context) +{ + MemoryContext ind_context, + old_context; + TupleDesc heapDescriptor; + Datum attdata[INDEX_MAX_KEYS]; + char nulls[INDEX_MAX_KEYS]; + int ind, + i; + + heapDescriptor = RelationGetDescr(onerel); + + ind_context = AllocSetContextCreate(anl_context, + "Analyze Index", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + old_context = MemoryContextSwitchTo(ind_context); + + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + IndexInfo *indexInfo = thisdata->indexInfo; + int attr_cnt = thisdata->attr_cnt; + TupleTable tupleTable; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + List *predicate; + Datum *exprvals; + bool *exprnulls; + int numindexrows, + tcnt, + rowno; + double totalindexrows; + + /* Ignore index if no columns to analyze and not partial */ + if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL) + continue; + + /* + * Need an EState for evaluation of index expressions and + * partial-index predicates. Create it in the per-index context + * to be sure it gets cleaned up at the bottom of the loop. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + /* Need a slot to hold the current heap tuple, too */ + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + ExecSetSlotDescriptor(slot, heapDescriptor, false); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate. */ + predicate = (List *) + ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, + estate); + + /* Compute and save index expression values */ + exprvals = (Datum *) palloc((numrows * attr_cnt + 1) * sizeof(Datum)); + exprnulls = (bool *) palloc((numrows * attr_cnt + 1) * sizeof(bool)); + numindexrows = 0; + tcnt = 0; + for (rowno = 0; rowno < numrows; rowno++) + { + HeapTuple heapTuple = rows[rowno]; + + /* Set up for predicate or expression evaluation */ + ExecStoreTuple(heapTuple, slot, InvalidBuffer, false); + + /* If index is partial, check predicate */ + if (predicate != NIL) + { + if (!ExecQual(predicate, econtext, false)) + continue; + } + numindexrows++; + + if (attr_cnt > 0) + { + /* + * Evaluate the index row to compute expression values. + * We could do this by hand, but FormIndexDatum is convenient. + */ + FormIndexDatum(indexInfo, + heapTuple, + heapDescriptor, + estate, + attdata, + nulls); + /* + * Save just the columns we care about. + */ + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + int attnum = stats->attr->attnum; + + exprvals[tcnt] = attdata[attnum-1]; + exprnulls[tcnt] = (nulls[attnum-1] == 'n'); + tcnt++; + } + } + } + + /* + * Having counted the number of rows that pass the predicate in + * the sample, we can estimate the total number of rows in the index. + */ + thisdata->tupleFract = (double) numindexrows / (double) numrows; + totalindexrows = ceil(thisdata->tupleFract * totalrows); + + /* + * Now we can compute the statistics for the expression columns. + */ + if (numindexrows > 0) + { + MemoryContextSwitchTo(col_context); + for (i = 0; i < attr_cnt; i++) + { + VacAttrStats *stats = thisdata->vacattrstats[i]; + + stats->exprvals = exprvals + i; + stats->exprnulls = exprnulls + i; + stats->rowstride = attr_cnt; + (*stats->compute_stats) (stats, + ind_fetch_func, + numindexrows, + totalindexrows); + MemoryContextResetAndDeleteChildren(col_context); + } + } + + /* And clean up */ + MemoryContextSwitchTo(ind_context); + + ExecDropTupleTable(tupleTable, true); + FreeExecutorState(estate); + MemoryContextResetAndDeleteChildren(ind_context); + } + + MemoryContextSwitchTo(old_context); + MemoryContextDelete(ind_context); +} + /* * examine_attribute -- pre-analysis of a single column * @@ -746,6 +1033,9 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats) Relation sd; int attno; + if (natts <= 0) + return; /* nothing to do */ + sd = heap_openr(StatisticRelationName, RowExclusiveLock); for (attno = 0; attno < natts; attno++) @@ -880,6 +1170,23 @@ std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) return heap_getattr(tuple, attnum, tupDesc, isNull); } +/* + * Fetch function for analyzing index expressions. + * + * We have not bothered to construct index tuples, instead the data is + * just in Datum arrays. + */ +static Datum +ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) +{ + int i; + + /* exprvals and exprnulls are already offset for proper column */ + i = rownum * stats->rowstride; + *isNull = stats->exprnulls[i]; + return stats->exprvals[i]; +} + /*========================================================================== * diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6fadd0d4e1..3b87e6223b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.98 2004/02/10 01:55:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.99 2004/02/15 21:01:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2258,13 +2258,19 @@ AlterTableAlterColumnFlags(Oid myrelid, bool recurse, HeapTuple tuple; Form_pg_attribute attrtuple; - rel = heap_open(myrelid, AccessExclusiveLock); + rel = relation_open(myrelid, AccessExclusiveLock); + /* + * Allow index for statistics case only + */ if (rel->rd_rel->relkind != RELKIND_RELATION) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is not a table", - RelationGetRelationName(rel)))); + { + if (rel->rd_rel->relkind != RELKIND_INDEX || *flagType != 'S') + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table", + RelationGetRelationName(rel)))); + } /* Permissions checks */ if (!pg_class_ownercheck(myrelid, GetUserId())) @@ -2339,7 +2345,7 @@ AlterTableAlterColumnFlags(Oid myrelid, bool recurse, /* * Propagate to children if desired */ - if (recurse) + if (recurse && rel->rd_rel->relkind == RELKIND_RELATION) { List *child, *children; diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index b14d142752..741b0a129e 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/heap.h,v 1.63 2003/11/29 22:40:58 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/catalog/heap.h,v 1.64 2004/02/15 21:01:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -69,6 +69,7 @@ extern void RemoveAttributeById(Oid relid, AttrNumber attnum); extern void RemoveAttrDefault(Oid relid, AttrNumber attnum, DropBehavior behavior, bool complain); extern void RemoveAttrDefaultById(Oid attrdefId); +extern void RemoveStatistics(Relation rel, AttrNumber attnum); extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno, bool relhasoids); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 8c58f1ac38..9abb09948f 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.50 2004/02/13 06:39:49 tgl Exp $ + * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.51 2004/02/15 21:01:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -106,8 +106,11 @@ typedef struct VacAttrStats * be looked at by type-specific functions. */ int tupattnum; /* attribute number within tuples */ - HeapTuple *rows; /* access info for fetch function */ + HeapTuple *rows; /* access info for std fetch function */ TupleDesc tupDesc; + Datum *exprvals; /* access info for index fetch function */ + bool *exprnulls; + int rowstride; } VacAttrStats;