From cea49fe82fedcf125eb99a780099eaf47a326b03 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 6 Apr 2012 16:04:10 -0400 Subject: [PATCH] Dept of second thoughts: improve the API for AnalyzeForeignTable. If we make the initially-called function return the table physical-size estimate, acquire_inherited_sample_rows will be able to use that to allocate numbers of samples among child tables, when the day comes that we want to support foreign tables in inheritance trees. --- contrib/file_fdw/file_fdw.c | 69 ++++++++++++++----------- doc/src/sgml/fdwhandler.sgml | 28 +++++----- src/backend/commands/analyze.c | 94 +++++++++++++++------------------- src/include/foreign/fdwapi.h | 8 +-- 4 files changed, 101 insertions(+), 98 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 30ed9fbad1..2d36a72e9f 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -125,7 +125,9 @@ static void fileBeginForeignScan(ForeignScanState *node, int eflags); static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node); static void fileReScanForeignScan(ForeignScanState *node); static void fileEndForeignScan(ForeignScanState *node); -static AcquireSampleRowsFunc fileAnalyzeForeignTable(Relation relation); +static bool fileAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *func, + BlockNumber *totalpages); /* * Helper functions @@ -141,8 +143,7 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, Cost *startup_cost, Cost *total_cost); static int file_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows, - BlockNumber *totalpages); + double *totalrows, double *totaldeadrows); /* @@ -656,10 +657,39 @@ fileEndForeignScan(ForeignScanState *node) * fileAnalyzeForeignTable * Test whether analyzing this foreign table is supported */ -static AcquireSampleRowsFunc -fileAnalyzeForeignTable(Relation relation) +static bool +fileAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *func, + BlockNumber *totalpages) { - return file_acquire_sample_rows; + char *filename; + List *options; + struct stat stat_buf; + + /* Fetch options of foreign table */ + fileGetOptions(RelationGetRelid(relation), &filename, &options); + + /* + * Get size of the file. (XXX if we fail here, would it be better to + * just return false to skip analyzing the table?) + */ + if (stat(filename, &stat_buf) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + filename))); + + /* + * Convert size to pages. Must return at least 1 so that we can tell + * later on that pg_class.relpages is not default. + */ + *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ; + if (*totalpages < 1) + *totalpages = 1; + + *func = file_acquire_sample_rows; + + return true; } /* @@ -780,8 +810,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, * which must have at least targrows entries. * The actual number of rows selected is returned as the function result. * We also count the total number of rows in the file and return it into - * *totalrows, and return the file's physical size in *totalpages. - * Note that *totaldeadrows is always set to 0. + * *totalrows. Note that *totaldeadrows is always set to 0. * * Note that the returned list of rows is not always in order by physical * position in the file. Therefore, correlation estimates derived later @@ -791,8 +820,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, static int file_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows, - BlockNumber *totalpages) + double *totalrows, double *totaldeadrows) { int numrows = 0; double rowstoskip = -1; /* -1 means not set yet */ @@ -802,7 +830,6 @@ file_acquire_sample_rows(Relation onerel, int elevel, bool *nulls; bool found; char *filename; - struct stat stat_buf; List *options; CopyState cstate; ErrorContextCallback errcontext; @@ -819,22 +846,6 @@ file_acquire_sample_rows(Relation onerel, int elevel, /* Fetch options of foreign table */ fileGetOptions(RelationGetRelid(onerel), &filename, &options); - /* - * Get size of the file. - */ - if (stat(filename, &stat_buf) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - filename))); - - /* - * Convert size to pages for use in I/O cost estimate. - */ - *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ; - if (*totalpages < 1) - *totalpages = 1; - /* * Create CopyState from FDW options. */ @@ -931,10 +942,10 @@ file_acquire_sample_rows(Relation onerel, int elevel, * Emit some interesting relation info */ ereport(elevel, - (errmsg("\"%s\": scanned %u pages containing %.0f rows; " + (errmsg("\"%s\": file contains %.0f rows; " "%d rows in sample", RelationGetRelationName(onerel), - *totalpages, *totalrows, numrows))); + *totalrows, numrows))); return numrows; } diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 8e7525ab59..2c75b80b30 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -279,15 +279,19 @@ EndForeignScan (ForeignScanState *node); -AcquireSampleRowsFunc -AnalyzeForeignTable (Relation relation); +bool +AnalyzeForeignTable (Relation relation, + AcquireSampleRowsFunc *func, + BlockNumber *totalpages); This function is called when is executed on - a foreign table. If the FDW supports collecting statistics for this - foreign table, it should return a pointer to a function that will collect - sample rows from the table. Otherwise, return NULL. If the - FDW does not support collecting statistics for any tables, the + a foreign table. If the FDW can collect statistics for this + foreign table, it should return true, and provide a pointer + to a function that will collect sample rows from the table in + func, plus the estimated size of the table in pages in + totalpages. Otherwise, return false. + If the FDW does not support collecting statistics for any tables, the AnalyzeForeignTable pointer can be set to NULL. @@ -298,18 +302,16 @@ int AcquireSampleRowsFunc (Relation relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, - double *totaldeadrows, - BlockNumber *totalpages); + double *totaldeadrows); A random sample of up to targrows rows should be collected from the table and stored into the caller-provided rows array. The actual number of rows collected must be returned. In - addition, store estimates of the total numbers of live rows, dead rows, - and pages in the table into the output parameters - totalrows, totaldeadrows, and - totalpages. These numbers will be recorded in the table's - pg_class entry for future use. + addition, store estimates of the total numbers of live and dead rows in + the table into the output parameters totalrows and + totaldeadrows. (Set totaldeadrows to zero + if the FDW does not have any concept of dead rows.) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 17abe48f25..ff271644e0 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -84,7 +84,8 @@ static BufferAccessStrategy vac_strategy; static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, - AcquireSampleRowsFunc acquirefunc, bool inh, int elevel); + AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, + bool inh, int elevel); static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize); static bool BlockSampler_HasMore(BlockSampler bs); @@ -97,8 +98,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); static int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows, - BlockNumber *totalpages); + double *totalrows, double *totaldeadrows); static int compare_rows(const void *a, const void *b); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, @@ -117,7 +117,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { Relation onerel; int elevel; - AcquireSampleRowsFunc acquirefunc; + AcquireSampleRowsFunc acquirefunc = NULL; + BlockNumber relpages = 0; /* Select logging level */ if (vacstmt->options & VACOPT_VERBOSE) @@ -182,6 +183,27 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) return; } + /* + * Silently ignore tables that are temp tables of other backends --- + * trying to analyze these is rather pointless, since their contents are + * probably not up-to-date on disk. (We don't throw a warning here; it + * would just lead to chatter during a database-wide ANALYZE.) + */ + if (RELATION_IS_OTHER_TEMP(onerel)) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + + /* + * We can ANALYZE any table except pg_statistic. See update_attstats + */ + if (RelationGetRelid(onerel) == StatisticRelationId) + { + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + /* * Check that it's a plain table or foreign table; we used to do this * in get_rel_oids() but seems safer to check after we've locked the @@ -191,6 +213,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { /* Regular table, so we'll use the regular row acquisition function */ acquirefunc = acquire_sample_rows; + /* Also get regular table's size */ + relpages = RelationGetNumberOfBlocks(onerel); } else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -199,15 +223,16 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) * supports analysis. */ FdwRoutine *fdwroutine; + bool ok = false; fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(onerel)); if (fdwroutine->AnalyzeForeignTable != NULL) - acquirefunc = fdwroutine->AnalyzeForeignTable(onerel); - else - acquirefunc = NULL; + ok = fdwroutine->AnalyzeForeignTable(onerel, + &acquirefunc, + &relpages); - if (acquirefunc == NULL) + if (!ok) { ereport(WARNING, (errmsg("skipping \"%s\" --- cannot analyze this foreign table", @@ -227,27 +252,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) return; } - /* - * Silently ignore tables that are temp tables of other backends --- - * trying to analyze these is rather pointless, since their contents are - * probably not up-to-date on disk. (We don't throw a warning here; it - * would just lead to chatter during a database-wide ANALYZE.) - */ - if (RELATION_IS_OTHER_TEMP(onerel)) - { - relation_close(onerel, ShareUpdateExclusiveLock); - return; - } - - /* - * We can ANALYZE any table except pg_statistic. See update_attstats - */ - if (RelationGetRelid(onerel) == StatisticRelationId) - { - relation_close(onerel, ShareUpdateExclusiveLock); - return; - } - /* * OK, let's do it. First let other backends know I'm in ANALYZE. */ @@ -258,13 +262,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) /* * Do the normal non-recursive ANALYZE. */ - do_analyze_rel(onerel, vacstmt, acquirefunc, false, elevel); + do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, false, elevel); /* * If there are child tables, do recursive ANALYZE. */ if (onerel->rd_rel->relhassubclass) - do_analyze_rel(onerel, vacstmt, acquirefunc, true, elevel); + do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, true, elevel); /* * Close source relation now, but keep lock so that no one deletes it @@ -293,7 +297,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) */ static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, - AcquireSampleRowsFunc acquirefunc, bool inh, int elevel) + AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, + bool inh, int elevel) { int attr_cnt, tcnt, @@ -308,7 +313,6 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, numrows; double totalrows, totaldeadrows; - BlockNumber totalpages; HeapTuple *rows; PGRUsage ru0; TimestampTz starttime = 0; @@ -485,17 +489,13 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, */ rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); if (inh) - { numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, &totalrows, &totaldeadrows); - totalpages = 0; /* not needed in this path */ - } else numrows = (*acquirefunc) (onerel, elevel, rows, targrows, - &totalrows, &totaldeadrows, - &totalpages); + &totalrows, &totaldeadrows); /* * Compute the statistics. Temporary results during the calculations for @@ -576,7 +576,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, */ if (!inh) vac_update_relstats(onerel, - totalpages, + relpages, totalrows, visibilitymap_count(onerel), hasindex, @@ -1032,7 +1032,6 @@ BlockSampler_Next(BlockSampler bs) * The actual number of rows selected is returned as the function result. * We also estimate the total numbers of live and dead rows in the table, * and return them into *totalrows and *totaldeadrows, respectively. - * Also, the number of pages in the relation is returned into *totalpages. * * The returned list of tuples is in order by physical position in the table. * (We will rely on this later to derive correlation estimates.) @@ -1061,8 +1060,7 @@ BlockSampler_Next(BlockSampler bs) static int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows, - BlockNumber *totalpages) + double *totalrows, double *totaldeadrows) { int numrows = 0; /* # rows now in reservoir */ double samplerows = 0; /* total # rows collected */ @@ -1077,7 +1075,6 @@ acquire_sample_rows(Relation onerel, int elevel, Assert(targrows > 0); totalblocks = RelationGetNumberOfBlocks(onerel); - *totalpages = totalblocks; /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true); @@ -1438,7 +1435,7 @@ compare_rows(const void *a, const void *b) /* * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree * - * This has largely the same API as acquire_sample_rows, except that rows are + * This has the same API as acquire_sample_rows, except that rows are * collected from all inheritance children as well as the specified table. * We fail and return zero if there are no inheritance children. */ @@ -1481,11 +1478,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, /* * Count the blocks in all the relations. The result could overflow * BlockNumber, so we use double arithmetic. - * - * XXX eventually we will probably want to allow child tables that are - * foreign tables. Since we can't do RelationGetNumberOfBlocks on a - * foreign table, it's not very clear what fraction of the total to assign - * to it here. */ rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation)); relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double)); @@ -1540,7 +1532,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, int childrows; double trows, tdrows; - BlockNumber tpages; /* Fetch a random sample of the child's rows */ childrows = acquire_sample_rows(childrel, @@ -1548,8 +1539,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, rows + numrows, childtargrows, &trows, - &tdrows, - &tpages); + &tdrows); /* We may need to convert from child's rowtype to parent's */ if (childrows > 0 && diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 6bf3a5e230..0a09c94932 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -53,11 +53,11 @@ typedef void (*EndForeignScan_function) (ForeignScanState *node); typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, - double *totaldeadrows, - BlockNumber *totalpages); - -typedef AcquireSampleRowsFunc (*AnalyzeForeignTable_function) (Relation relation); + double *totaldeadrows); +typedef bool (*AnalyzeForeignTable_function) (Relation relation, + AcquireSampleRowsFunc *func, + BlockNumber *totalpages); /* * FdwRoutine is the struct returned by a foreign-data wrapper's handler -- 2.40.0