]> granicus.if.org Git - postgresql/commitdiff
Dept of second thoughts: improve the API for AnalyzeForeignTable.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Apr 2012 20:04:10 +0000 (16:04 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Apr 2012 20:04:10 +0000 (16:04 -0400)
If we make the initially-called function return the table physical-size
estimate, acquire_inherited_sample_rows will be able to use that to
allocate numbers of samples among child tables, when the day comes that
we want to support foreign tables in inheritance trees.

contrib/file_fdw/file_fdw.c
doc/src/sgml/fdwhandler.sgml
src/backend/commands/analyze.c
src/include/foreign/fdwapi.h

index 30ed9fbad14d57c3be0bf591a4aaf28b092b49a1..2d36a72e9f299aa45f0f108e4431c83fe77faf26 100644 (file)
@@ -125,7 +125,9 @@ static void fileBeginForeignScan(ForeignScanState *node, int eflags);
 static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
 static void fileReScanForeignScan(ForeignScanState *node);
 static void fileEndForeignScan(ForeignScanState *node);
-static AcquireSampleRowsFunc fileAnalyzeForeignTable(Relation relation);
+static bool fileAnalyzeForeignTable(Relation relation,
+                                               AcquireSampleRowsFunc *func,
+                                               BlockNumber *totalpages);
 
 /*
  * Helper functions
@@ -141,8 +143,7 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
                           Cost *startup_cost, Cost *total_cost);
 static int     file_acquire_sample_rows(Relation onerel, int elevel,
                                                 HeapTuple *rows, int targrows,
-                                                double *totalrows, double *totaldeadrows,
-                                                BlockNumber *totalpages);
+                                                double *totalrows, double *totaldeadrows);
 
 
 /*
@@ -656,10 +657,39 @@ fileEndForeignScan(ForeignScanState *node)
  * fileAnalyzeForeignTable
  *             Test whether analyzing this foreign table is supported
  */
-static AcquireSampleRowsFunc
-fileAnalyzeForeignTable(Relation relation)
+static bool
+fileAnalyzeForeignTable(Relation relation,
+                                               AcquireSampleRowsFunc *func,
+                                               BlockNumber *totalpages)
 {
-       return file_acquire_sample_rows;
+       char       *filename;
+       List       *options;
+       struct stat     stat_buf;
+
+       /* Fetch options of foreign table */
+       fileGetOptions(RelationGetRelid(relation), &filename, &options);
+
+       /*
+        * Get size of the file.  (XXX if we fail here, would it be better to
+        * just return false to skip analyzing the table?)
+        */
+       if (stat(filename, &stat_buf) < 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not stat file \"%s\": %m",
+                                               filename)));
+
+       /*
+        * Convert size to pages.  Must return at least 1 so that we can tell
+        * later on that pg_class.relpages is not default.
+        */
+       *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
+       if (*totalpages < 1)
+               *totalpages = 1;
+
+       *func = file_acquire_sample_rows;
+
+       return true;
 }
 
 /*
@@ -780,8 +810,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  * which must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
  * We also count the total number of rows in the file and return it into
- * *totalrows, and return the file's physical size in *totalpages.
- * Note that *totaldeadrows is always set to 0.
+ * *totalrows.  Note that *totaldeadrows is always set to 0.
  *
  * Note that the returned list of rows is not always in order by physical
  * position in the file.  Therefore, correlation estimates derived later
@@ -791,8 +820,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 static int
 file_acquire_sample_rows(Relation onerel, int elevel,
                                                 HeapTuple *rows, int targrows,
-                                                double *totalrows, double *totaldeadrows,
-                                                BlockNumber *totalpages)
+                                                double *totalrows, double *totaldeadrows)
 {
        int                     numrows = 0;
        double          rowstoskip = -1; /* -1 means not set yet */
@@ -802,7 +830,6 @@ file_acquire_sample_rows(Relation onerel, int elevel,
        bool       *nulls;
        bool            found;
        char       *filename;
-       struct stat     stat_buf;
        List       *options;
        CopyState       cstate;
        ErrorContextCallback errcontext;
@@ -819,22 +846,6 @@ file_acquire_sample_rows(Relation onerel, int elevel,
        /* Fetch options of foreign table */
        fileGetOptions(RelationGetRelid(onerel), &filename, &options);
 
-       /*
-        * Get size of the file.
-        */
-       if (stat(filename, &stat_buf) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not stat file \"%s\": %m",
-                                               filename)));
-
-       /*
-        * Convert size to pages for use in I/O cost estimate.
-        */
-       *totalpages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
-       if (*totalpages < 1)
-               *totalpages = 1;
-
        /*
         * Create CopyState from FDW options.
         */
@@ -931,10 +942,10 @@ file_acquire_sample_rows(Relation onerel, int elevel,
         * Emit some interesting relation info
         */
        ereport(elevel,
-                       (errmsg("\"%s\": scanned %u pages containing %.0f rows; "
+                       (errmsg("\"%s\": file contains %.0f rows; "
                                        "%d rows in sample",
                                        RelationGetRelationName(onerel),
-                                       *totalpages, *totalrows, numrows)));
+                                       *totalrows, numrows)));
 
        return numrows;
 }
index 8e7525ab596cf3a16ac1c1a234cf8b80ad235c14..2c75b80b303e4a0899e778348082e4620196feea 100644 (file)
@@ -279,15 +279,19 @@ EndForeignScan (ForeignScanState *node);
 
     <para>
 <programlisting>
-AcquireSampleRowsFunc
-AnalyzeForeignTable (Relation relation);
+bool
+AnalyzeForeignTable (Relation relation,
+                     AcquireSampleRowsFunc *func,
+                     BlockNumber *totalpages);
 </programlisting>
 
      This function is called when <xref linkend="sql-analyze"> is executed on
-     a foreign table.  If the FDW supports collecting statistics for this
-     foreign table, it should return a pointer to a function that will collect
-     sample rows from the table.  Otherwise, return <literal>NULL</>.  If the
-     FDW does not support collecting statistics for any tables, the
+     a foreign table.  If the FDW can collect statistics for this
+     foreign table, it should return <literal>true</>, and provide a pointer
+     to a function that will collect sample rows from the table in
+     <parameter>func</>, plus the estimated size of the table in pages in
+     <parameter>totalpages</>.  Otherwise, return <literal>false</>.
+     If the FDW does not support collecting statistics for any tables, the
      <function>AnalyzeForeignTable</> pointer can be set to <literal>NULL</>.
     </para>
 
@@ -298,18 +302,16 @@ int
 AcquireSampleRowsFunc (Relation relation, int elevel,
                        HeapTuple *rows, int targrows,
                        double *totalrows,
-                       double *totaldeadrows,
-                       BlockNumber *totalpages);
+                       double *totaldeadrows);
 </programlisting>
 
      A random sample of up to <parameter>targrows</> rows should be collected
      from the table and stored into the caller-provided <parameter>rows</>
      array.  The actual number of rows collected must be returned.  In
-     addition, store estimates of the total numbers of live rows, dead rows,
-     and pages in the table into the output parameters
-     <parameter>totalrows</>, <parameter>totaldeadrows</>, and
-     <parameter>totalpages</>.  These numbers will be recorded in the table's
-     <structname>pg_class</> entry for future use.
+     addition, store estimates of the total numbers of live and dead rows in
+     the table into the output parameters <parameter>totalrows</> and
+     <parameter>totaldeadrows</>.  (Set <parameter>totaldeadrows</> to zero
+     if the FDW does not have any concept of dead rows.)
     </para>
 
     <para>
index 17abe48f25e10a746a6df84ff502feb271fe82d9..ff271644e0f93ee99bfe9c1f536f3dd48455d8d2 100644 (file)
@@ -84,7 +84,8 @@ static BufferAccessStrategy vac_strategy;
 
 
 static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
-                          AcquireSampleRowsFunc acquirefunc, bool inh, int elevel);
+                          AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
+                          bool inh, int elevel);
 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
                                  int samplesize);
 static bool BlockSampler_HasMore(BlockSampler bs);
@@ -97,8 +98,7 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                  Node *index_expr);
 static int     acquire_sample_rows(Relation onerel, int elevel,
                                        HeapTuple *rows, int targrows,
-                                       double *totalrows, double *totaldeadrows,
-                                       BlockNumber *totalpages);
+                                       double *totalrows, double *totaldeadrows);
 static int     compare_rows(const void *a, const void *b);
 static int     acquire_inherited_sample_rows(Relation onerel, int elevel,
                                                          HeapTuple *rows, int targrows,
@@ -117,7 +117,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
 {
        Relation        onerel;
        int                     elevel;
-       AcquireSampleRowsFunc acquirefunc;
+       AcquireSampleRowsFunc acquirefunc = NULL;
+       BlockNumber     relpages = 0;
 
        /* Select logging level */
        if (vacstmt->options & VACOPT_VERBOSE)
@@ -182,6 +183,27 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
                return;
        }
 
+       /*
+        * Silently ignore tables that are temp tables of other backends ---
+        * trying to analyze these is rather pointless, since their contents are
+        * probably not up-to-date on disk.  (We don't throw a warning here; it
+        * would just lead to chatter during a database-wide ANALYZE.)
+        */
+       if (RELATION_IS_OTHER_TEMP(onerel))
+       {
+               relation_close(onerel, ShareUpdateExclusiveLock);
+               return;
+       }
+
+       /*
+        * We can ANALYZE any table except pg_statistic. See update_attstats
+        */
+       if (RelationGetRelid(onerel) == StatisticRelationId)
+       {
+               relation_close(onerel, ShareUpdateExclusiveLock);
+               return;
+       }
+
        /*
         * Check that it's a plain table or foreign table; we used to do this
         * in get_rel_oids() but seems safer to check after we've locked the
@@ -191,6 +213,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
        {
                /* Regular table, so we'll use the regular row acquisition function */
                acquirefunc = acquire_sample_rows;
+               /* Also get regular table's size */
+               relpages = RelationGetNumberOfBlocks(onerel);
        }
        else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
@@ -199,15 +223,16 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
                 * supports analysis.
                 */
                FdwRoutine *fdwroutine;
+               bool            ok = false;
 
                fdwroutine = GetFdwRoutineByRelId(RelationGetRelid(onerel));
 
                if (fdwroutine->AnalyzeForeignTable != NULL)
-                       acquirefunc = fdwroutine->AnalyzeForeignTable(onerel);
-               else
-                       acquirefunc = NULL;
+                       ok = fdwroutine->AnalyzeForeignTable(onerel,
+                                                                                                &acquirefunc,
+                                                                                                &relpages);
 
-               if (acquirefunc == NULL)
+               if (!ok)
                {
                        ereport(WARNING,
                                        (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
@@ -227,27 +252,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
                return;
        }
 
-       /*
-        * Silently ignore tables that are temp tables of other backends ---
-        * trying to analyze these is rather pointless, since their contents are
-        * probably not up-to-date on disk.  (We don't throw a warning here; it
-        * would just lead to chatter during a database-wide ANALYZE.)
-        */
-       if (RELATION_IS_OTHER_TEMP(onerel))
-       {
-               relation_close(onerel, ShareUpdateExclusiveLock);
-               return;
-       }
-
-       /*
-        * We can ANALYZE any table except pg_statistic. See update_attstats
-        */
-       if (RelationGetRelid(onerel) == StatisticRelationId)
-       {
-               relation_close(onerel, ShareUpdateExclusiveLock);
-               return;
-       }
-
        /*
         * OK, let's do it.  First let other backends know I'm in ANALYZE.
         */
@@ -258,13 +262,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
        /*
         * Do the normal non-recursive ANALYZE.
         */
-       do_analyze_rel(onerel, vacstmt, acquirefunc, false, elevel);
+       do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, false, elevel);
 
        /*
         * If there are child tables, do recursive ANALYZE.
         */
        if (onerel->rd_rel->relhassubclass)
-               do_analyze_rel(onerel, vacstmt, acquirefunc, true, elevel);
+               do_analyze_rel(onerel, vacstmt, acquirefunc, relpages, true, elevel);
 
        /*
         * Close source relation now, but keep lock so that no one deletes it
@@ -293,7 +297,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
  */
 static void
 do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
-                          AcquireSampleRowsFunc acquirefunc, bool inh, int elevel)
+                          AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
+                          bool inh, int elevel)
 {
        int                     attr_cnt,
                                tcnt,
@@ -308,7 +313,6 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
                                numrows;
        double          totalrows,
                                totaldeadrows;
-       BlockNumber totalpages;
        HeapTuple  *rows;
        PGRUsage        ru0;
        TimestampTz starttime = 0;
@@ -485,17 +489,13 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
         */
        rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
        if (inh)
-       {
                numrows = acquire_inherited_sample_rows(onerel, elevel,
                                                                                                rows, targrows,
                                                                                                &totalrows, &totaldeadrows);
-               totalpages = 0;                 /* not needed in this path */
-       }
        else
                numrows = (*acquirefunc) (onerel, elevel,
                                                                  rows, targrows,
-                                                                 &totalrows, &totaldeadrows,
-                                                                 &totalpages);
+                                                                 &totalrows, &totaldeadrows);
 
        /*
         * Compute the statistics.      Temporary results during the calculations for
@@ -576,7 +576,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
         */
        if (!inh)
                vac_update_relstats(onerel,
-                                                       totalpages,
+                                                       relpages,
                                                        totalrows,
                                                        visibilitymap_count(onerel),
                                                        hasindex,
@@ -1032,7 +1032,6 @@ BlockSampler_Next(BlockSampler bs)
  * The actual number of rows selected is returned as the function result.
  * We also estimate the total numbers of live and dead rows in the table,
  * and return them into *totalrows and *totaldeadrows, respectively.
- * Also, the number of pages in the relation is returned into *totalpages.
  *
  * The returned list of tuples is in order by physical position in the table.
  * (We will rely on this later to derive correlation estimates.)
@@ -1061,8 +1060,7 @@ BlockSampler_Next(BlockSampler bs)
 static int
 acquire_sample_rows(Relation onerel, int elevel,
                                        HeapTuple *rows, int targrows,
-                                       double *totalrows, double *totaldeadrows,
-                                       BlockNumber *totalpages)
+                                       double *totalrows, double *totaldeadrows)
 {
        int                     numrows = 0;    /* # rows now in reservoir */
        double          samplerows = 0; /* total # rows collected */
@@ -1077,7 +1075,6 @@ acquire_sample_rows(Relation onerel, int elevel,
        Assert(targrows > 0);
 
        totalblocks = RelationGetNumberOfBlocks(onerel);
-       *totalpages = totalblocks;
 
        /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
        OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
@@ -1438,7 +1435,7 @@ compare_rows(const void *a, const void *b)
 /*
  * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
  *
- * This has largely the same API as acquire_sample_rows, except that rows are
+ * This has the same API as acquire_sample_rows, except that rows are
  * collected from all inheritance children as well as the specified table.
  * We fail and return zero if there are no inheritance children.
  */
@@ -1481,11 +1478,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
        /*
         * Count the blocks in all the relations.  The result could overflow
         * BlockNumber, so we use double arithmetic.
-        *
-        * XXX eventually we will probably want to allow child tables that are
-        * foreign tables.  Since we can't do RelationGetNumberOfBlocks on a
-        * foreign table, it's not very clear what fraction of the total to assign
-        * to it here.
         */
        rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
        relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
@@ -1540,7 +1532,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
                                int                     childrows;
                                double          trows,
                                                        tdrows;
-                               BlockNumber     tpages;
 
                                /* Fetch a random sample of the child's rows */
                                childrows = acquire_sample_rows(childrel,
@@ -1548,8 +1539,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
                                                                                                rows + numrows,
                                                                                                childtargrows,
                                                                                                &trows,
-                                                                                               &tdrows,
-                                                                                               &tpages);
+                                                                                               &tdrows);
 
                                /* We may need to convert from child's rowtype to parent's */
                                if (childrows > 0 &&
index 6bf3a5e2306497cea066f67460b2e1a9e153731b..0a09c94932692171f78b14bfd0a80316bf4e53be 100644 (file)
@@ -53,11 +53,11 @@ typedef void (*EndForeignScan_function) (ForeignScanState *node);
 typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
                                                                          HeapTuple *rows, int targrows,
                                                                          double *totalrows,
-                                                                         double *totaldeadrows,
-                                                                         BlockNumber *totalpages);
-
-typedef AcquireSampleRowsFunc (*AnalyzeForeignTable_function) (Relation relation);
+                                                                         double *totaldeadrows);
 
+typedef bool (*AnalyzeForeignTable_function) (Relation relation,
+                                                                                         AcquireSampleRowsFunc *func,
+                                                                                         BlockNumber *totalpages);
 
 /*
  * FdwRoutine is the struct returned by a foreign-data wrapper's handler