]> granicus.if.org Git - postgresql/commitdiff
Improve planner's estimation of the size of an append relation: rather than
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 27 Jun 2008 03:56:55 +0000 (03:56 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 27 Jun 2008 03:56:55 +0000 (03:56 +0000)
taking the maximum of any child rel's width, we should weight the widths
proportionally to the number of rows expected from each child.  In hindsight
this is obviously correct because row width is really a proxy for the total
physical size of the relation.  Per discussion with Scott Carey (bug #4264).

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/createplan.c

index e6728f39fe49341ffa108e43101a81e1a3ff16e0..c8335868133ddaf1205691c0b5f2cfc5eb053613 100644 (file)
@@ -8,13 +8,15 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.170 2008/04/01 00:48:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.171 2008/06/27 03:56:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include <math.h>
+
 #ifdef OPTIMIZER_DEBUG
 #include "nodes/print.h"
 #endif
@@ -263,6 +265,10 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 {
        int                     parentRTindex = rti;
        List       *subpaths = NIL;
+       double          parent_rows;
+       double          parent_size;
+       double     *parent_attrsizes;
+       int                     nattrs;
        ListCell   *l;
 
        /*
@@ -277,10 +283,23 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                 errmsg("SELECT FOR UPDATE/SHARE is not supported for inheritance queries")));
 
        /*
-        * Initialize to compute size estimates for whole append relation
+        * Initialize to compute size estimates for whole append relation.
+        *
+        * We handle width estimates by weighting the widths of different
+        * child rels proportionally to their number of rows.  This is sensible
+        * because the use of width estimates is mainly to compute the total
+        * relation "footprint" if we have to sort or hash it.  To do this,
+        * we sum the total equivalent size (in "double" arithmetic) and then
+        * divide by the total rowcount estimate.  This is done separately for
+        * the total rel width and each attribute.
+        *
+        * Note: if you consider changing this logic, beware that child rels could
+        * have zero rows and/or width, if they were excluded by constraints.
         */
-       rel->rows = 0;
-       rel->width = 0;
+       parent_rows = 0;
+       parent_size = 0;
+       nattrs = rel->max_attr - rel->min_attr + 1;
+       parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
 
        /*
         * Generate access paths for each member relation, and pick the cheapest
@@ -379,38 +398,53 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        subpaths = lappend(subpaths, childpath);
 
                /*
-                * Propagate size information from the child back to the parent. For
-                * simplicity, we use the largest widths from any child as the parent
-                * estimates.  (If you want to change this, beware of child
-                * attr_widths[] entries that haven't been set and are still 0.)
+                * Accumulate size information from each child.
                 */
-               rel->rows += childrel->rows;
-               if (childrel->width > rel->width)
-                       rel->width = childrel->width;
-
-               forboth(parentvars, rel->reltargetlist,
-                               childvars, childrel->reltargetlist)
+               if (childrel->rows > 0)
                {
-                       Var                *parentvar = (Var *) lfirst(parentvars);
-                       Var                *childvar = (Var *) lfirst(childvars);
+                       parent_rows += childrel->rows;
+                       parent_size += childrel->width * childrel->rows;
 
-                       if (IsA(parentvar, Var) &&
-                               IsA(childvar, Var))
+                       forboth(parentvars, rel->reltargetlist,
+                                       childvars, childrel->reltargetlist)
                        {
-                               int                     pndx = parentvar->varattno - rel->min_attr;
-                               int                     cndx = childvar->varattno - childrel->min_attr;
+                               Var                *parentvar = (Var *) lfirst(parentvars);
+                               Var                *childvar = (Var *) lfirst(childvars);
 
-                               if (childrel->attr_widths[cndx] > rel->attr_widths[pndx])
-                                       rel->attr_widths[pndx] = childrel->attr_widths[cndx];
+                               if (IsA(parentvar, Var) &&
+                                       IsA(childvar, Var))
+                               {
+                                       int                     pndx = parentvar->varattno - rel->min_attr;
+                                       int                     cndx = childvar->varattno - childrel->min_attr;
+
+                                       parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
+                               }
                        }
                }
        }
 
+       /*
+        * Save the finished size estimates.
+        */
+       rel->rows = parent_rows;
+       if (parent_rows > 0)
+       {
+               int             i;
+
+               rel->width = rint(parent_size / parent_rows);
+               for (i = 0; i < nattrs; i++)
+                       rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
+       }
+       else
+               rel->width = 0;                 /* attr_widths should be zero already */
+
        /*
         * Set "raw tuples" count equal to "rows" for the appendrel; needed
         * because some places assume rel->tuples is valid for any baserel.
         */
-       rel->tuples = rel->rows;
+       rel->tuples = parent_rows;
+
+       pfree(parent_attrsizes);
 
        /*
         * Finally, build Append path and install it as the only access path for
index 109a014547519a4b6272cae59305cbc56a315828..0884cda60704d85aa15e7ffcc89aa28d24041289 100644 (file)
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.240 2008/04/17 21:22:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.241 2008/06/27 03:56:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include <limits.h>
+#include <math.h>
 
 #include "access/skey.h"
 #include "nodes/makefuncs.h"
@@ -2312,6 +2313,7 @@ make_append(List *appendplans, bool isTarget, List *tlist)
 {
        Append     *node = makeNode(Append);
        Plan       *plan = &node->plan;
+       double          total_size;
        ListCell   *subnode;
 
        /*
@@ -2322,7 +2324,7 @@ make_append(List *appendplans, bool isTarget, List *tlist)
        plan->startup_cost = 0;
        plan->total_cost = 0;
        plan->plan_rows = 0;
-       plan->plan_width = 0;
+       total_size = 0;
        foreach(subnode, appendplans)
        {
                Plan       *subplan = (Plan *) lfirst(subnode);
@@ -2331,9 +2333,12 @@ make_append(List *appendplans, bool isTarget, List *tlist)
                        plan->startup_cost = subplan->startup_cost;
                plan->total_cost += subplan->total_cost;
                plan->plan_rows += subplan->plan_rows;
-               if (plan->plan_width < subplan->plan_width)
-                       plan->plan_width = subplan->plan_width;
+               total_size += subplan->plan_width * subplan->plan_rows;
        }
+       if (plan->plan_rows > 0)
+               plan->plan_width = rint(total_size / plan->plan_rows);
+       else
+               plan->plan_width = 0;
 
        plan->targetlist = tlist;
        plan->qual = NIL;