Fix overoptimistic assumptions in column width estimation for subqueries.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 23 Aug 2011 21:11:41 +0000 (17:11 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 23 Aug 2011 21:13:30 +0000 (17:13 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Aug 2011 21:11:41 +0000 (17:11 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Aug 2011 21:13:30 +0000 (17:13 -0400)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 6b43aee1833b87e9c7e08753143bd017a07a9966..a14b809a14ca7f95311071429724b4cd4807e309 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -403,7 +403,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         continue;
                 }
  
-               /* CE failed, so finish copying targetlist and join quals */
+               /*
+                * CE failed, so finish copying/modifying targetlist and join quals.
+                *
+                * Note: the resulting childrel->reltargetlist may contain arbitrary
+                * expressions, which normally would not occur in a reltargetlist.
+                * That is okay because nothing outside of this routine will look at
+                * the child rel's reltargetlist.  We do have to cope with the case
+                * while constructing attr_widths estimates below, though.
+                */
                 childrel->joininfo = (List *)
                         adjust_appendrel_attrs((Node *) rel->joininfo,
                                                                    appinfo);
@@ -486,23 +494,36 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         parent_rows += childrel->rows;
                         parent_size += childrel->width * childrel->rows;
  
+                       /*
+                        * Accumulate per-column estimates too.  We need not do anything
+                        * for PlaceHolderVars in the parent list.  If child expression
+                        * isn't a Var, or we didn't record a width estimate for it, we
+                        * have to fall back on a datatype-based estimate.
+                        *
+                        * By construction, child's reltargetlist is 1-to-1 with parent's.
+                        */
                         forboth(parentvars, rel->reltargetlist,
                                         childvars, childrel->reltargetlist)
                         {
                                 Var                *parentvar = (Var *) lfirst(parentvars);
-                               Var                *childvar = (Var *) lfirst(childvars);
-
-                               /*
-                                * Accumulate per-column estimates too.  Whole-row Vars and
-                                * PlaceHolderVars can be ignored here.
-                                */
-                               if (IsA(parentvar, Var) &&
-                                       IsA(childvar, Var))
+                               Node       *childvar = (Node *) lfirst(childvars);
+
+                               if (IsA(parentvar, Var))
                                 {
                                         int                     pndx = parentvar->varattno - rel->min_attr;
-                                       int                     cndx = childvar->varattno - childrel->min_attr;
-
-                                       parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
+                                       int32           child_width = 0;
+
+                                       if (IsA(childvar, Var))
+                                       {
+                                               int             cndx = ((Var *) childvar)->varattno - childrel->min_attr;
+
+                                               child_width = childrel->attr_widths[cndx];
+                                       }
+                                       if (child_width <= 0)
+                                               child_width = get_typavgwidth(exprType(childvar),
+                                                                                                         exprTypmod(childvar));
+                                       Assert(child_width > 0);
+                                       parent_attrsizes[pndx] += child_width * childrel->rows;
                                 }
                         }
                 }
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 1b264a29e2962428ecba9b2d8cde58618c71b8df..c853bd8dcb6ca6758b6b515abea577712aa07889 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3238,14 +3238,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
         /*
          * Compute per-output-column width estimates by examining the subquery's
          * targetlist.  For any output that is a plain Var, get the width estimate
-        * that was made while planning the subquery.  Otherwise, fall back on a
-        * datatype-based estimate.
+        * that was made while planning the subquery.  Otherwise, we leave it to
+        * set_rel_width to fill in a datatype-based default estimate.
          */
         foreach(lc, subroot->parse->targetList)
         {
                 TargetEntry *te = (TargetEntry *) lfirst(lc);
                 Node       *texpr = (Node *) te->expr;
-               int32           item_width;
+               int32           item_width = 0;
  
                 Assert(IsA(te, TargetEntry));
                 /* junk columns aren't visible to upper query */
@@ -3256,8 +3256,14 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                  * XXX This currently doesn't work for subqueries containing set
                  * operations, because the Vars in their tlists are bogus references
                  * to the first leaf subquery, which wouldn't give the right answer
-                * even if we could still get to its PlannerInfo.  So fall back on
-                * datatype in that case.
+                * even if we could still get to its PlannerInfo.
+                *
+                * Also, the subquery could be an appendrel for which all branches are
+                * known empty due to constraint exclusion, in which case
+                * set_append_rel_pathlist will have left the attr_widths set to zero.
+                *
+                * In either case, we just leave the width estimate zero until
+                * set_rel_width fixes it.
                  */
                 if (IsA(texpr, Var) &&
                         subroot->parse->setOperations == NULL)
@@ -3267,11 +3273,6 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
  
                         item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
                 }
-               else
-               {
-                       item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr));
-               }
-               Assert(item_width > 0);
                 Assert(te->resno >= rel->min_attr && te->resno <= rel->max_attr);
                 rel->attr_widths[te->resno - rel->min_attr] = item_width;
         }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 23 Aug 2011 21:11:41 +0000 (17:11 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 23 Aug 2011 21:13:30 +0000 (17:13 -0400)
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history