]> granicus.if.org Git - postgresql/commitdiff
Fix improper repetition of previous results from a hashed aggregate.
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 24 Aug 2016 18:37:50 +0000 (14:37 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 24 Aug 2016 18:38:13 +0000 (14:38 -0400)
ExecReScanAgg's check for whether it could re-use a previously calculated
hashtable neglected the possibility that the Agg node might reference
PARAM_EXEC Params that are not referenced by its input plan node.  That's
okay if the Params are in upper tlist or qual expressions; but if one
appears in aggregate input expressions, then the hashtable contents need
to be recomputed when the Param's value changes.

To avoid unnecessary performance degradation in the case of a Param that
isn't within an aggregate input, add logic to the planner to determine
which Params are within aggregate inputs.  This requires a new field in
struct Agg, but fortunately we never write plans to disk, so this isn't
an initdb-forcing change.

Per report from Jeevan Chalke.  This has been broken since forever,
so back-patch to all supported branches.

Andrew Gierth, with minor adjustments by me

Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>

src/backend/executor/nodeAgg.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/nodes/readfuncs.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/subselect.c
src/include/nodes/plannodes.h
src/test/regress/expected/aggregates.out
src/test/regress/sql/aggregates.sql

index 1ec2515090fb2a4b723cbbd5c73c68426f5e4dd9..ce2fc281a43bb445d6d592921bf885e136303b62 100644 (file)
@@ -3425,11 +3425,13 @@ ExecReScanAgg(AggState *node)
                        return;
 
                /*
-                * If we do have the hash table and the subplan does not have any
-                * parameter changes, then we can just rescan the existing hash table;
-                * no need to build it again.
+                * If we do have the hash table, and the subplan does not have any
+                * parameter changes, and none of our own parameter changes affect
+                * input expressions of the aggregated functions, then we can just
+                * rescan the existing hash table; no need to build it again.
                 */
-               if (outerPlan->chgParam == NULL)
+               if (outerPlan->chgParam == NULL &&
+                       !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
                {
                        ResetTupleHashIterator(node->hashtable, &node->hashiter);
                        return;
index 3244c76ddcca13b79db7902da12a22f4f3ccf283..c2b1ccf99f1e7606dd109c94a297f4f360e7b2fd 100644 (file)
@@ -877,6 +877,7 @@ _copyAgg(const Agg *from)
                COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
        }
        COPY_SCALAR_FIELD(numGroups);
+       COPY_BITMAPSET_FIELD(aggParams);
        COPY_NODE_FIELD(groupingSets);
        COPY_NODE_FIELD(chain);
 
index acaf4ea5ebcd694b266001ddc8526891b56bcddb..0ccd75b74a81b48e817d0048b9b236dc1bd3085e 100644 (file)
@@ -716,6 +716,7 @@ _outAgg(StringInfo str, const Agg *node)
                appendStringInfo(str, " %u", node->grpOperators[i]);
 
        WRITE_LONG_FIELD(numGroups);
+       WRITE_BITMAPSET_FIELD(aggParams);
        WRITE_NODE_FIELD(groupingSets);
        WRITE_NODE_FIELD(chain);
 }
index 94954dcc722a3d865bc50a7b0a8c31cb68be2bda..da71dbb733207ebe201d5deca035f0f6400f5dc3 100644 (file)
@@ -1991,6 +1991,7 @@ _readAgg(void)
        READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
        READ_OID_ARRAY(grpOperators, local_node->numCols);
        READ_LONG_FIELD(numGroups);
+       READ_BITMAPSET_FIELD(aggParams);
        READ_NODE_FIELD(groupingSets);
        READ_NODE_FIELD(chain);
 
index 54d601fc47d0ffbbf6bfc4d0cccb0e6660e39e65..47158f646802aaeab776192b98329a940bbdbea3 100644 (file)
@@ -5664,6 +5664,7 @@ make_agg(List *tlist, List *qual,
        node->grpColIdx = grpColIdx;
        node->grpOperators = grpOperators;
        node->numGroups = numGroups;
+       node->aggParams = NULL;         /* SS_finalize_plan() will fill this */
        node->groupingSets = groupingSets;
        node->chain = chain;
 
index a46cc108203706e393465c5892f2cb1f6f73a6d1..6edefb1138833ce7d79261895029d035649c2d25 100644 (file)
@@ -82,6 +82,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
                          Bitmapset *valid_params,
                          Bitmapset *scan_params);
 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
+static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
 
 
 /*
@@ -2652,6 +2653,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                                                                                 locally_added_param);
                        break;
 
+               case T_Agg:
+                       {
+                               Agg                *agg = (Agg *) plan;
+
+                               /*
+                                * AGG_HASHED plans need to know which Params are referenced
+                                * in aggregate calls.  Do a separate scan to identify them.
+                                */
+                               if (agg->aggstrategy == AGG_HASHED)
+                               {
+                                       finalize_primnode_context aggcontext;
+
+                                       aggcontext.root = root;
+                                       aggcontext.paramids = NULL;
+                                       finalize_agg_primnode((Node *) agg->plan.targetlist,
+                                                                                 &aggcontext);
+                                       finalize_agg_primnode((Node *) agg->plan.qual,
+                                                                                 &aggcontext);
+                                       agg->aggParams = aggcontext.paramids;
+                               }
+                       }
+                       break;
+
                case T_WindowAgg:
                        finalize_primnode(((WindowAgg *) plan)->startOffset,
                                                          &context);
@@ -2660,7 +2684,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                        break;
 
                case T_Hash:
-               case T_Agg:
                case T_Material:
                case T_Sort:
                case T_Unique:
@@ -2811,6 +2834,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
                                                                  (void *) context);
 }
 
+/*
+ * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
+ * and add IDs of all PARAM_EXEC params appearing within their aggregated
+ * arguments to the result set.
+ */
+static bool
+finalize_agg_primnode(Node *node, finalize_primnode_context *context)
+{
+       if (node == NULL)
+               return false;
+       if (IsA(node, Aggref))
+       {
+               Aggref     *agg = (Aggref *) node;
+
+               /* we should not consider the direct arguments, if any */
+               finalize_primnode((Node *) agg->args, context);
+               finalize_primnode((Node *) agg->aggfilter, context);
+               return false;                   /* there can't be any Aggrefs below here */
+       }
+       return expression_tree_walker(node, finalize_agg_primnode,
+                                                                 (void *) context);
+}
+
 /*
  * SS_make_initplan_output_param - make a Param for an initPlan's output
  *
index 369179f2912e8919f5bfa605e5eb017ed07000f5..bc5463b8f7fe02b370499f40cbc4c6441f47a246 100644 (file)
@@ -715,7 +715,8 @@ typedef struct Agg
        AttrNumber *grpColIdx;          /* their indexes in the target list */
        Oid                *grpOperators;       /* equality operators to compare with */
        long            numGroups;              /* estimated number of groups in input */
-       /* Note: the planner only provides numGroups in AGG_HASHED case */
+       Bitmapset  *aggParams;          /* IDs of Params used in Aggref inputs */
+       /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
        List       *groupingSets;       /* grouping sets to use */
        List       *chain;                      /* chained Agg/Sort nodes */
 } Agg;
index 14646c6397c166c27999ee1c00df2853a84b6746..45208a6da66bd8d09451c98cfce15a1247c9d00a 100644 (file)
@@ -366,6 +366,81 @@ from tenk1 o;
  9999
 (1 row)
 
+-- Test handling of Params within aggregate arguments in hashed aggregation.
+-- Per bug report from Jeevan Chalke.
+explain (verbose, costs off)
+select s1, s2, sm
+from generate_series(1, 3) s1,
+     lateral (select s2, sum(s1 + s2) sm
+              from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Sort
+   Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+   Sort Key: s1.s1, s2.s2
+   ->  Nested Loop
+         Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
+         ->  Function Scan on pg_catalog.generate_series s1
+               Output: s1.s1
+               Function Call: generate_series(1, 3)
+         ->  HashAggregate
+               Output: s2.s2, sum((s1.s1 + s2.s2))
+               Group Key: s2.s2
+               ->  Function Scan on pg_catalog.generate_series s2
+                     Output: s2.s2
+                     Function Call: generate_series(1, 3)
+(14 rows)
+
+select s1, s2, sm
+from generate_series(1, 3) s1,
+     lateral (select s2, sum(s1 + s2) sm
+              from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+ s1 | s2 | sm 
+----+----+----
+  1 |  1 |  2
+  1 |  2 |  3
+  1 |  3 |  4
+  2 |  1 |  3
+  2 |  2 |  4
+  2 |  3 |  5
+  3 |  1 |  4
+  3 |  2 |  5
+  3 |  3 |  6
+(9 rows)
+
+explain (verbose, costs off)
+select array(select sum(x+y) s
+            from generate_series(1,3) y group by y order by s)
+  from generate_series(1,3) x;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Function Scan on pg_catalog.generate_series x
+   Output: (SubPlan 1)
+   Function Call: generate_series(1, 3)
+   SubPlan 1
+     ->  Sort
+           Output: (sum((x.x + y.y))), y.y
+           Sort Key: (sum((x.x + y.y)))
+           ->  HashAggregate
+                 Output: sum((x.x + y.y)), y.y
+                 Group Key: y.y
+                 ->  Function Scan on pg_catalog.generate_series y
+                       Output: y.y
+                       Function Call: generate_series(1, 3)
+(13 rows)
+
+select array(select sum(x+y) s
+            from generate_series(1,3) y group by y order by s)
+  from generate_series(1,3) x;
+  array  
+---------
+ {2,3,4}
+ {3,4,5}
+ {4,5,6}
+(3 rows)
+
 --
 -- test for bitwise integer aggregates
 --
index 9983ff3a8962f749f265d60ea562963ec56b48cc..430ac49385c7e1723f34a701fcd537fef49f4f8d 100644 (file)
@@ -98,6 +98,28 @@ select
   (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
 from tenk1 o;
 
+-- Test handling of Params within aggregate arguments in hashed aggregation.
+-- Per bug report from Jeevan Chalke.
+explain (verbose, costs off)
+select s1, s2, sm
+from generate_series(1, 3) s1,
+     lateral (select s2, sum(s1 + s2) sm
+              from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+select s1, s2, sm
+from generate_series(1, 3) s1,
+     lateral (select s2, sum(s1 + s2) sm
+              from generate_series(1, 3) s2 group by s2) ss
+order by 1, 2;
+
+explain (verbose, costs off)
+select array(select sum(x+y) s
+            from generate_series(1,3) y group by y order by s)
+  from generate_series(1,3) x;
+select array(select sum(x+y) s
+            from generate_series(1,3) y group by y order by s)
+  from generate_series(1,3) x;
+
 --
 -- test for bitwise integer aggregates
 --