]> granicus.if.org Git - postgresql/commitdiff
Move estimate_hashagg_tablesize to selfuncs.c, and widen result to double.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Feb 2019 19:59:02 +0000 (14:59 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Feb 2019 19:59:12 +0000 (14:59 -0500)
It seems to make more sense for this to be in selfuncs.c, since it's
largely a statistical-estimation thing, and it's related to other
functions like estimate_hash_bucket_stats that are there.

While at it, change the result type from Size to double.  Perhaps at one
point it was impossible for the result to overflow an integer, but
I've got no confidence in that proposition anymore.  Nothing's actually
done with the result except to compare it to a work_mem-based limit,
so as long as we don't get an overflow on the way to that comparison,
things should be fine even with very large dNumGroups.

Code movement proposed by Antonin Houska, type change by me

Discussion: https://postgr.es/m/25767.1549359615@localhost

src/backend/optimizer/plan/planner.c
src/backend/utils/adt/selfuncs.c
src/include/utils/selfuncs.h

index 1a58d733fa655bc31701b8bb9fc76ef536fc2822..5579dfa65e4f529bb62ce5fbb30979fd9ab9643c 100644 (file)
@@ -148,9 +148,6 @@ static double get_number_of_groups(PlannerInfo *root,
                                         double path_rows,
                                         grouping_sets_data *gd,
                                         List *target_list);
-static Size estimate_hashagg_tablesize(Path *path,
-                                                  const AggClauseCosts *agg_costs,
-                                                  double dNumGroups);
 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
                                          RelOptInfo *input_rel,
                                          PathTarget *target,
@@ -3659,40 +3656,6 @@ get_number_of_groups(PlannerInfo *root,
        return dNumGroups;
 }
 
-/*
- * estimate_hashagg_tablesize
- *       estimate the number of bytes that a hash aggregate hashtable will
- *       require based on the agg_costs, path width and dNumGroups.
- *
- * XXX this may be over-estimating the size now that hashagg knows to omit
- * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
- * grouping columns not in the hashed set are counted here even though hashagg
- * won't store them. Is this a problem?
- */
-static Size
-estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
-                                                  double dNumGroups)
-{
-       Size            hashentrysize;
-
-       /* Estimate per-hash-entry space at tuple width... */
-       hashentrysize = MAXALIGN(path->pathtarget->width) +
-               MAXALIGN(SizeofMinimalTupleHeader);
-
-       /* plus space for pass-by-ref transition values... */
-       hashentrysize += agg_costs->transitionSpace;
-       /* plus the per-hash-entry overhead */
-       hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
-
-       /*
-        * Note that this disregards the effect of fill-factor and growth policy
-        * of the hash-table. That's probably ok, given default the default
-        * fill-factor is relatively high. It'd be hard to meaningfully factor in
-        * "double-in-size" growth policies here.
-        */
-       return hashentrysize * dNumGroups;
-}
-
 /*
  * create_grouping_paths
  *
@@ -4130,7 +4093,7 @@ consider_groupingsets_paths(PlannerInfo *root,
                ListCell   *lc;
                ListCell   *l_start = list_head(gd->rollups);
                AggStrategy strat = AGG_HASHED;
-               Size            hashsize;
+               double          hashsize;
                double          exclude_groups = 0.0;
 
                Assert(can_hash);
@@ -4297,9 +4260,9 @@ consider_groupingsets_paths(PlannerInfo *root,
                /*
                 * Account first for space needed for groups we can't sort at all.
                 */
-               availspace -= (double) estimate_hashagg_tablesize(path,
-                                                                                                                 agg_costs,
-                                                                                                                 gd->dNumHashGroups);
+               availspace -= estimate_hashagg_tablesize(path,
+                                                                                                agg_costs,
+                                                                                                gd->dNumHashGroups);
 
                if (availspace > 0 && list_length(gd->rollups) > 1)
                {
@@ -6424,7 +6387,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
        if (can_hash)
        {
-               Size            hashaggtablesize;
+               double          hashaggtablesize;
 
                if (parse->groupingSets)
                {
@@ -6735,7 +6698,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 
        if (can_hash && cheapest_total_path != NULL)
        {
-               Size            hashaggtablesize;
+               double          hashaggtablesize;
 
                /* Checked above */
                Assert(parse->hasAggs || parse->groupClause);
@@ -6768,7 +6731,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 
        if (can_hash && cheapest_partial_path != NULL)
        {
-               Size            hashaggtablesize;
+               double          hashaggtablesize;
 
                hashaggtablesize =
                        estimate_hashagg_tablesize(cheapest_partial_path,
index 2e3fab6a9588045563cbf2dbebb3db874557fb9f..e6837869cf6785c11f834623deabc8b5e3d32bf3 100644 (file)
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_statistic_ext.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -3428,6 +3429,43 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
        ReleaseVariableStats(vardata);
 }
 
+/*
+ * estimate_hashagg_tablesize
+ *       estimate the number of bytes that a hash aggregate hashtable will
+ *       require based on the agg_costs, path width and number of groups.
+ *
+ * We return the result as "double" to forestall any possible overflow
+ * problem in the multiplication by dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable.  Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them.  Is this a problem?
+ */
+double
+estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
+                                                  double dNumGroups)
+{
+       Size            hashentrysize;
+
+       /* Estimate per-hash-entry space at tuple width... */
+       hashentrysize = MAXALIGN(path->pathtarget->width) +
+               MAXALIGN(SizeofMinimalTupleHeader);
+
+       /* plus space for pass-by-ref transition values... */
+       hashentrysize += agg_costs->transitionSpace;
+       /* plus the per-hash-entry overhead */
+       hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+
+       /*
+        * Note that this disregards the effect of fill-factor and growth policy
+        * of the hash table.  That's probably ok, given that the default
+        * fill-factor is relatively high.  It'd be hard to meaningfully factor in
+        * "double-in-size" growth policies here.
+        */
+       return hashentrysize * dNumGroups;
+}
+
 
 /*-------------------------------------------------------------------------
  *
index 0ce2175f3e8f2d8a0c3aceb6c270b91fd81fcab6..84805152338839ac76fd001dd957cb05f4b8f814 100644 (file)
@@ -186,6 +186,9 @@ extern void estimate_hash_bucket_stats(PlannerInfo *root,
                                                   Node *hashkey, double nbuckets,
                                                   Selectivity *mcv_freq,
                                                   Selectivity *bucketsize_frac);
+extern double estimate_hashagg_tablesize(Path *path,
+                                                  const AggClauseCosts *agg_costs,
+                                                  double dNumGroups);
 
 extern List *get_quals_from_indexclauses(List *indexclauses);
 extern Cost index_other_operands_eval_cost(PlannerInfo *root,