granicus.if.org Git - postgresql/commitdiff
Avoid out-of-memory in a hash join with many duplicate inner keys.
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 15 Aug 2017 18:05:46 +0000 (14:05 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 15 Aug 2017 18:05:53 +0000 (14:05 -0400)
The executor is capable of splitting buckets during a hash join if
too much memory is being used by a small number of buckets.  However,
this only helps if a bucket's population is actually divisible; if
all the hash keys are alike, the tuples still end up in the same
new bucket.  This can result in an OOM failure if there are enough
inner keys with identical hash values.  The planner's cost estimates
will bias it against choosing a hash join in such situations, but not
by so much that it will never do so.  To mitigate the OOM hazard,
explicitly estimate the hash bucket space needed by just the inner
side's most common value, and if that would exceed work_mem then
add disable_cost to the hash cost estimate.

This approach doesn't account for the possibility that two or more
common values would share the same hash value.  On the other hand,
work_mem is normally a fairly conservative bound, so that eating
two or more times that much space is probably not going to kill us.

If we have no stats about the inner side, ignore this consideration.
There was some discussion of making a conservative assumption, but that
would effectively result in disabling hash join whenever we lack stats,
which seems like an overreaction given how seldom the problem manifests
in the field.

Per a complaint from David Hinkle.  Although this could be viewed
as a bug fix, the lack of similar complaints weighs against back-
patching; indeed we waited for v11 because it seemed already rather
late in the v10 cycle to be making plan choice changes like this one.

Discussion: https://postgr.es/m/32013.1487271761@sss.pgh.pa.us

src/backend/nodes/copyfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/restrictinfo.c
src/backend/utils/adt/selfuncs.c
src/include/nodes/relation.h
src/include/utils/selfuncs.h

index 45a04b0b2753a294c16c54edf20c5d2ab9bf0d8d..72041693dfda4fcce78a670989468f1d67caa5fb 100644 (file)
@@ -2185,6 +2185,8 @@ _copyRestrictInfo(const RestrictInfo *from)
        COPY_SCALAR_FIELD(hashjoinoperator);
        COPY_SCALAR_FIELD(left_bucketsize);
        COPY_SCALAR_FIELD(right_bucketsize);
+       COPY_SCALAR_FIELD(left_mcvfreq);
+       COPY_SCALAR_FIELD(right_mcvfreq);
 
        return newnode;
 }
index b35acb7bdcf17d2ba3baa728827f55f1449a8905..051a8544b0cb98f131c2dd083c4e1e0961ec0986 100644 (file)
@@ -3028,6 +3028,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
        double          hashjointuples;
        double          virtualbuckets;
        Selectivity innerbucketsize;
+       Selectivity innermcvfreq;
        ListCell   *hcl;
 
        /* Mark the path with the correct row estimate */
@@ -3060,9 +3061,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
        virtualbuckets = (double) numbuckets * (double) numbatches;
 
        /*
-        * Determine bucketsize fraction for inner relation.  We use the smallest
-        * bucketsize estimated for any individual hashclause; this is undoubtedly
-        * conservative.
+        * Determine bucketsize fraction and MCV frequency for the inner relation.
+        * We use the smallest bucketsize or MCV frequency estimated for any
+        * individual hashclause; this is undoubtedly conservative.
         *
         * BUT: if inner relation has been unique-ified, we can assume it's good
         * for hashing.  This is important both because it's the right answer, and
@@ -3070,22 +3071,27 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
         * non-unique-ified paths.
         */
        if (IsA(inner_path, UniquePath))
+       {
                innerbucketsize = 1.0 / virtualbuckets;
+               innermcvfreq = 0.0;
+       }
        else
        {
                innerbucketsize = 1.0;
+               innermcvfreq = 1.0;
                foreach(hcl, hashclauses)
                {
                        RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl);
                        Selectivity thisbucketsize;
+                       Selectivity thismcvfreq;
 
                        /*
                         * First we have to figure out which side of the hashjoin clause
                         * is the inner side.
                         *
                         * Since we tend to visit the same clauses over and over when
-                        * planning a large query, we cache the bucketsize estimate in the
-                        * RestrictInfo node to avoid repeated lookups of statistics.
+                        * planning a large query, we cache the bucket stats estimates in
+                        * the RestrictInfo node to avoid repeated lookups of statistics.
                         */
                        if (bms_is_subset(restrictinfo->right_relids,
                                                          inner_path->parent->relids))
@@ -3095,12 +3101,14 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
                                if (thisbucketsize < 0)
                                {
                                        /* not cached yet */
-                                       thisbucketsize =
-                                               estimate_hash_bucketsize(root,
-                                                                                                get_rightop(restrictinfo->clause),
-                                                                                                virtualbuckets);
-                                       restrictinfo->right_bucketsize = thisbucketsize;
+                                       estimate_hash_bucket_stats(root,
+                                                                                          get_rightop(restrictinfo->clause),
+                                                                                          virtualbuckets,
+                                                                                          &restrictinfo->right_mcvfreq,
+                                                                                          &restrictinfo->right_bucketsize);
+                                       thisbucketsize = restrictinfo->right_bucketsize;
                                }
+                               thismcvfreq = restrictinfo->right_mcvfreq;
                        }
                        else
                        {
@@ -3111,19 +3119,36 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
                                if (thisbucketsize < 0)
                                {
                                        /* not cached yet */
-                                       thisbucketsize =
-                                               estimate_hash_bucketsize(root,
-                                                                                                get_leftop(restrictinfo->clause),
-                                                                                                virtualbuckets);
-                                       restrictinfo->left_bucketsize = thisbucketsize;
+                                       estimate_hash_bucket_stats(root,
+                                                                                          get_leftop(restrictinfo->clause),
+                                                                                          virtualbuckets,
+                                                                                          &restrictinfo->left_mcvfreq,
+                                                                                          &restrictinfo->left_bucketsize);
+                                       thisbucketsize = restrictinfo->left_bucketsize;
                                }
+                               thismcvfreq = restrictinfo->left_mcvfreq;
                        }
 
                        if (innerbucketsize > thisbucketsize)
                                innerbucketsize = thisbucketsize;
+                       if (innermcvfreq > thismcvfreq)
+                               innermcvfreq = thismcvfreq;
                }
        }
 
+       /*
+        * If the bucket holding the inner MCV would exceed work_mem, we don't
+        * want to hash unless there is really no other alternative, so apply
+        * disable_cost.  (The executor normally copes with excessive memory usage
+        * by splitting batches, but obviously it cannot separate equal values
+        * that way, so it will be unable to drive the batch size below work_mem
+        * when this is true.)
+        */
+       if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
+                                                  inner_path->pathtarget->width) >
+               (work_mem * 1024L))
+               startup_cost += disable_cost;
+
        /*
         * Compute cost of the hashquals and qpquals (other restriction clauses)
         * separately.
index 9c6c47a1b9bb8ae3573ecda9963ee01ce152eb02..6d8f8938b2f833d296b428d1162084ebf992d447 100644 (file)
@@ -2067,6 +2067,8 @@ adjust_appendrel_attrs_mutator(Node *node,
                newinfo->scansel_cache = NIL;
                newinfo->left_bucketsize = -1;
                newinfo->right_bucketsize = -1;
+               newinfo->left_mcvfreq = -1;
+               newinfo->right_mcvfreq = -1;
 
                return (Node *) newinfo;
        }
index ebae0cd8ce0345a7e4a38e5db287229bd5fb303e..39b52aecc53a276468b172da320eb6afa147cbc5 100644 (file)
@@ -199,6 +199,8 @@ make_restrictinfo_internal(Expr *clause,
 
        restrictinfo->left_bucketsize = -1;
        restrictinfo->right_bucketsize = -1;
+       restrictinfo->left_mcvfreq = -1;
+       restrictinfo->right_mcvfreq = -1;
 
        return restrictinfo;
 }
index e103f5ef16c4d0b47de25248af73c01cacd07041..a7a06146a06cfd007d77aa4658e2a4e0c080e339 100644 (file)
@@ -3559,9 +3559,16 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 }
 
 /*
- * Estimate hash bucketsize fraction (ie, number of entries in a bucket
- * divided by total tuples in relation) if the specified expression is used
- * as a hash key.
+ * Estimate hash bucket statistics when the specified expression is used
+ * as a hash key for the given number of buckets.
+ *
+ * This attempts to determine two values:
+ *
+ * 1. The frequency of the most common value of the expression (returns
+ * zero into *mcv_freq if we can't get that).
+ *
+ * 2. The "bucketsize fraction", ie, average number of entries in a bucket
+ * divided by total tuples in relation.
  *
  * XXX This is really pretty bogus since we're effectively assuming that the
  * distribution of hash keys will be the same after applying restriction
@@ -3587,29 +3594,58 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
  * discourage use of a hash rather strongly if the inner relation is large,
  * which is what we want.  We do not want to hash unless we know that the
  * inner rel is well-dispersed (or the alternatives seem much worse).
+ *
+ * The caller should also check that the mcv_freq is not so large that the
+ * most common value would by itself require an impractically large bucket.
+ * In a hash join, the executor can split buckets if they get too big, but
+ * obviously that doesn't help for a bucket that contains many duplicates of
+ * the same value.
  */
-Selectivity
-estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
+void
+estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
+                                                  Selectivity *mcv_freq,
+                                                  Selectivity *bucketsize_frac)
 {
        VariableStatData vardata;
        double          estfract,
                                ndistinct,
                                stanullfrac,
-                               mcvfreq,
                                avgfreq;
        bool            isdefault;
        AttStatsSlot sslot;
 
        examine_variable(root, hashkey, 0, &vardata);
 
+       /* Look up the frequency of the most common value, if available */
+       *mcv_freq = 0.0;
+
+       if (HeapTupleIsValid(vardata.statsTuple))
+       {
+               if (get_attstatsslot(&sslot, vardata.statsTuple,
+                                                        STATISTIC_KIND_MCV, InvalidOid,
+                                                        ATTSTATSSLOT_NUMBERS))
+               {
+                       /*
+                        * The first MCV stat is for the most common value.
+                        */
+                       if (sslot.nnumbers > 0)
+                               *mcv_freq = sslot.numbers[0];
+                       free_attstatsslot(&sslot);
+               }
+       }
+
        /* Get number of distinct values */
        ndistinct = get_variable_numdistinct(&vardata, &isdefault);
 
-       /* If ndistinct isn't real, punt and return 0.1, per comments above */
+       /*
+        * If ndistinct isn't real, punt.  We normally return 0.1, but if the
+        * mcv_freq is known to be even higher than that, use it instead.
+        */
        if (isdefault)
        {
+               *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
                ReleaseVariableStats(vardata);
-               return (Selectivity) 0.1;
+               return;
        }
 
        /* Get fraction that are null */
@@ -3650,31 +3686,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
        else
                estfract = 1.0 / ndistinct;
 
-       /*
-        * Look up the frequency of the most common value, if available.
-        */
-       mcvfreq = 0.0;
-
-       if (HeapTupleIsValid(vardata.statsTuple))
-       {
-               if (get_attstatsslot(&sslot, vardata.statsTuple,
-                                                        STATISTIC_KIND_MCV, InvalidOid,
-                                                        ATTSTATSSLOT_NUMBERS))
-               {
-                       /*
-                        * The first MCV stat is for the most common value.
-                        */
-                       if (sslot.nnumbers > 0)
-                               mcvfreq = sslot.numbers[0];
-                       free_attstatsslot(&sslot);
-               }
-       }
-
        /*
         * Adjust estimated bucketsize upward to account for skewed distribution.
         */
-       if (avgfreq > 0.0 && mcvfreq > avgfreq)
-               estfract *= mcvfreq / avgfreq;
+       if (avgfreq > 0.0 && *mcv_freq > avgfreq)
+               estfract *= *mcv_freq / avgfreq;
 
        /*
         * Clamp bucketsize to sane range (the above adjustment could easily
@@ -3686,9 +3702,9 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
        else if (estfract > 1.0)
                estfract = 1.0;
 
-       ReleaseVariableStats(vardata);
+       *bucketsize_frac = (Selectivity) estfract;
 
-       return (Selectivity) estfract;
+       ReleaseVariableStats(vardata);
 }
 
 
index 9bae3c6ab98e75bebadc0db8ad4009731c092f17..be2028867a85d01f4b07ab0fe03362e14c6c56a0 100644 (file)
@@ -1807,6 +1807,8 @@ typedef struct RestrictInfo
        /* cache space for hashclause processing; -1 if not yet set */
        Selectivity left_bucketsize;    /* avg bucketsize of left side */
        Selectivity right_bucketsize;   /* avg bucketsize of right side */
+       Selectivity left_mcvfreq;       /* left side's most common val's freq */
+       Selectivity right_mcvfreq;      /* right side's most common val's freq */
 } RestrictInfo;
 
 /*
index c7fdd540e8417a200c590d4853957796cf66b603..dc6069d43556286b6abb481e6f12f0a5462d6774 100644 (file)
@@ -206,8 +206,10 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause,
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
                                        double input_rows, List **pgset);
 
-extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey,
-                                                double nbuckets);
+extern void estimate_hash_bucket_stats(PlannerInfo *root,
+                                                  Node *hashkey, double nbuckets,
+                                                  Selectivity *mcv_freq,
+                                                  Selectivity *bucketsize_frac);
 
 extern List *deconstruct_indexquals(IndexPath *path);
 extern void genericcostestimate(PlannerInfo *root, IndexPath *path,