]> granicus.if.org Git - postgresql/commitdiff
Avoid integer overflow in the loop that extracts histogram entries from
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 5 May 2009 18:02:11 +0000 (18:02 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 5 May 2009 18:02:11 +0000 (18:02 +0000)
ANALYZE's total sample.  The original coding is at risk of overflow for
statistics targets exceeding about 2675; this was not a problem before
8.4 but it is now.  Per bug #4793 from Dennis Noordsij.

src/backend/commands/analyze.c

index 47581ab705f2c92df871a08deb445181b843ea2d..6285a5788894478c21b6012eeb190994fe636013 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.135 2009/03/31 22:12:46 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.136 2009/05/05 18:02:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2260,6 +2260,10 @@ compute_scalar_stats(VacAttrStatsP stats,
                        MemoryContext old_context;
                        Datum      *hist_values;
                        int                     nvals;
+                       int                     pos,
+                                               posfrac,
+                                               delta,
+                                               deltafrac;
 
                        /* Sort the MCV items into position order to speed next loop */
                        qsort((void *) track, num_mcv,
@@ -2313,15 +2317,35 @@ compute_scalar_stats(VacAttrStatsP stats,
                        /* Must copy the target values into anl_context */
                        old_context = MemoryContextSwitchTo(stats->anl_context);
                        hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+
+                       /*
+                        * The object of this loop is to copy the first and last values[]
+                        * entries along with evenly-spaced values in between.  So the
+                        * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)].  But
+                        * computing that subscript directly risks integer overflow when
+                        * the stats target is more than a couple thousand.  Instead we
+                        * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
+                        * the integral and fractional parts of the sum separately.
+                        */
+                       delta = (nvals - 1) / (num_hist - 1);
+                       deltafrac = (nvals - 1) % (num_hist - 1);
+                       pos = posfrac = 0;
+
                        for (i = 0; i < num_hist; i++)
                        {
-                               int                     pos;
-
-                               pos = (i * (nvals - 1)) / (num_hist - 1);
                                hist_values[i] = datumCopy(values[pos].value,
                                                                                   stats->attr->attbyval,
                                                                                   stats->attr->attlen);
+                               pos += delta;
+                               posfrac += deltafrac;
+                               if (posfrac >= (num_hist - 1))
+                               {
+                                       /* fractional part exceeds 1, carry to integer part */
+                                       pos++;
+                                       posfrac -= (num_hist - 1);
+                               }
                        }
+
                        MemoryContextSwitchTo(old_context);
 
                        stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;