*/
#define MAX_MERGE_PENDING 85
-/* If a run wins MIN_GALLOP times in a row, we switch to galloping mode,
- * and stay there until both runs win less often than MIN_GALLOP
- * consecutive times. See listsort.txt for more info.
+/* When we get into galloping mode, we stay there until both runs win less
+ * often than MIN_GALLOP consecutive times. See listsort.txt for more info.
*/
-#define MIN_GALLOP 8
+#define MIN_GALLOP 7
/* Avoid malloc for small temp arrays. */
#define MERGESTATE_TEMP_SIZE 256
/* One MergeState exists on the stack per invocation of mergesort. It's just
* a convenient way to pass state around among the helper functions.
*/
+struct s_slice {
+ PyObject **base;
+ int len;
+};
+
typedef struct s_MergeState {
/* The user-supplied comparison function, or NULL if none given. */
PyObject *compare;
+ /* This controls when we get *into* galloping mode. It's initialized
+ * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for
+ * random data, and lower for highly structured data.
+ */
+ int min_gallop;
+
/* 'a' is temp storage to help with merges. It contains room for
* alloced entries.
*/
* address base[i] and extends for len[i] elements. It's always
* true (so long as the indices are in bounds) that
*
- * base[i] + len[i] == base[i+1]
+ * pending[i].base + pending[i].len == pending[i+1].base
*
* so we could cut the storage for this, but it's a minor amount,
* and keeping all the info explicit simplifies the code.
*/
int n;
- PyObject **base[MAX_MERGE_PENDING];
- int len[MAX_MERGE_PENDING];
+ struct s_slice pending[MAX_MERGE_PENDING];
/* 'a' points to this when possible, rather than muck with malloc. */
PyObject *temparray[MERGESTATE_TEMP_SIZE];
ms->a = ms->temparray;
ms->alloced = MERGESTATE_TEMP_SIZE;
ms->n = 0;
+ ms->min_gallop = MIN_GALLOP;
}
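
To make the adjacency property described in the comment above concrete, here
is a minimal standalone checker; it is not part of the patch, and the forward
declaration of PyObject and the name check_pending_invariant are invented for
illustration:

    #include <assert.h>

    typedef struct _object PyObject;    /* stand-in for the real PyObject */

    struct s_slice {
        PyObject **base;
        int len;
    };

    /* Hypothetical debug helper: every pending run must be non-empty, and
     * each run must end exactly where the next one begins.
     */
    static void
    check_pending_invariant(const struct s_slice *pending, int n)
    {
        int i;
        for (i = 0; i < n; ++i) {
            assert(pending[i].len > 0);
            if (i + 1 < n)
                assert(pending[i].base + pending[i].len == pending[i+1].base);
        }
    }

Any violation would mean the pending runs no longer tile the array being
sorted contiguously.
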
/* Free all the temp memory owned by the MergeState. This must be called
PyObject *compare;
PyObject **dest;
int result = -1; /* guilty until proved innocent */
+ int min_gallop = ms->min_gallop;
assert(ms && pa && pb && na > 0 && nb > 0 && pa + na == pb);
if (MERGE_GETMEM(ms, na) < 0)
* appears to win consistently.
*/
for (;;) {
+ assert(na > 1 && nb > 0);
k = ISLT(*pb, *pa, compare);
if (k) {
if (k < 0)
--nb;
if (nb == 0)
goto Succeed;
- if (bcount >= MIN_GALLOP)
+ if (bcount >= min_gallop)
break;
}
else {
--na;
if (na == 1)
goto CopyB;
- if (acount >= MIN_GALLOP)
+ if (acount >= min_gallop)
break;
}
}
* (if ever) neither run appears to be winning consistently
* anymore.
*/
+ ++min_gallop;
do {
+ assert(na > 1 && nb > 0);
+ min_gallop -= min_gallop > 1;
+ ms->min_gallop = min_gallop;
k = gallop_right(*pb, pa, na, 0, compare);
acount = k;
if (k) {
if (na == 1)
goto CopyB;
} while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
+ ++min_gallop; /* penalize it for leaving galloping mode */
+ ms->min_gallop = min_gallop;
}
Succeed:
result = 0;
int result = -1; /* guilty until proved innocent */
PyObject **basea;
PyObject **baseb;
+ int min_gallop = ms->min_gallop;
assert(ms && pa && pb && na > 0 && nb > 0 && pa + na == pb);
if (MERGE_GETMEM(ms, nb) < 0)
* appears to win consistently.
*/
for (;;) {
+ assert(na > 0 && nb > 1);
k = ISLT(*pb, *pa, compare);
if (k) {
if (k < 0)
--na;
if (na == 0)
goto Succeed;
- if (acount >= MIN_GALLOP)
+ if (acount >= min_gallop)
break;
}
else {
--nb;
if (nb == 1)
goto CopyA;
- if (bcount >= MIN_GALLOP)
+ if (bcount >= min_gallop)
break;
}
}
* (if ever) neither run appears to be winning consistently
* anymore.
*/
+ ++min_gallop;
do {
+ assert(na > 0 && nb > 1);
+ min_gallop -= min_gallop > 1;
+ ms->min_gallop = min_gallop;
k = gallop_right(*pb, basea, na, na-1, compare);
if (k < 0)
goto Fail;
if (na == 0)
goto Succeed;
} while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
+ ++min_gallop; /* penalize it for leaving galloping mode */
+ ms->min_gallop = min_gallop;
}
Succeed:
result = 0;
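
Both merge_lo() and merge_hi() lean on gallop_left() and gallop_right() to
find, in one step, how far one run keeps winning.  As a rough sketch only
(plain ints, no comparison callback, no "hint" start position, and an invented
name, so this is not the code from listobject.c), that exponential probe
followed by a binary search looks like:

    /* Count how many elements of the ascending array a[0..n-1] are <= key. */
    static int
    gallop_count_le(int key, const int *a, int n)
    {
        int lastofs = 0;    /* a[lastofs] <= key is known once we get going */
        int ofs = 1;

        if (n <= 0 || key < a[0])
            return 0;       /* every element is greater than key */

        /* Exponential phase: grow ofs until a[ofs] > key or we pass the end. */
        while (ofs < n && a[ofs] <= key) {
            lastofs = ofs;
            ofs = (ofs << 1) + 1;
            if (ofs <= 0)   /* int overflow paranoia, as in the real code */
                ofs = n;
        }
        if (ofs > n)
            ofs = n;

        /* Now a[lastofs] <= key, and either ofs == n or a[ofs] > key, so the
         * answer lies in (lastofs, ofs].  Binary search pins it down.
         */
        ++lastofs;
        while (lastofs < ofs) {
            int m = lastofs + ((ofs - lastofs) >> 1);
            if (key < a[m])
                ofs = m;
            else
                lastofs = m + 1;
        }
        return ofs;
    }

When winning streaks are short, those bracketing probes cost more comparisons
than simply merging one pair at a time, which is why galloping mode is entered
only after min_gallop consecutive wins.
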
assert(i >= 0);
assert(i == ms->n - 2 || i == ms->n - 3);
- pa = ms->base[i];
- pb = ms->base[i+1];
- na = ms->len[i];
- nb = ms->len[i+1];
+ pa = ms->pending[i].base;
+ na = ms->pending[i].len;
+ pb = ms->pending[i+1].base;
+ nb = ms->pending[i+1].len;
assert(na > 0 && nb > 0);
assert(pa + na == pb);
* run now, also slide over the last run (which isn't involved
* in this merge). The current run i+1 goes away in any case.
*/
- if (i == ms->n - 3) {
- ms->len[i+1] = ms->len[i+2];
- ms->base[i+1] = ms->base[i+2];
- }
- ms->len[i] = na + nb;
+ ms->pending[i].len = na + nb;
+ if (i == ms->n - 3)
+ ms->pending[i+1] = ms->pending[i+2];
--ms->n;
/* Where does b start in a? Elements in a before that can be
static int
merge_collapse(MergeState *ms)
{
- int *len = ms->len;
+ struct s_slice *p = ms->pending;
assert(ms);
while (ms->n > 1) {
int n = ms->n - 2;
- if (n > 0 && len[n-1] <= len[n] + len[n+1]) {
- if (len[n-1] < len[n+1])
+ if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) {
+ if (p[n-1].len < p[n+1].len)
--n;
if (merge_at(ms, n) < 0)
return -1;
}
- else if (len[n] <= len[n+1]) {
+ else if (p[n].len <= p[n+1].len) {
if (merge_at(ms, n) < 0)
return -1;
}
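
When merge_collapse() returns, the runs at the top of the stack satisfy two
length conditions: the run second from the top is longer than the topmost run,
and the run third from the top is longer than the other two combined.  A
hypothetical checker for just that exit condition (the name and the plain-int
signature are invented for illustration, and nothing like it appears in the
patch) would be:

    /* len[0..n-1] are the lengths of the pending runs, bottom to top. */
    static int
    top_runs_are_balanced(const int *len, int n)
    {
        if (n >= 2 && len[n-2] <= len[n-1])
            return 0;   /* topmost run not shorter than the one below it */
        if (n >= 3 && len[n-3] <= len[n-2] + len[n-1])
            return 0;   /* third run not longer than the two above it combined */
        return 1;
    }

The intent is to keep pending run lengths growing at least as fast as the
Fibonacci numbers, which is why a small fixed MAX_MERGE_PENDING is enough.
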
static int
merge_force_collapse(MergeState *ms)
{
- int *len = ms->len;
+ struct s_slice *p = ms->pending;
assert(ms);
while (ms->n > 1) {
int n = ms->n - 2;
- if (n > 0 && len[n-1] < len[n+1])
+ if (n > 0 && p[n-1].len < p[n+1].len)
--n;
if (merge_at(ms, n) < 0)
return -1;
}
/* Push run onto pending-runs stack, and maybe merge. */
assert(ms.n < MAX_MERGE_PENDING);
- ms.base[ms.n] = lo;
- ms.len[ms.n] = n;
+ ms.pending[ms.n].base = lo;
+ ms.pending[ms.n].len = n;
++ms.n;
if (merge_collapse(&ms) < 0)
goto fail;
if (merge_force_collapse(&ms) < 0)
goto fail;
assert(ms.n == 1);
- assert(ms.base[0] == self->ob_item);
- assert(ms.len[0] == self->ob_size);
+ assert(ms.pending[0].base == self->ob_item);
+ assert(ms.pending[0].len == self->ob_size);
succeed:
result = Py_None;
below that, it's either astronomically lucky, or is finding exploitable
structure in the data.
-     n    lg(n!)     *sort     3sort    +sort     %sort    ~sort     !sort
-------  --------  --------  --------  -------  --------  -------  --------
- 32768    444255    453096    453614    32908    452871   130491    469141  old
-                    449235     33019    33016     51328   188720     65534  new
-                     0.86%  1273.80%   -0.33%   782.31%  -30.85%   615.87%  %ch from new
+     n    lg(n!)     *sort     3sort    +sort     %sort    ~sort     !sort
+------  --------  --------  --------  -------  --------  -------  --------
+ 32768    444255    453096    453614    32908    452871   130491    469141  old
+                    448885     33016    33007     50426   182083     65534  new
+                     0.94%  1273.92%   -0.30%   798.09%  -28.33%   615.87%  %ch from new
  65536    954037    972699    981940    65686    973104   260029   1004607
-                    963714     65824    65809    103409   377634    131070
-                     0.93%  1391.77%   -0.19%   841.02%  -31.14%   666.47%
+                    962991     65821    65808    101667   364341    131070
+                     1.01%  1391.83%   -0.19%   857.15%  -28.63%   666.47%
 131072   2039137   2101881   2091491   131232   2092894   554790   2161379
-                   2059092    131413   131362    209130   755476    262142
-                     2.08%  1491.54%   -0.10%   900.76%  -26.56%   724.51%
+                   2057533    131410   131361    206193   728871    262142
+                     2.16%  1491.58%   -0.10%   915.02%  -23.88%   724.51%
 262144   4340409   4464460   4403233   262314   4445884  1107842   4584560
-                   4380687    262440   262460    421998  1511174    524286
-                     1.91%  1577.81%   -0.06%   953.53%  -26.69%   774.44%
+                   4377402    262437   262459    416347  1457945    524286
+                     1.99%  1577.82%   -0.06%   967.83%  -24.01%   774.44%
 524288   9205096   9453356   9408463   524468   9441930  2218577   9692015
-                   9285709    524581   524634    848590  3022584   1048574
-                     1.81%  1693.52%   -0.03%  1012.66%  -26.60%   824.30%
+                   9278734    524580   524633    837947  2916107   1048574
+                     1.88%  1693.52%   -0.03%  1026.79%  -23.92%   824.30%
1048576  19458756  19950272  19838588  1048766  19912134  4430649  20434212
-                  19621118   1048960  1048942   1715806  6045418   2097150
-                     1.68%  1791.26%   -0.02%  1060.51%  -26.71%   874.38%
+                  19606028   1048958  1048941   1694896  5832445   2097150
+                     1.76%  1791.27%   -0.02%  1074.83%  -24.03%   874.38%
Discussion of cases:
bytes each on this box) needed by each test, again with arguments
"15 20 1":
-
  2**i  *sort  \sort  /sort  3sort  +sort  %sort  ~sort  =sort  !sort
 32768  16384      0      0   6256      0  10821  12288      0  16383
 65536  32766      0      0  21652      0  31276  24576      0  32767
less than MIN_GALLOP elements long, at which point we go back to one-pair-
at-a-time mode.
+A refinement: The MergeState struct contains the value of min_gallop that
+controls when we enter galloping mode, initialized to MIN_GALLOP.
+merge_lo() and merge_hi() adjust this higher when galloping isn't paying
+off, and lower when it is.
+
Galloping
---------
We can't guess in advance when it's going to win, though, so we do one pair
at a time until the evidence seems strong that galloping may pay. MIN_GALLOP
-is 8 as I type this, and that's pretty strong evidence. However, if the data
-is random, it simply will trigger galloping mode purely by luck every now
-and again, and it's quite likely to hit one of the losing cases next. 8
-favors protecting against a slowdown on random data at the expense of giving
-up small wins on lightly clustered data, and tiny marginal wins on highly
-clustered data (they win huge anyway, and if you're getting a factor of
-10 speedup, another percent just isn't worth fighting for).
+is 7, and that's pretty strong evidence. However, if the data is random, it
+simply will trigger galloping mode purely by luck every now and again, and
+it's quite likely to hit one of the losing cases next. On the other hand,
+in cases like ~sort, galloping always pays, and MIN_GALLOP is larger than it
+"should be" then. So the MergeState struct keeps a min_gallop variable
+that merge_lo and merge_hi adjust: the longer we stay in galloping mode,
+the smaller min_gallop gets, making it easier to transition back to
+galloping mode (if we ever leave it in the current merge, and at the
+start of the next merge). But whenever the gallop loop doesn't pay,
+min_gallop is increased by one, making it harder to transition back
+to galloping mode (and again both within a merge and across merges). For
+random data, this all but eliminates the gallop penalty: min_gallop grows
+large enough that we almost never get into galloping mode. And for cases
+like ~sort, min_gallop can fall to as low as 1. This seems to work well,
+but all in all it's a minor improvement over using a fixed MIN_GALLOP value.
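
Stripped of the surrounding merge machinery, the adaptation rule amounts to
the sketch below (the function name and the paying_off flag are invented; the
real code updates ms->min_gallop in place inside merge_lo() and merge_hi()):

    /* One adaptation step: paying_off is nonzero while each trip through the
     * gallop loop keeps finding winning streaks of at least MIN_GALLOP, and
     * zero at the moment galloping mode is abandoned.
     */
    static int
    adapt_min_gallop(int min_gallop, int paying_off)
    {
        if (paying_off)
            min_gallop -= min_gallop > 1;   /* easier to gallop again; never below 1 */
        else
            ++min_gallop;                   /* penalize leaving galloping mode */
        return min_gallop;
    }
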
Galloping Complication
Comparing Average # of Compares on Random Arrays
------------------------------------------------
+[NOTE: This was done when the new algorithm used about 0.1% more compares
+ on random data than does its current incarnation.]
+
Here list.sort() is samplesort, and list.msort() this sort:
"""