long(string, base) now takes time linear in len(string) when base is a

author Tim Peters <tim.peters@gmail.com>

Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)

committer Tim Peters <tim.peters@gmail.com>

Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)
author Tim Peters <tim.peters@gmail.com>
Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)
committer Tim Peters <tim.peters@gmail.com>
Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)
diff --git a/Lib/pickle.py b/Lib/pickle.py

index ba0e38b08e6dca35bdcee96c68aec71958a7dd6a..4e80cca5744d11afe6bfa6cc82dd8cac0629a2fc 100644 (file)
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1427,9 +1427,6 @@ def encode_long(x):
      binary = _binascii.unhexlify(ashex)
      return binary[::-1]
  
-# XXX OOPS!  This is still quadratic-time.  While hex(n) is linear-time
-# XXX in the # of digits in n, long(s, 16) is still quadratic-time
-# XXX in len(s).
  def decode_long(data):
      r"""Decode a long from a two's complement little-endian binary string.
  
@@ -1453,7 +1450,7 @@ def decode_long(data):
      if nbytes == 0:
          return 0L
      ashex = _binascii.hexlify(data[::-1])
-    n = long(ashex, 16) # quadratic time
+    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
      if data[-1] >= '\x80':
          n -= 1L << (nbytes * 8)
      return n
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py

index 6615307da6e31a9248fcdd572c74244d582f4e43..2a1ca179c1b2bd33cff92d4b569c48a5786a21b4 100644 (file)
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -247,7 +247,7 @@ class AbstractPickleTests(unittest.TestCase):
  
      def test_long(self):
          for proto in protocols:
-            # 256 bytes is where LONG4 begins
+            # 256 bytes is where LONG4 begins.
              for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
                  nbase = 1L << nbits
                  for npos in nbase-1, nbase, nbase+1:
@@ -257,20 +257,11 @@ class AbstractPickleTests(unittest.TestCase):
                          self.assertEqual(n, got)
          # Try a monster.  This is quadratic-time in protos 0 & 1, so don't
          # bother with those.
-        # XXX Damn.  pickle.py is still quadratic-time here, due to
-        # XXX long(string, 16).  cPickle runs this in an eyeblink, but I
-        # XXX gave up waiting for pickle.py to get beyond "loading".  Giving
-        # XXX up for now.
-        return
-        print "building long"
          nbase = long("deadbeeffeedface", 16)
          nbase += nbase << 1000000
          for n in nbase, -nbase:
-            print "dumping"
              p = self.dumps(n, 2)
-            print "loading"
              got = self.loads(p)
-            print "checking"
              self.assertEqual(n, got)
  
      def test_reduce(self):
diff --git a/Misc/NEWS b/Misc/NEWS

index 5d3f57c50e8ffb433d07042e11be578d8ceaabcd..e9f105bdf99926848c11d29bd9548c7b5cc7185a 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.3 alpha 2?
  Core and builtins
  -----------------
  
+- long(string, base) takes time linear in len(string) when base is a power
+  of 2 now.  It used to take time quadratic in len(string).
+
  - filter returns now Unicode results for Unicode arguments.
  
  - raw_input can now return Unicode objects.
diff --git a/Objects/longobject.c b/Objects/longobject.c

index 9d7243f7e018028ee3f1db8fef61839a8fc87caf..7ca82446f23532ccbfc90eb2a74db9bea48455df 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1090,6 +1090,95 @@ long_format(PyObject *aa, int base, int addL)
         return (PyObject *)str;
  }
  
+/* *str points to the first digit in a string of base base digits.  base
+ * is a power of 2 (2, 4, 8, 16, or 32).  *str is set to point to the first
+ * non-digit (which may be *str!).  A normalized long is returned.
+ * The point to this routine is that it takes time linear in the number of
+ * string characters.
+ */
+static PyLongObject *
+long_from_binary_base(char **str, int base)
+{
+       char *p = *str;
+       char *start = p;
+       int bits_per_char;
+       int n;
+       PyLongObject *z;
+       twodigits accum;
+       int bits_in_accum;
+       digit *pdigit;
+
+       assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0);
+       n = base;
+       for (bits_per_char = -1; n; ++bits_per_char)
+               n >>= 1;
+       /* n <- total # of bits needed, while setting p to end-of-string */
+       n = 0;
+       for (;;) {
+               int k = -1;
+               char ch = *p;
+
+               if (ch <= '9')
+                       k = ch - '0';
+               else if (ch >= 'a')
+                       k = ch - 'a' + 10;
+               else if (ch >= 'A')
+                       k = ch - 'A' + 10;
+               if (k < 0 || k >= base)
+                       break;
+               n += bits_per_char;
+               if (n < 0) {
+                       PyErr_SetString(PyExc_ValueError,
+                                       "long string too large to convert");
+                       return NULL;
+               }
+               ++p;
+       }
+       *str = p;
+       /* n <- # of Python digits needed, = ceiling(n/SHIFT). */
+       n = (n + SHIFT - 1) / SHIFT;
+       z = _PyLong_New(n);
+       if (z == NULL)
+               return NULL;
+       /* Read string from right, and fill in long from left; i.e.,
+        * from least to most significant in both.
+        */
+       accum = 0;
+       bits_in_accum = 0;
+       pdigit = z->ob_digit;
+       while (--p >= start) {
+               unsigned char ch = (unsigned char)*p;
+               digit k;
+
+               if (ch <= '9')
+                       k = ch - '0';
+               else if (ch >= 'a')
+                       k = ch - 'a' + 10;
+               else {
+                       assert(ch >= 'A');
+                       k = ch - 'A' + 10;
+               }
+               assert(k >= 0 && k <= base);
+               accum |= k << bits_in_accum;
+               bits_in_accum += bits_per_char;
+               if (bits_in_accum >= SHIFT) {
+                       *pdigit++ = (digit)(accum & MASK);
+                       assert(pdigit - z->ob_digit <= n);
+                       accum >>= SHIFT;
+                       bits_in_accum -= SHIFT;
+                       assert(bits_in_accum < SHIFT);
+               }
+       }
+       if (bits_in_accum) {
+               assert(bits_in_accum <= SHIFT);
+               *pdigit++ = (digit)accum;
+               assert(pdigit - z->ob_digit <= n);
+       }
+       while (pdigit - z->ob_digit < n)
+               *pdigit++ = 0;
+       return long_normalize(z);
+}
+
  PyObject *
  PyLong_FromString(char *str, char **pend, int base)
  {
@@ -1122,23 +1211,27 @@ PyLong_FromString(char *str, char **pend, int base)
         }
         if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
                 str += 2;
-       z = _PyLong_New(0);
         start = str;
-       for ( ; z != NULL; ++str) {
-               int k = -1;
-               PyLongObject *temp;
+       if ((base & (base - 1)) == 0)
+               z = long_from_binary_base(&str, base);
+       else {
+               z = _PyLong_New(0);
+               for ( ; z != NULL; ++str) {
+                       int k = -1;
+                       PyLongObject *temp;
  
-               if (*str <= '9')
-                       k = *str - '0';
-               else if (*str >= 'a')
-                       k = *str - 'a' + 10;
-               else if (*str >= 'A')
-                       k = *str - 'A' + 10;
-               if (k < 0 || k >= base)
-                       break;
-               temp = muladd1(z, (digit)base, (digit)k);
-               Py_DECREF(z);
-               z = temp;
+                       if (*str <= '9')
+                               k = *str - '0';
+                       else if (*str >= 'a')
+                               k = *str - 'a' + 10;
+                       else if (*str >= 'A')
+                               k = *str - 'A' + 10;
+                       if (k < 0 || k >= base)
+                               break;
+                       temp = muladd1(z, (digit)base, (digit)k);
+                       Py_DECREF(z);
+                       z = temp;
+               }
         }
         if (z == NULL)
                 return NULL;
author	Tim Peters <tim.peters@gmail.com>
	Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Sun, 2 Feb 2003 07:51:32 +0000 (07:51 +0000)
Lib/pickle.py		patch \| blob \| history
Lib/test/pickletester.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/longobject.c		patch \| blob \| history