Merged revisions 60408-60440 via svnmerge (the merge source is cut off in this excerpt)

author Christian Heimes <christian@cheimes.de>

Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)

committer Christian Heimes <christian@cheimes.de>

Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)
author Christian Heimes <christian@cheimes.de>
Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)
committer Christian Heimes <christian@cheimes.de>
Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)
diff --git a/Doc/tutorial/errors.rst b/Doc/tutorial/errors.rst

index fa2d42220fb578c5634e4fdb61f46f40d96a95c7..5c652a98714956aad7b1565756bf9ac151cdbcb6 100644 (file)
--- a/Doc/tutorial/errors.rst
+++ b/Doc/tutorial/errors.rst
@@ -131,8 +131,8 @@ the exception (allowing a caller to handle the exception as well)::
         f = open('myfile.txt')
         s = f.readline()
         i = int(s.strip())
-   except IOError as e:
-       print("I/O error(%s): %s" % (e.errno, e.strerror))
+   except IOError as (errno, strerror):
+       print "I/O error(%s): %s" % (errno, strerror)
     except ValueError:
         print("Could not convert data to an integer.")
     except:
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index d7808a4c1d9c7fade03cf467adc1f27cb1dc370e..39542893d7a09c28dfd65c58974d253a49884e17 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -358,7 +358,14 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
  
  #else
  
-#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
+/* Py_UNICODE_ISSPACE(ch): splitting on whitespace is an important use case and
+   whitespace is usually plain ASCII, so code points below 128 are answered by
+   an inlined look-up in _Py_ascii_whitespace; all others are passed on to
+   _PyUnicode_IsWhitespace().  The macro expands ch more than once. */
+extern const unsigned char _Py_ascii_whitespace[];
+
+#define Py_UNICODE_ISSPACE(ch) \
+       ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
  
  #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
  #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
diff --git a/Lib/_abcoll.py b/Lib/_abcoll.py

index 36d39cf728eb6363693d657820b2d8e647e9ab42..4ce3df4696ed6fc1d2a5074e2ca8c68efa37d834 100644 (file)
--- a/Lib/_abcoll.py
+++ b/Lib/_abcoll.py
@@ -211,6 +211,12 @@ class Set(metaclass=ABCMeta):
              return NotImplemented
          return self._from_iterable(value for value in other if value in self)
  
+    def isdisjoint(self, other):  # True when no value from `other` is in self
+        for value in other:
+            if value in self:
+                return False  # first shared value settles it; stop iterating
+        return True  # `other` exhausted (or empty) without a shared value
+
      def __or__(self, other):
          if not isinstance(other, Iterable):
              return NotImplemented
@@ -278,6 +284,12 @@ class MutableSet(Set):
          """Return True if it was deleted, False if not there."""
          raise NotImplementedError
  
+    def remove(self, value):
+        """Remove value from the set; raise KeyError(value) if it is not a member."""
+        if value not in self:
+            raise KeyError(value)
+        self.discard(value)  # membership was checked above; discard() does the removal
+
      def pop(self):
          """Return the popped value.  Raise KeyError if empty."""
          it = iter(self)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 694f3b0a261b07e078f9c6ae328491cb8a795db7..1b35d4e273b53678f40417cd44e043935790dd7d 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -125,6 +125,64 @@ static PyUnicodeObject *unicode_latin1[256];
  */
  static const char unicode_default_encoding[] = "utf-8";
  
+/* Fast detection of the most frequent whitespace characters: entry c is 1 when code point c (0..127) is whitespace */
+const unsigned char _Py_ascii_whitespace[] = {
+       0, 0, 0, 0, 0, 0, 0, 0,
+       /* 0x0009: HORIZONTAL TABULATION */
+       /* 0x000A: LINE FEED */
+       /* 0x000B: VERTICAL TABULATION */
+       /* 0x000C: FORM FEED */
+       /* 0x000D: CARRIAGE RETURN */
+       0, 1, 1, 1, 1, 1, 0, 0,  /* 0x08 - 0x0F */
+       0, 0, 0, 0, 0, 0, 0, 0,
+       /* 0x001C: FILE SEPARATOR */
+       /* 0x001D: GROUP SEPARATOR */
+       /* 0x001E: RECORD SEPARATOR */
+       /* 0x001F: UNIT SEPARATOR */
+       0, 0, 0, 0, 1, 1, 1, 1,  /* 0x18 - 0x1F */
+       /* 0x0020: SPACE */
+       1, 0, 0, 0, 0, 0, 0, 0,  /* 0x20 - 0x27 */
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Same for linebreaks: entry c is 1 when code point c (0..127) is a line break. NOTE(review): only read in the visible hunks; could be const. */
+static unsigned char ascii_linebreak[] = {
+       0, 0, 0, 0, 0, 0, 0, 0,
+       /* 0x000A: LINE FEED */
+       /* 0x000D: CARRIAGE RETURN */
+       0, 0, 1, 0, 0, 1, 0, 0,  /* 0x08 - 0x0F */
+       0, 0, 0, 0, 0, 0, 0, 0,
+       /* 0x001C: FILE SEPARATOR */
+       /* 0x001D: GROUP SEPARATOR */
+       /* 0x001E: RECORD SEPARATOR */
+       0, 0, 0, 0, 1, 1, 1, 0,  /* 0x18 - 0x1F */
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
  Py_UNICODE
  PyUnicode_GetMax(void)
  {
@@ -151,8 +209,9 @@ static BLOOM_MASK bloom_linebreak;
  
  #define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
  
-#define BLOOM_LINEBREAK(ch)\
-    (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch)))
+#define BLOOM_LINEBREAK(ch) /* line-break test; expands ch more than once */ \
+    ((ch) < 128U ? ascii_linebreak[(ch)] : /* below 128: direct table look-up */ \
+    (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) /* else: bloom mask first, full check only on a hit */
  
  Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
  {
@@ -5602,25 +5661,26 @@ PyObject *split_whitespace(PyUnicodeObject *self,
      register Py_ssize_t j;
      Py_ssize_t len = self->length;
      PyObject *str;
+    register const Py_UNICODE *buf = self->str;
  
      for (i = j = 0; i < len; ) {
         /* find a token */
-       while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
+       while (i < len && Py_UNICODE_ISSPACE(buf[i]))
             i++;
         j = i;
-       while (i < len && !Py_UNICODE_ISSPACE(self->str[i]))
+       while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
             i++;
         if (j < i) {
             if (maxcount-- <= 0)
                 break;
-           SPLIT_APPEND(self->str, j, i);
-           while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
+           SPLIT_APPEND(buf, j, i);
+           while (i < len && Py_UNICODE_ISSPACE(buf[i]))
                 i++;
             j = i;
         }
      }
      if (j < len) {
-       SPLIT_APPEND(self->str, j, len);
+       SPLIT_APPEND(buf, j, len);
      }
      return list;
  
@@ -5693,18 +5753,19 @@ PyObject *split_char(PyUnicodeObject *self,
      register Py_ssize_t j;
      Py_ssize_t len = self->length;
      PyObject *str;
+    register const Py_UNICODE *buf = self->str;
  
      for (i = j = 0; i < len; ) {
-       if (self->str[i] == ch) {
+       if (buf[i] == ch) {
             if (maxcount-- <= 0)
                 break;
-           SPLIT_APPEND(self->str, j, i);
+           SPLIT_APPEND(buf, j, i);
             i = j = i + 1;
         } else
             i++;
      }
      if (j <= len) {
-       SPLIT_APPEND(self->str, j, len);
+       SPLIT_APPEND(buf, j, len);
      }
      return list;
  
@@ -5753,25 +5814,26 @@ PyObject *rsplit_whitespace(PyUnicodeObject *self,
      register Py_ssize_t j;
      Py_ssize_t len = self->length;
      PyObject *str;
+    register const Py_UNICODE *buf = self->str;
  
      for (i = j = len - 1; i >= 0; ) {
         /* find a token */
-       while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+       while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
             i--;
         j = i;
-       while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i]))
+       while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
             i--;
         if (j > i) {
             if (maxcount-- <= 0)
                 break;
-           SPLIT_APPEND(self->str, i + 1, j + 1);
-           while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
+           SPLIT_APPEND(buf, i + 1, j + 1);
+           while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
                 i--;
             j = i;
         }
      }
      if (j >= 0) {
-       SPLIT_APPEND(self->str, 0, j + 1);
+       SPLIT_APPEND(buf, 0, j + 1);
      }
      if (PyList_Reverse(list) < 0)
          goto onError;
@@ -5792,18 +5854,19 @@ PyObject *rsplit_char(PyUnicodeObject *self,
      register Py_ssize_t j;
      Py_ssize_t len = self->length;
      PyObject *str;
+    register const Py_UNICODE *buf = self->str;
  
      for (i = j = len - 1; i >= 0; ) {
-       if (self->str[i] == ch) {
+       if (buf[i] == ch) {
             if (maxcount-- <= 0)
                 break;
-           SPLIT_APPEND(self->str, i + 1, j + 1);
+           SPLIT_APPEND(buf, i + 1, j + 1);
             j = i = i - 1;
         } else
             i--;
      }
      if (j >= -1) {
-       SPLIT_APPEND(self->str, 0, j + 1);
+       SPLIT_APPEND(buf, 0, j + 1);
      }
      if (PyList_Reverse(list) < 0)
          goto onError;
author	Christian Heimes <christian@cheimes.de>
	Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)
committer	Christian Heimes <christian@cheimes.de>
	Wed, 30 Jan 2008 11:58:22 +0000 (11:58 +0000)
Doc/tutorial/errors.rst		patch \| blob \| history
Include/unicodeobject.h		patch \| blob \| history
Lib/_abcoll.py		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history