On c.l.py, Martin v. Löwis said that Py_UNICODE could be of a signed type,

author Tim Peters <tim.peters@gmail.com>

Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)

committer Tim Peters <tim.peters@gmail.com>

Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)
author Tim Peters <tim.peters@gmail.com>
Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)
committer Tim Peters <tim.peters@gmail.com>
Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 7adcd671c0fe87355bde5a18de0ef785ee2bde51..83104d802f31cb8dd53da5b355c985c82aea2f2b 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1,4 +1,4 @@
-/* 
+/*
  
  Unicode implementation based on original code by Fredrik Lundh,
  modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
@@ -53,7 +53,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  
     The implementation will keep allocated Unicode memory intact for
     all objects on the free list having a size less than this
-   limit. This reduces malloc() overhead for small Unicode objects.  
+   limit. This reduces malloc() overhead for small Unicode objects.
  
     At worst this will result in MAX_UNICODE_FREELIST_SIZE *
     (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
@@ -98,7 +98,7 @@ static PyUnicodeObject *unicode_latin1[256];
     parameter; it is initialized by _PyUnicode_Init().
  
     Always use the PyUnicode_SetDefaultEncoding() and
-   PyUnicode_GetDefaultEncoding() APIs to access this global. 
+   PyUnicode_GetDefaultEncoding() APIs to access this global.
  
  */
  static char unicode_default_encoding[100];
@@ -122,7 +122,7 @@ int unicode_resize(register PyUnicodeObject *unicode,
                        int length)
  {
      void *oldstr;
-    
+
      /* Shortcut if there's nothing much to do. */
      if (unicode->length == length)
         goto reset;
@@ -130,10 +130,14 @@ int unicode_resize(register PyUnicodeObject *unicode,
      /* Resizing shared object (unicode_empty or single character
         objects) in-place is not allowed. Use PyUnicode_Resize()
         instead ! */
-    if (unicode == unicode_empty || 
-       (unicode->length == 1 && 
-         /* XXX Is unicode->str[] always unsigned? */
-        unicode->str[0] < 256U &&
+    if (unicode == unicode_empty ||
+       (unicode->length == 1 &&
+         /* MvL said unicode->str[] may be signed.  Python generally assumes
+          * an int contains at least 32 bits, and we don't use more than
+          * 32 bits even in a UCS4 build, so casting to unsigned int should
+          * be correct.
+          */
+        (unsigned int)unicode->str[0] < 256U &&
          unicode_latin1[unicode->str[0]] == unicode)) {
          PyErr_SetString(PyExc_SystemError,
                          "can't resize shared unicode objects");
@@ -159,12 +163,12 @@ int unicode_resize(register PyUnicodeObject *unicode,
          unicode->defenc = NULL;
      }
      unicode->hash = -1;
-    
+
      return 0;
  }
  
  /* We allocate one more byte to make sure the string is
-   Ux0000 terminated -- XXX is this needed ? 
+   Ux0000 terminated -- XXX is this needed ?
  
     XXX This allocator could further be enhanced by assuring that the
         free list never reduces its size below 1.
@@ -176,7 +180,7 @@ PyUnicodeObject *_PyUnicode_New(int length)
  {
      register PyUnicodeObject *unicode;
  
-    /* Optimization for empty strings */
+    /* Optimization for empty strings */
      if (length == 0 && unicode_empty != NULL) {
          Py_INCREF(unicode_empty);
          return unicode_empty;
@@ -190,7 +194,7 @@ PyUnicodeObject *_PyUnicode_New(int length)
         if (unicode->str) {
             /* Keep-Alive optimization: we only upsize the buffer,
                never downsize it. */
-           if ((unicode->length < length) && 
+           if ((unicode->length < length) &&
                  unicode_resize(unicode, length) < 0) {
                 PyMem_DEL(unicode->str);
                 goto onError;
@@ -213,8 +217,12 @@ PyUnicodeObject *_PyUnicode_New(int length)
         goto onError;
      }
      /* Initialize the first element to guard against cases where
-       the caller fails before initializing str.
-    */
+     * the caller fails before initializing str -- unicode_resize()
+     * reads str[0], and the Keep-Alive optimization can keep memory
+     * allocated for str alive across a call to unicode_dealloc(unicode).
+     * We don't want unicode_resize to read uninitialized memory in
+     * that case.
+     */
      unicode->str[0] = 0;
      unicode->str[length] = 0;
      unicode->length = length;
@@ -273,7 +281,7 @@ int PyUnicode_Resize(PyObject **unicode, int length)
      /* Resizing unicode_empty and single character objects is not
         possible since these are being shared. We simply return a fresh
         copy with the same Unicode content. */
-    if (v->length != length && 
+    if (v->length != length &&
         (v == unicode_empty || v->length == 1)) {
         PyUnicodeObject *w = _PyUnicode_New(length);
         if (w == NULL)
@@ -324,7 +332,7 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
             return (PyObject *)unicode;
         }
      }
-    
+
      unicode = _PyUnicode_New(size);
      if (!unicode)
          return NULL;
@@ -355,7 +363,7 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
      /* Copy the wchar_t data into the new object */
  #ifdef HAVE_USABLE_WCHAR_T
      memcpy(unicode->str, w, size * sizeof(wchar_t));
-#else    
+#else
      {
         register Py_UNICODE *u;
         register int i;
@@ -458,7 +466,7 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
      const char *s = NULL;
      int len;
      PyObject *v;
-    
+
      if (obj == NULL) {
         PyErr_BadInternalCall();
         return NULL;
@@ -505,13 +513,13 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
                      obj->ob_type->tp_name);
         goto onError;
      }
-    
+
      /* Convert to Unicode */
      if (len == 0) {
         Py_INCREF(unicode_empty);
         v = (PyObject *)unicode_empty;
      }
-    else 
+    else
         v = PyUnicode_Decode(s, len, encoding, errors);
  
      return v;
@@ -558,7 +566,7 @@ PyObject *PyUnicode_Decode(const char *s,
      }
      Py_DECREF(buffer);
      return unicode;
-    
+
   onError:
      Py_XDECREF(buffer);
      return NULL;
@@ -570,7 +578,7 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s,
                            const char *errors)
  {
      PyObject *v, *unicode;
-    
+
      unicode = PyUnicode_FromUnicode(s, size);
      if (unicode == NULL)
         return NULL;
@@ -584,13 +592,13 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
                                      const char *errors)
  {
      PyObject *v;
-    
+
      if (!PyUnicode_Check(unicode)) {
          PyErr_BadArgument();
          goto onError;
      }
  
-    if (encoding == NULL) 
+    if (encoding == NULL)
         encoding = PyUnicode_GetDefaultEncoding();
  
      /* Shortcuts for common default encodings */
@@ -620,7 +628,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
          goto onError;
      }
      return v;
-    
+
   onError:
      return NULL;
  }
@@ -670,7 +678,7 @@ const char *PyUnicode_GetDefaultEncoding(void)
  int PyUnicode_SetDefaultEncoding(const char *encoding)
  {
      PyObject *v;
-    
+
      /* Make sure the encoding is valid. As side effect, this also
         loads the encoding into the codec registry cache. */
      v = _PyCodec_Lookup(encoding);
@@ -678,7 +686,7 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
         goto onError;
      Py_DECREF(v);
      strncpy(unicode_default_encoding,
-           encoding, 
+           encoding,
             sizeof(unicode_default_encoding));
      return 0;
  
@@ -778,7 +786,7 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
  
  /* see RFC2152 for details */
  
-static 
+static
  char utf7_special[128] = {
      /* indicate whether a UTF-7 character is special i.e. cannot be directly
         encoded:
@@ -869,7 +877,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
              if ((ch == '-') || !B64CHAR(ch)) {
                  inShift = 0;
                  s++;
-                    
+
                  /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
                  if (bitsleft >= 6) {
                      /* The shift sequence has a partial character in it. If
@@ -877,26 +885,26 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
                         but that is not the case here */
  
                      errmsg = "partial character in shift sequence";
-                    goto utf7Error; 
+                    goto utf7Error;
                  }
                  /* According to RFC2152 the remaining bits should be zero. We
-                   choose to signal an error/insert a replacement character 
+                   choose to signal an error/insert a replacement character
                     here to indicate the potential of a misencoded character. */
  
                  /* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */
                  if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) {
                      errmsg = "non-zero padding bits in shift sequence";
-                    goto utf7Error; 
+                    goto utf7Error;
                  }
  
                  if (ch == '-') {
                      if ((s < e) && (*(s) == '-')) {
-                        *p++ = '-';   
+                        *p++ = '-';
                          inShift = 1;
                      }
                  } else if (SPECIAL(ch,0,0)) {
                      errmsg = "unexpected special character";
-                       goto utf7Error;  
+                       goto utf7Error;
                  } else  {
                      *p++ = ch;
                  }
@@ -922,7 +930,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
          else if (SPECIAL(ch,0,0)) {
              errmsg = "unexpected special character";
              s++;
-               goto utf7Error;  
+               goto utf7Error;
          }
          else {
              *p++ = ch;
@@ -1026,7 +1034,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
                  /* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft);
  
                  /* If the next character is special then we don't need to terminate
-                   the shift sequence. If the next character is not a BASE64 character 
+                   the shift sequence. If the next character is not a BASE64 character
                     or '-' then the shift sequence will be terminated implicitly and we
                     don't have to insert a '-'. */
  
@@ -1035,7 +1043,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
                          Py_UNICODE ch2 = s[i+1];
  
                          if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) {
-                           
+
                          } else if (B64CHAR(ch2) || ch2 == '-') {
                              *out++ = '-';
                              inShift = 0;
@@ -1049,7 +1057,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
                          inShift = 0;
                      }
                  }
-            }            
+            }
          }
         }
      if (bitsleft) {
@@ -1070,7 +1078,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
  
  /* --- UTF-8 Codec -------------------------------------------------------- */
  
-static 
+static
  char utf8_code_length[256] = {
      /* Map UTF-8 encoded prefix byte to sequence length.  zero means
         illegal prefix.  see RFC 2279 for details */
@@ -1171,7 +1179,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
              break;
  
          case 3:
-            if ((s[1] & 0xc0) != 0x80 || 
+            if ((s[1] & 0xc0) != 0x80 ||
                  (s[2] & 0xc0) != 0x80) {
                  errmsg = "invalid data";
                 startinpos = s-starts;
@@ -1181,7 +1189,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
              ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
              if (ch < 0x0800) {
                 /* Note: UTF-8 encodings of surrogates are considered
-                  legal UTF-8 sequences; 
+                  legal UTF-8 sequences;
  
                    XXX For wide builds (UCS-4) we should probably try
                        to recombine the surrogates into a single code
@@ -1222,13 +1230,13 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
             *p++ = (Py_UNICODE)ch;
  #else
              /*  compute and append the two surrogates: */
-            
+
              /*  translate from 10000..10FFFF to 0..FFFF */
              ch -= 0x10000;
-                    
+
              /*  high surrogate = top 10 bits added to D800 */
              *p++ = (Py_UNICODE)(0xD800 + (ch >> 10));
-                    
+
              /*  low surrogate = bottom 10 bits added to DC00 */
              *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
  #endif
@@ -1243,7 +1251,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
          }
          s += n;
         continue;
-       
+
      utf8Error:
      outpos = p-PyUnicode_AS_UNICODE(unicode);
      if (unicode_decode_call_errorhandler(
@@ -1437,7 +1445,7 @@ PyUnicode_DecodeUTF16(const char *s,
             q += 2;
             bo = 1;
         }
-#else    
+#else
         if (bom == 0xFEFF) {
             q += 2;
             bo = 1;
@@ -1566,7 +1574,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
      for (i = pairs = 0; i < size; i++)
         if (s[i] >= 0x10000)
             pairs++;
-    v = PyString_FromStringAndSize(NULL, 
+    v = PyString_FromStringAndSize(NULL,
                   2 * (size + pairs + (byteorder == 0)));
      if (v == NULL)
          return NULL;
@@ -1887,25 +1895,25 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
  
      if (quotes) {
          *p++ = 'u';
-        *p++ = (findchar(s, size, '\'') && 
+        *p++ = (findchar(s, size, '\'') &&
                  !findchar(s, size, '"')) ? '"' : '\'';
      }
      while (size-- > 0) {
          Py_UNICODE ch = *s++;
  
          /* Escape quotes */
-        if (quotes && 
+        if (quotes &&
             (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) {
              *p++ = '\\';
              *p++ = (char) ch;
             continue;
-        } 
+        }
  
  #ifdef Py_UNICODE_WIDE
          /* Map 21-bit characters to '\U00xxxxxx' */
          else if (ch >= 0x10000) {
             int offset = p - PyString_AS_STRING(repr);
-           
+
             /* Resize the string if necessary */
             if (offset + 12 > PyString_GET_SIZE(repr)) {
                 if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
@@ -1930,7 +1938,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
         else if (ch >= 0xD800 && ch < 0xDC00) {
             Py_UNICODE ch2;
             Py_UCS4 ucs;
-           
+
             ch2 = *s++;
             size--;
             if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
@@ -1982,7 +1990,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
              *p++ = 'x';
              *p++ = hexdigit[(ch >> 4) & 0x000F];
              *p++ = hexdigit[ch & 0x000F];
-        } 
+        }
  
          /* Copy everything else as-is */
          else
@@ -2028,7 +2036,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
      const char *bs;
      PyObject *errorHandler = NULL;
      PyObject *exc = NULL;
-    
+
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
         length after conversion to the true value. (But decoding error
@@ -2111,7 +2119,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
      return (PyObject *)v;
-    
+
   onError:
      Py_XDECREF(v);
      Py_XDECREF(errorHandler);
@@ -2154,7 +2162,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
              *p++ = hexdigit[(ch >> 8) & 0xf];
              *p++ = hexdigit[(ch >> 4) & 0xf];
              *p++ = hexdigit[ch & 15];
-        } 
+        }
          else
  #endif
         /* Map 16-bit characters to '\uxxxx' */
@@ -2193,7 +2201,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
  {
      PyUnicodeObject *v;
      Py_UNICODE *p;
-    
+
      /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
      if (size == 1 && *(unsigned char*)s < 256) {
         Py_UNICODE r = *(unsigned char*)s;
@@ -2209,7 +2217,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
      while (size-- > 0)
         *p++ = (unsigned char)*s++;
      return (PyObject *)v;
-    
+
   onError:
      Py_XDECREF(v);
      return NULL;
@@ -2509,13 +2517,13 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
      const char *e;
      PyObject *errorHandler = NULL;
      PyObject *exc = NULL;
-    
+
      /* ASCII is equivalent to the first 128 ordinals in Unicode. */
      if (size == 1 && *(unsigned char*)s < 128) {
         Py_UNICODE r = *(unsigned char*)s;
         return PyUnicode_FromUnicode(&r, 1);
      }
-    
+
      v = _PyUnicode_New(size);
      if (v == NULL)
         goto onError;
@@ -2547,7 +2555,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
      return (PyObject *)v;
-    
+
   onError:
      Py_XDECREF(v);
      Py_XDECREF(errorHandler);
@@ -2665,7 +2673,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
      int extrachars = 0;
      PyObject *errorHandler = NULL;
      PyObject *exc = NULL;
-    
+
      /* Default to Latin-1 */
      if (mapping == NULL)
         return PyUnicode_DecodeLatin1(s, size, errors);
@@ -2738,7 +2746,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
                     int needed = (targetsize - extrachars) + \
                                  (targetsize << 2);
                     extrachars += needed;
-                   if (_PyUnicode_Resize(&v, 
+                   if (_PyUnicode_Resize(&v,
                                          PyUnicode_GET_SIZE(v) + needed) < 0) {
                         Py_DECREF(x);
                         goto onError;
@@ -2769,7 +2777,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
      return (PyObject *)v;
-    
+
   onError:
      Py_XDECREF(errorHandler);
      Py_XDECREF(exc);
@@ -3421,7 +3429,7 @@ PyObject *PyUnicode_Translate(PyObject *str,
                               const char *errors)
  {
      PyObject *result;
-    
+
      str = PyUnicode_FromObject(str);
      if (str == NULL)
         goto onError;
@@ -3431,12 +3439,12 @@ PyObject *PyUnicode_Translate(PyObject *str,
                                         errors);
      Py_DECREF(str);
      return result;
-    
+
   onError:
      Py_XDECREF(str);
      return NULL;
  }
-    
+
  /* --- Decimal Encoder ---------------------------------------------------- */
  
  int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@@ -3469,7 +3477,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
         Py_UNICODE *uni2;
         Py_UNICODE *collstart;
         Py_UNICODE *collend;
-       
+
         if (Py_UNICODE_ISSPACE(ch)) {
             *output++ = ' ';
             ++p;
@@ -3570,7 +3578,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
  
  /* --- Helpers ------------------------------------------------------------ */
  
-static 
+static
  int count(PyUnicodeObject *self,
           int start,
           int end,
@@ -3610,7 +3618,7 @@ int PyUnicode_Count(PyObject *str,
                     int end)
  {
      int result;
-    
+
      str = PyUnicode_FromObject(str);
      if (str == NULL)
         return -1;
@@ -3619,17 +3627,17 @@ int PyUnicode_Count(PyObject *str,
         Py_DECREF(str);
         return -1;
      }
-    
+
      result = count((PyUnicodeObject *)str,
                    start, end,
                    (PyUnicodeObject *)substr);
-    
+
      Py_DECREF(str);
      Py_DECREF(substr);
      return result;
  }
  
-static 
+static
  int findstring(PyUnicodeObject *self,
                PyUnicodeObject *substring,
                int start,
@@ -3673,7 +3681,7 @@ int PyUnicode_Find(PyObject *str,
                    int direction)
  {
      int result;
-    
+
      str = PyUnicode_FromObject(str);
      if (str == NULL)
         return -2;
@@ -3682,7 +3690,7 @@ int PyUnicode_Find(PyObject *str,
         Py_DECREF(str);
         return -2;
      }
-    
+
      result = findstring((PyUnicodeObject *)str,
                         (PyUnicodeObject *)substr,
                         start, end, direction);
@@ -3691,7 +3699,7 @@ int PyUnicode_Find(PyObject *str,
      return result;
  }
  
-static 
+static
  int tailmatch(PyUnicodeObject *self,
               PyUnicodeObject *substring,
               int start,
@@ -3735,7 +3743,7 @@ int PyUnicode_Tailmatch(PyObject *str,
                         int direction)
  {
      int result;
-    
+
      str = PyUnicode_FromObject(str);
      if (str == NULL)
         return -1;
@@ -3744,7 +3752,7 @@ int PyUnicode_Tailmatch(PyObject *str,
         Py_DECREF(substr);
         return -1;
      }
-    
+
      result = tailmatch((PyUnicodeObject *)str,
                        (PyUnicodeObject *)substr,
                        start, end, direction);
@@ -3753,7 +3761,7 @@ int PyUnicode_Tailmatch(PyObject *str,
      return result;
  }
  
-static 
+static
  const Py_UNICODE *findchar(const Py_UNICODE *s,
                      int size,
                      Py_UNICODE ch)
@@ -3772,7 +3780,7 @@ const Py_UNICODE *findchar(const Py_UNICODE *s,
  /* Apply fixfct filter to the Unicode object self and return a
     reference to the modified object */
  
-static 
+static
  PyObject *fixup(PyUnicodeObject *self,
                 int (*fixfct)(PyUnicodeObject *s))
  {
@@ -3796,16 +3804,16 @@ PyObject *fixup(PyUnicodeObject *self,
      return (PyObject*) u;
  }
  
-static 
+static
  int fixupper(PyUnicodeObject *self)
  {
      int len = self->length;
      Py_UNICODE *s = self->str;
      int status = 0;
-    
+
      while (len-- > 0) {
         register Py_UNICODE ch;
-       
+
         ch = Py_UNICODE_TOUPPER(*s);
         if (ch != *s) {
              status = 1;
@@ -3817,16 +3825,16 @@ int fixupper(PyUnicodeObject *self)
      return status;
  }
  
-static 
+static
  int fixlower(PyUnicodeObject *self)
  {
      int len = self->length;
      Py_UNICODE *s = self->str;
      int status = 0;
-    
+
      while (len-- > 0) {
         register Py_UNICODE ch;
-       
+
         ch = Py_UNICODE_TOLOWER(*s);
         if (ch != *s) {
              status = 1;
@@ -3838,13 +3846,13 @@ int fixlower(PyUnicodeObject *self)
      return status;
  }
  
-static 
+static
  int fixswapcase(PyUnicodeObject *self)
  {
      int len = self->length;
      Py_UNICODE *s = self->str;
      int status = 0;
-    
+
      while (len-- > 0) {
          if (Py_UNICODE_ISUPPER(*s)) {
              *s = Py_UNICODE_TOLOWER(*s);
@@ -3859,13 +3867,13 @@ int fixswapcase(PyUnicodeObject *self)
      return status;
  }
  
-static 
+static
  int fixcapitalize(PyUnicodeObject *self)
  {
      int len = self->length;
      Py_UNICODE *s = self->str;
      int status = 0;
-    
+
      if (len == 0)
         return 0;
      if (Py_UNICODE_ISLOWER(*s)) {
@@ -3900,19 +3908,19 @@ int fixtitle(PyUnicodeObject *self)
         else
             return 0;
      }
-    
+
      e = p + PyUnicode_GET_SIZE(self);
      previous_is_cased = 0;
      for (; p < e; p++) {
         register const Py_UNICODE ch = *p;
-       
+
         if (previous_is_cased)
             *p = Py_UNICODE_TOLOWER(ch);
         else
             *p = Py_UNICODE_TOTITLE(ch);
-       
-       if (Py_UNICODE_ISLOWER(ch) || 
-           Py_UNICODE_ISUPPER(ch) || 
+
+       if (Py_UNICODE_ISLOWER(ch) ||
+           Py_UNICODE_ISUPPER(ch) ||
             Py_UNICODE_ISTITLE(ch))
             previous_is_cased = 1;
         else
@@ -3949,7 +3957,7 @@ PyObject *PyUnicode_Join(PyObject *separator,
         sep = PyUnicode_AS_UNICODE(separator);
         seplen = PyUnicode_GET_SIZE(separator);
      }
-    
+
      res = _PyUnicode_New(sz);
      if (res == NULL)
         goto onError;
@@ -4013,9 +4021,9 @@ PyObject *PyUnicode_Join(PyObject *separator,
      return NULL;
  }
  
-static 
-PyUnicodeObject *pad(PyUnicodeObject *self, 
-                    int left, 
+static
+PyUnicodeObject *pad(PyUnicodeObject *self,
+                    int left,
                      int right,
                      Py_UNICODE fill)
  {
@@ -4112,7 +4120,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string,
  
      for (i = j = 0; i < len; ) {
         int eol;
-       
+
         /* Find a line and append it */
         while (i < len && !Py_UNICODE_ISLINEBREAK(data[i]))
             i++;
@@ -4144,7 +4152,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string,
      return NULL;
  }
  
-static 
+static
  PyObject *split_char(PyUnicodeObject *self,
                      PyObject *list,
                      Py_UNICODE ch,
@@ -4174,7 +4182,7 @@ PyObject *split_char(PyUnicodeObject *self,
      return NULL;
  }
  
-static 
+static
  PyObject *split_substring(PyUnicodeObject *self,
                           PyObject *list,
                           PyUnicodeObject *substring,
@@ -4236,7 +4244,7 @@ PyObject *split(PyUnicodeObject *self,
         return split_substring(self,list,substring,maxcount);
  }
  
-static 
+static
  PyObject *replace(PyUnicodeObject *self,
                   PyUnicodeObject *str1,
                   PyUnicodeObject *str2,
@@ -4259,13 +4267,13 @@ PyObject *replace(PyUnicodeObject *self,
          } else {
             Py_UNICODE u1 = str1->str[0];
             Py_UNICODE u2 = str2->str[0];
-           
+
              u = (PyUnicodeObject*) PyUnicode_FromUnicode(
                  NULL,
                  self->length
                  );
              if (u != NULL) {
-               Py_UNICODE_COPY(u->str, self->str, 
+               Py_UNICODE_COPY(u->str, self->str,
                                 self->length);
                  for (i = 0; i < u->length; i++)
                      if (u->str[i] == u1) {
@@ -4327,7 +4335,7 @@ PyObject *replace(PyUnicodeObject *self,
              }
          }
      }
-    
+
      return (PyObject *) u;
  }
  
@@ -4433,9 +4441,9 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
  
  static short utf16Fixup[32] =
  {
-    0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0x2000, -0x800, -0x800, -0x800, -0x800
  };
  
@@ -4449,9 +4457,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
  
      len1 = str1->length;
      len2 = str2->length;
-    
+
      while (len1 > 0 && len2 > 0) {
-        Py_UNICODE c1, c2;     
+        Py_UNICODE c1, c2;
  
          c1 = *s1++;
          c2 = *s2++;
@@ -4464,7 +4472,7 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
  
          if (c1 != c2)
              return (c1 < c2) ? -1 : 1;
-        
+
          len1--; len2--;
      }
  
@@ -4483,9 +4491,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
  
      len1 = str1->length;
      len2 = str2->length;
-    
+
      while (len1 > 0 && len2 > 0) {
-        Py_UNICODE c1, c2;     
+        Py_UNICODE c1, c2;
  
          c1 = *s1++;
          c2 = *s2++;
@@ -4651,7 +4659,7 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
                                                 (PyObject *)substring);
      if (substring == NULL)
         return NULL;
-    
+
      if (start < 0)
          start += self->length;
      if (start < 0)
@@ -4836,7 +4844,7 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
      if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring,
                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
          return NULL;
-    
+
      substring = (PyUnicodeObject *)PyUnicode_FromObject(
                                                 (PyObject *)substring);
      if (substring == NULL)
@@ -4877,7 +4885,7 @@ unicode_islower(PyUnicodeObject *self)
      cased = 0;
      for (; p < e; p++) {
         register const Py_UNICODE ch = *p;
-       
+
         if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
             return PyBool_FromLong(0);
         else if (!cased && Py_UNICODE_ISLOWER(ch))
@@ -4911,7 +4919,7 @@ unicode_isupper(PyUnicodeObject *self)
      cased = 0;
      for (; p < e; p++) {
         register const Py_UNICODE ch = *p;
-       
+
         if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
             return PyBool_FromLong(0);
         else if (!cased && Py_UNICODE_ISUPPER(ch))
@@ -4948,7 +4956,7 @@ unicode_istitle(PyUnicodeObject *self)
      previous_is_cased = 0;
      for (; p < e; p++) {
         register const Py_UNICODE ch = *p;
-       
+
         if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
             if (previous_is_cased)
                 return PyBool_FromLong(0);
@@ -5428,9 +5436,9 @@ PyObject *PyUnicode_Replace(PyObject *obj,
         Py_DECREF(str1);
         return NULL;
      }
-    result = replace((PyUnicodeObject *)self, 
-                    (PyUnicodeObject *)str1, 
-                    (PyUnicodeObject *)str2, 
+    result = replace((PyUnicodeObject *)self,
+                    (PyUnicodeObject *)str1,
+                    (PyUnicodeObject *)str2,
                      maxcount);
      Py_DECREF(self);
      Py_DECREF(str1);
@@ -5589,7 +5597,7 @@ PyObject *PyUnicode_Split(PyObject *s,
                           int maxsplit)
  {
      PyObject *result;
-    
+
      s = PyUnicode_FromObject(s);
      if (s == NULL)
         return NULL;
@@ -5681,9 +5689,9 @@ are deleted.");
  static PyObject*
  unicode_translate(PyUnicodeObject *self, PyObject *table)
  {
-    return PyUnicode_TranslateCharmap(self->str, 
+    return PyUnicode_TranslateCharmap(self->str,
                                       self->length,
-                                     table, 
+                                     table,
                                       "ignore");
  }
  
@@ -5938,7 +5946,7 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
              for (cur = start, i = 0; i < slicelength; cur += step, i++) {
                  result_buf[i] = source_buf[cur];
              }
-            
+
              result = PyUnicode_FromUnicode(result_buf, slicelength);
              PyMem_FREE(result_buf);
              return result;
@@ -5993,7 +6001,7 @@ unicode_buffer_getcharbuf(PyUnicodeObject *self,
                           const void **ptr)
  {
      PyObject *str;
-    
+
      if (index != 0) {
          PyErr_SetString(PyExc_SystemError,
                         "accessing non-existent unicode segment");
@@ -6066,7 +6074,7 @@ formatfloat(Py_UNICODE *buf,
         worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
      char fmt[20];
      double x;
-    
+
      x = PyFloat_AsDouble(v);
      if (x == -1.0 && PyErr_Occurred())
         return -1;
@@ -6087,7 +6095,7 @@ formatfloat(Py_UNICODE *buf,
          len = 1 + 50 + 1 + prec = 52 + prec
  
         If prec=0 the effective precision is 1 (the leading digit is
-       always given), therefore increase the length by one. 
+       always given), therefore increase the length by one.
  
      */
      if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
@@ -6160,7 +6168,7 @@ formatint(Py_UNICODE *buf,
  
      if ((flags & F_ALT) &&
          (type == 'x' || type == 'X')) {
-        /* When converting under %#x or %#X, there are a number 
+        /* When converting under %#x or %#X, there are a number
           * of issues that cause pain:
           * - when 0 is being converted, the C standard leaves off
           *   the '0x' or '0X', which is inconsistent with other
@@ -6170,9 +6178,9 @@ formatint(Py_UNICODE *buf,
           *   convert 0 with the '0x' or '0X'
           *   (Metrowerks, Compaq Tru64)
           * - there are platforms that give '0x' when converting
-         *   under %#X, but convert 0 in accordance with the 
+         *   under %#X, but convert 0 in accordance with the
           *   standard (OS/2 EMX)
-         * 
+         *
           * We can achieve the desired consistency by inserting our
           * own '0x' or '0X' prefix, and substituting %x/%X in place
           * of %#x/%#X.
@@ -6180,12 +6188,12 @@ formatint(Py_UNICODE *buf,
           * Note that this is the same approach as used in
           * formatint() in stringobject.c
           */
-        PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c", 
+        PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
                        type, prec, type);
      }
      else {
          PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
-                      (flags&F_ALT) ? "#" : "", 
+                      (flags&F_ALT) ? "#" : "",
                        prec, type);
      }
      return usprintf(buf, fmt, x);
@@ -6204,7 +6212,7 @@ formatchar(Py_UNICODE *buf,
      }
  
      else if (PyString_Check(v)) {
-       if (PyString_GET_SIZE(v) != 1) 
+       if (PyString_GET_SIZE(v) != 1)
             goto onError;
         buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
      }
@@ -6260,7 +6268,7 @@ PyObject *PyUnicode_Format(PyObject *format,
      PyUnicodeObject *result = NULL;
      PyObject *dict = NULL;
      PyObject *uformat;
-       
+
      if (format == NULL || args == NULL) {
         PyErr_BadInternalCall();
         return NULL;
@@ -6324,7 +6332,7 @@ PyObject *PyUnicode_Format(PyObject *format,
  
                 if (dict == NULL) {
                     PyErr_SetString(PyExc_TypeError,
-                                   "format requires a mapping"); 
+                                   "format requires a mapping");
                     goto onError;
                 }
                 ++fmt;
@@ -6570,7 +6578,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                 PyErr_Format(PyExc_ValueError,
                              "unsupported format character '%c' (0x%x) "
                              "at index %i",
-                            (31<=c && c<=126) ? (char)c : '?', 
+                            (31<=c && c<=126) ? (char)c : '?',
                               (int)c,
                              (int)(fmt -1 - PyUnicode_AS_UNICODE(uformat)));
                 goto onError;
author	Tim Peters <tim.peters@gmail.com>
	Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Tue, 16 Sep 2003 20:30:58 +0000 (20:30 +0000)