Marc-Andre Lemburg:

author Guido van Rossum <guido@python.org>

Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)

committer Guido van Rossum <guido@python.org>

Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)
author Guido van Rossum <guido@python.org>
Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)
committer Guido van Rossum <guido@python.org>
Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index e9e60d8baed4ccbc85ecbb98c4457c83d3b518ee..cfc812666e396ebac33b614846ae5d0ba408b851 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
  #endif
  
  #ifdef HAVE_WCHAR_H
+/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
+# ifdef _HAVE_BSDI
+#  include <time.h>
+# endif
  # include "wchar.h"
  #endif
  
@@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
      );
  
  #ifdef MS_WIN32
+
  /* --- MBCS codecs for Windows -------------------------------------------- */
+
  extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
      const char *string,         /* MBCS encoded string */
      int length,                 /* size of string */
@@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
      const char *errors          /* error handling */
      );
  
-
  #endif /* MS_WIN32 */
+
  /* --- Methods & Slots ----------------------------------------------------
  
     These are capable of handling Unicode objects and strings on input
diff --git a/Lib/encodings/mbcs.py b/Lib/encodings/mbcs.py

index b7fafbd76452762813f4219f8935363c3f302f9f..5103980ff4733dea4ebda62b27bc6c94744163a1 100644 (file)
--- a/Lib/encodings/mbcs.py
+++ b/Lib/encodings/mbcs.py
@@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):
  def getregentry():
  
      return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
-
diff --git a/Lib/test/output/test_unicode b/Lib/test/output/test_unicode

index 1ec9031045efed28dc1bb8008c5ba6abe3b0ecf4..88e8624bff864292f1fad6896efa9e9cf248204a 100644 (file)
--- a/Lib/test/output/test_unicode
+++ b/Lib/test/output/test_unicode
@@ -1,4 +1,5 @@
  test_unicode
  Testing Unicode comparisons... done.
+Testing Unicode contains method... done.
  Testing Unicode formatting strings... done.
-Testing unicodedata module... done.
+Testing builtin codecs... done.
diff --git a/Lib/test/output/test_unicodedata b/Lib/test/output/test_unicodedata

new file mode 100644 (file)

index 0000000..fc9562f
--- /dev/null
+++ b/Lib/test/output/test_unicodedata
@@ -0,0 +1,2 @@
+test_unicodedata
+Testing unicodedata module... done.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 3d15f22a4efefd936be6c67504fccf01d5fb22a1..f90887a30700d301bd6a93483e0cc1ea6082b58e 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1,6 +1,5 @@
  """ Test script for the Unicode implementation.
  
-
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
@@ -250,50 +249,6 @@ assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
  assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
  print 'done.'
  
-# Test Unicode database APIs
-try:
-    import unicodedata
-except ImportError:
-    pass
-else:
-    print 'Testing unicodedata module...',
-    
-    assert unicodedata.digit(u'A',None) is None
-    assert unicodedata.digit(u'9') == 9
-    assert unicodedata.digit(u'\u215b',None) is None
-    assert unicodedata.digit(u'\u2468') == 9
-
-    assert unicodedata.numeric(u'A',None) is None
-    assert unicodedata.numeric(u'9') == 9
-    assert unicodedata.numeric(u'\u215b') == 0.125
-    assert unicodedata.numeric(u'\u2468') == 9.0
-
-    assert unicodedata.decimal(u'A',None) is None
-    assert unicodedata.decimal(u'9') == 9
-    assert unicodedata.decimal(u'\u215b',None) is None
-    assert unicodedata.decimal(u'\u2468',None) is None
-
-    assert unicodedata.category(u'\uFFFE') == 'Cn'
-    assert unicodedata.category(u'a') == 'Ll'
-    assert unicodedata.category(u'A') == 'Lu'
-
-    assert unicodedata.bidirectional(u'\uFFFE') == ''
-    assert unicodedata.bidirectional(u' ') == 'WS'
-    assert unicodedata.bidirectional(u'A') == 'L'
-
-    assert unicodedata.decomposition(u'\uFFFE') == ''
-    assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
-
-    assert unicodedata.mirrored(u'\uFFFE') == 0
-    assert unicodedata.mirrored(u'a') == 0
-    assert unicodedata.mirrored(u'\u2201') == 1
-
-    assert unicodedata.combining(u'\uFFFE') == 0
-    assert unicodedata.combining(u'a') == 0
-    assert unicodedata.combining(u'\u20e1') == 230
-    
-    print 'done.'
-
  # Test builtin codecs
  print 'Testing builtin codecs...',
  
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py

new file mode 100644 (file)

index 0000000..6ddd077
--- /dev/null
+++ b/Lib/test/test_unicodedata.py
@@ -0,0 +1,50 @@
+""" Test script for the unicodedata module.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+from test_support import verbose
+import sys
+
+# Test Unicode database APIs
+import unicodedata
+
+print 'Testing unicodedata module...',
+
+assert unicodedata.digit(u'A',None) is None
+assert unicodedata.digit(u'9') == 9
+assert unicodedata.digit(u'\u215b',None) is None
+assert unicodedata.digit(u'\u2468') == 9
+
+assert unicodedata.numeric(u'A',None) is None
+assert unicodedata.numeric(u'9') == 9
+assert unicodedata.numeric(u'\u215b') == 0.125
+assert unicodedata.numeric(u'\u2468') == 9.0
+
+assert unicodedata.decimal(u'A',None) is None
+assert unicodedata.decimal(u'9') == 9
+assert unicodedata.decimal(u'\u215b',None) is None
+assert unicodedata.decimal(u'\u2468',None) is None
+
+assert unicodedata.category(u'\uFFFE') == 'Cn'
+assert unicodedata.category(u'a') == 'Ll'
+assert unicodedata.category(u'A') == 'Lu'
+
+assert unicodedata.bidirectional(u'\uFFFE') == ''
+assert unicodedata.bidirectional(u' ') == 'WS'
+assert unicodedata.bidirectional(u'A') == 'L'
+
+assert unicodedata.decomposition(u'\uFFFE') == ''
+assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
+
+assert unicodedata.mirrored(u'\uFFFE') == 0
+assert unicodedata.mirrored(u'a') == 0
+assert unicodedata.mirrored(u'\u2201') == 1
+
+assert unicodedata.combining(u'\uFFFE') == 0
+assert unicodedata.combining(u'a') == 0
+assert unicodedata.combining(u'\u20e1') == 230
+
+print 'done.'
diff --git a/Misc/unicode.txt b/Misc/unicode.txt

index fc1f2c5a24939e5d08a44164bbaabc2e17a4a587..ce74c05bd190087ac6c94f014d9918ea0aa08d8b 100644 (file)
--- a/Misc/unicode.txt
+++ b/Misc/unicode.txt
@@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs:
         On output, a buffer of the needed size is allocated and
         returned through *buffer as NULL-terminated string.
         The encoded may not contain embedded NULL characters.
-       The caller is responsible for free()ing the allocated *buffer
-       after usage.
+       The caller is responsible for calling PyMem_Free()
+       to free the allocated *buffer after usage.
  
    "es#":
         Takes three parameters: encoding (const char *),
@@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs:
  
         If *buffer is NULL, a buffer of the needed size is
         allocated and output copied into it. *buffer is then
-       updated to point to the allocated memory area. The caller
-       is responsible for free()ing *buffer after usage.
+       updated to point to the allocated memory area.
+       The caller is responsible for calling PyMem_Free()
+       to free the allocated *buffer after usage.
  
         In both cases *buffer_len is updated to the number of
         characters written (excluding the trailing NULL-byte).
@@ -784,7 +785,7 @@ Using "es#" with auto-allocation:
             return NULL;
         }
         str = PyString_FromStringAndSize(buffer, buffer_len);
-       free(buffer);
+       PyMem_Free(buffer);
         return str;
      }
  
@@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string:
             return NULL;
         }
         str = PyString_FromString(buffer);
-       free(buffer);
+       PyMem_Free(buffer);
         return str;
      }
  
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c

index 6c8a2d44e6d9533ddb02b104f4863113ba5b0a4e..4f368f8b8fcc2609be7f2b7bbad2fe9be01d7c27 100644 (file)
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -286,6 +286,26 @@ charmap_decode(PyObject *self,
                        size);
  }
  
+#ifdef MS_WIN32
+
+static PyObject *
+mbcs_decode(PyObject *self,
+           PyObject *args)
+{
+    const char *data;
+    int size;
+    const char *errors = NULL;
+    
+    if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
+                         &data, &size, &errors))
+       return NULL;
+
+    return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
+                      size);
+}
+
+#endif /* MS_WIN32 */
+
  /* --- Encoder ------------------------------------------------------------ */
  
  static PyObject *
@@ -491,6 +511,28 @@ charmap_encode(PyObject *self,
                        PyUnicode_GET_SIZE(str));
  }
  
+#ifdef MS_WIN32
+
+static PyObject *
+mbcs_encode(PyObject *self,
+           PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+
+    if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
+                         &str, &errors))
+       return NULL;
+
+    return codec_tuple(PyUnicode_EncodeMBCS(
+                              PyUnicode_AS_UNICODE(str), 
+                              PyUnicode_GET_SIZE(str),
+                              errors),
+                      PyUnicode_GET_SIZE(str));
+}
+
+#endif /* MS_WIN32 */
+
  /* --- Module API --------------------------------------------------------- */
  
  static PyMethodDef _codecs_functions[] = {
@@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = {
      {"charmap_decode",                 charmap_decode,                 1},
      {"readbuffer_encode",      readbuffer_encode,              1},
      {"charbuffer_encode",      charbuffer_encode,              1},
+#ifdef MS_WIN32
+    {"mbcs_encode",            mbcs_encode,                    1},
+    {"mbcs_decode",            mbcs_decode,                    1},
+#endif
      {NULL, NULL}               /* sentinel */
  };
  
diff --git a/Python/getargs.c b/Python/getargs.c

index 27a69d05951065d12db19ff4b3a5391d5fdd4bf9..91fe26736b5b4c2b55c87ac3a8cf82270a79779b 100644 (file)
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va)
                                    the data copied into it; *buffer is
                                    updated to point to the new buffer;
                                    the caller is responsible for
-                                  free()ing it after usage
+                                  PyMem_Free()ing it after usage
  
                                    - if *buffer is not NULL, the data
                                    is copied to *buffer; *buffer_len
@@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va)
                                    is allocated and the data copied
                                    into it; *buffer is updated to
                                    point to the new buffer; the caller
-                                  is responsible for free()ing it
+                                  is responsible for PyMem_Free()ing it
                                    after usage
  
                                  */
author	Guido van Rossum <guido@python.org>
	Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)
committer	Guido van Rossum <guido@python.org>
	Tue, 28 Mar 2000 20:29:59 +0000 (20:29 +0000)
Include/unicodeobject.h		patch | blob | history
Lib/encodings/mbcs.py		patch | blob | history
Lib/test/output/test_unicode		patch | blob | history
Lib/test/output/test_unicodedata	[new file with mode: 0644]	patch | blob
Lib/test/test_unicode.py		patch | blob | history
Lib/test/test_unicodedata.py	[new file with mode: 0644]	patch | blob
Misc/unicode.txt		patch | blob | history
Modules/_codecsmodule.c		patch | blob | history
Python/getargs.c		patch | blob | history