Teach PyString_FromFormat, PyErr_Format, and PyString_FromFormatV about the "%u", "%lu", and "%zu" formats

author Tim Peters <tim.peters@gmail.com>

Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)

committer Tim Peters <tim.peters@gmail.com>

Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)
author Tim Peters <tim.peters@gmail.com>
Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)
committer Tim Peters <tim.peters@gmail.com>
Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex

index c3e1fbd3044e6ddbc8984dc54ba970fc579cb830..9a5d3eb05222c697eec0a020f6bc96b6933c17bb 100644 (file)
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -245,7 +245,7 @@ booleans.  The following macros are available, however.
  \end{csimplemacrodesc}
  
  \begin{cfuncdesc}{PyObject*}{PyBool_FromLong}{long v}
-  Return a new reference to \constant{Py_True} or \constant{Py_False} 
+  Return a new reference to \constant{Py_True} or \constant{Py_False}
    depending on the truth value of \var{v}.
  \versionadded{2.3}
  \end{cfuncdesc}
@@ -618,12 +618,24 @@ parameter and are called with a non-string parameter.
    exactly to the format characters in the \var{format} string.  The
    following format characters are allowed:
  
+  % This should be exactly the same as the table in PyErr_Format.
+  % One should just refer to the other.
+
+  % The descriptions for %zd and %zu are wrong, but the truth is complicated
+  % because not all compilers support the %z width modifier -- we fake it
+  % when necessary via interpolating PY_FORMAT_SIZE_T.
+
+  % %u, %lu, %zu should have "new in Python 2.5" blurbs.
+
    \begin{tableiii}{l|l|l}{member}{Format Characters}{Type}{Comment}
      \lineiii{\%\%}{\emph{n/a}}{The literal \% character.}
      \lineiii{\%c}{int}{A single character, represented as an C int.}
      \lineiii{\%d}{int}{Exactly equivalent to \code{printf("\%d")}.}
+    \lineiii{\%u}{unsigned int}{Exactly equivalent to \code{printf("\%u")}.}
      \lineiii{\%ld}{long}{Exactly equivalent to \code{printf("\%ld")}.}
-    \lineiii{\%zd}{long}{Exactly equivalent to \code{printf("\%zd")}.}
+    \lineiii{\%lu}{unsigned long}{Exactly equivalent to \code{printf("\%lu")}.}
+    \lineiii{\%zd}{Py_ssize_t}{Exactly equivalent to \code{printf("\%zd")}.}
+    \lineiii{\%zu}{size_t}{Exactly equivalent to \code{printf("\%zu")}.}
      \lineiii{\%i}{int}{Exactly equivalent to \code{printf("\%i")}.}
      \lineiii{\%x}{int}{Exactly equivalent to \code{printf("\%x")}.}
      \lineiii{\%s}{char*}{A null-terminated C character array.}
@@ -632,6 +644,10 @@ parameter and are called with a non-string parameter.
         guaranteed to start with the literal \code{0x} regardless of
         what the platform's \code{printf} yields.}
    \end{tableiii}
+
+  An unrecognized format character causes all the rest of the format
+  string to be copied as-is to the result string, and any extra
+  arguments discarded.
  \end{cfuncdesc}
  
  \begin{cfuncdesc}{PyObject*}{PyString_FromFormatV}{const char *format,
@@ -687,7 +703,7 @@ parameter and are called with a non-string parameter.
    \var{size})}.  It must not be deallocated.  If \var{string} is a
    Unicode object, this function computes the default encoding of
    \var{string} and operates on that.  If \var{string} is not a string
-  object at all, \cfunction{PyString_AsStringAndSize()} returns 
+  object at all, \cfunction{PyString_AsStringAndSize()} returns
    \code{-1} and raises \exception{TypeError}.
  \end{cfuncdesc}
  
@@ -1494,7 +1510,7 @@ They all return \NULL{} or \code{-1} if an exception occurs.
    Return 1 if \var{substr} matches \var{str}[\var{start}:\var{end}] at
    the given tail end (\var{direction} == -1 means to do a prefix
    match, \var{direction} == 1 a suffix match), 0 otherwise.
-  Return \code{-1} if an error occurred.                         
+  Return \code{-1} if an error occurred.
  \end{cfuncdesc}
  
  \begin{cfuncdesc}{Py_ssize_t}{PyUnicode_Find}{PyObject *str,
@@ -3013,7 +3029,7 @@ Macros for the convenience of modules implementing the DB API:
  
  
  \subsection{Set Objects \label{setObjects}}
-\sectionauthor{Raymond D. Hettinger}{python@rcn.com}                     
+\sectionauthor{Raymond D. Hettinger}{python@rcn.com}
  
  \obindex{set}
  \obindex{frozenset}
@@ -3022,8 +3038,8 @@ Macros for the convenience of modules implementing the DB API:
  This section details the public API for \class{set} and \class{frozenset}
  objects.  Any functionality not listed below is best accessed using the
  either the abstract object protocol (including
-\cfunction{PyObject_CallMethod()}, \cfunction{PyObject_RichCompareBool()}, 
-\cfunction{PyObject_Hash()}, \cfunction{PyObject_Repr()}, 
+\cfunction{PyObject_CallMethod()}, \cfunction{PyObject_RichCompareBool()},
+\cfunction{PyObject_Hash()}, \cfunction{PyObject_Repr()},
  \cfunction{PyObject_IsTrue()}, \cfunction{PyObject_Print()}, and
  \cfunction{PyObject_GetIter()})
  or the abstract number protocol (including
@@ -3040,7 +3056,7 @@ or the abstract number protocol (including
    block of memory for medium and large sized sets (much like list storage).
    None of the fields of this structure should be considered public and
    are subject to change.  All access should be done through the
-  documented API rather than by manipulating the values in the structure. 
+  documented API rather than by manipulating the values in the structure.
  
  \end{ctypedesc}
  
@@ -3059,7 +3075,7 @@ The following type check macros work on pointers to any Python object.
  Likewise, the constructor functions work with any iterable Python object.
  
  \begin{cfuncdesc}{int}{PyAnySet_Check}{PyObject *p}
-  Return true if \var{p} is a \class{set} object, a \class{frozenset} 
+  Return true if \var{p} is a \class{set} object, a \class{frozenset}
    object, or an instance of a subtype.
  \end{cfuncdesc}
  
@@ -3112,7 +3128,7 @@ The following functions and macros are available for instances of
    function does not automatically convert unhashable sets into temporary
    frozensets.  Raise a \exception{TypeError} if the \var{key} is unhashable.
    Raise \exception{PyExc_SystemError} if \var{anyset} is not a \class{set},
-  \class{frozenset}, or an instance of a subtype.                         
+  \class{frozenset}, or an instance of a subtype.
  \end{cfuncdesc}
  
  The following functions are available for instances of \class{set} or
@@ -3134,7 +3150,7 @@ its subtypes but not for instances of \class{frozenset} or its subtypes.
    unhashable.  Unlike the Python \method{discard()} method, this function
    does not automatically convert unhashable sets into temporary frozensets.
    Raise \exception{PyExc_SystemError} if \var{set} is an not an instance
-  of \class{set} or its subtype.                         
+  of \class{set} or its subtype.
  \end{cfuncdesc}
  
  \begin{cfuncdesc}{PyObject*}{PySet_Pop}{PyObject *set}
@@ -3142,7 +3158,7 @@ its subtypes but not for instances of \class{frozenset} or its subtypes.
    and removes the object from the \var{set}.  Return \NULL{} on
    failure.  Raise \exception{KeyError} if the set is empty.
    Raise a \exception{SystemError} if \var{set} is an not an instance
-  of \class{set} or its subtype.                        
+  of \class{set} or its subtype.
  \end{cfuncdesc}
  
  \begin{cfuncdesc}{int}{PySet_Clear}{PyObject *set}
diff --git a/Doc/api/exceptions.tex b/Doc/api/exceptions.tex

index c4727f2f93de6a7471b5e74042d55b400c8cce09..79428125571545fce2b0254f6b5beba8499a1051 100644 (file)
--- a/Doc/api/exceptions.tex
+++ b/Doc/api/exceptions.tex
@@ -135,13 +135,32 @@ for each thread.
    codes, similar to \cfunction{printf()}. The \code{width.precision}
    before a format code is parsed, but the width part is ignored.
  
-  \begin{tableii}{c|l}{character}{Character}{Meaning}
-    \lineii{c}{Character, as an \ctype{int} parameter}
-    \lineii{d}{Number in decimal, as an \ctype{int} parameter}
-    \lineii{x}{Number in hexadecimal, as an \ctype{int} parameter}
-    \lineii{s}{A string, as a \ctype{char *} parameter}
-    \lineii{p}{A hex pointer, as a \ctype{void *} parameter}
-  \end{tableii}
+  % This should be exactly the same as the table in PyString_FromFormat.
+  % One should just refer to the other.
+
+  % The descriptions for %zd and %zu are wrong, but the truth is complicated
+  % because not all compilers support the %z width modifier -- we fake it
+  % when necessary via interpolating PY_FORMAT_SIZE_T.
+
+  % %u, %lu, %zu should have "new in Python 2.5" blurbs.
+
+  \begin{tableiii}{l|l|l}{member}{Format Characters}{Type}{Comment}
+    \lineiii{\%\%}{\emph{n/a}}{The literal \% character.}
+    \lineiii{\%c}{int}{A single character, represented as a C int.}
+    \lineiii{\%d}{int}{Exactly equivalent to \code{printf("\%d")}.}
+    \lineiii{\%u}{unsigned int}{Exactly equivalent to \code{printf("\%u")}.}
+    \lineiii{\%ld}{long}{Exactly equivalent to \code{printf("\%ld")}.}
+    \lineiii{\%lu}{unsigned long}{Exactly equivalent to \code{printf("\%lu")}.}
+    \lineiii{\%zd}{Py_ssize_t}{Exactly equivalent to \code{printf("\%zd")}.}
+    \lineiii{\%zu}{size_t}{Exactly equivalent to \code{printf("\%zu")}.}
+    \lineiii{\%i}{int}{Exactly equivalent to \code{printf("\%i")}.}
+    \lineiii{\%x}{int}{Exactly equivalent to \code{printf("\%x")}.}
+    \lineiii{\%s}{char*}{A null-terminated C character array.}
+    \lineiii{\%p}{void*}{The hex representation of a C pointer.
+       Mostly equivalent to \code{printf("\%p")} except that it is
+       guaranteed to start with the literal \code{0x} regardless of
+       what the platform's \code{printf} yields.}
+  \end{tableiii}
  
    An unrecognized format character causes all the rest of the format
    string to be copied as-is to the result string, and any extra
@@ -275,8 +294,8 @@ for each thread.
    command line documentation.  There is no C API for warning control.
  \end{cfuncdesc}
  
-\begin{cfuncdesc}{int}{PyErr_WarnExplicit}{PyObject *category, 
-                const char *message, const char *filename, int lineno, 
+\begin{cfuncdesc}{int}{PyErr_WarnExplicit}{PyObject *category,
+                const char *message, const char *filename, int lineno,
                  const char *module, PyObject *registry}
    Issue a warning message with explicit control over all warning
    attributes.  This is a straightforward wrapper around the Python
@@ -402,5 +421,5 @@ are derived from \exception{BaseException}.
  \withsubitem{(built-in exception)}{\ttindex{BaseException}}
  
  String exceptions are still supported in the interpreter to allow
-existing code to run unmodified, but this will also change in a future 
+existing code to run unmodified, but this will also change in a future
  release.
diff --git a/Misc/NEWS b/Misc/NEWS

index a7269afab9964016abf59efca9b383f99696b108..de5e5192ed12a3a2e15fa9c425a3a2a1e4131249 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -72,7 +72,7 @@ Core and builtins
  Extension Modules
  -----------------
  
-- On Win32, os.listdir now supports arbitrarily-long Unicode path names 
+- On Win32, os.listdir now supports arbitrarily-long Unicode path names
    (up to the system limit of 32K characters).
  
  - Use Win32 API to implement os.{access,chdir,chmod,mkdir,remove,rename,rmdir,utime}.
@@ -200,6 +200,10 @@ Build
  C API
  -----
  
+- ``PyString_FromFormat``, ``PyErr_Format``, and ``PyString_FromFormatV``
+  now accept formats "%u" for unsigned ints, "%lu" for unsigned longs,
+  and "%zu" for unsigned integers of type ``size_t``.
+
  Tests
  -----
  
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c

index e8881dc250de844b8b4a80466222121d6b039314..a74e76164ebb0e56e7a75e2ee1a4512a0f71aaa2 100644 (file)
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -486,8 +486,8 @@ test_u_code(PyObject *self)
         return Py_None;
  }
  
-static
-PyObject *codec_incrementalencoder(PyObject *self, PyObject *args)
+static PyObject *
+codec_incrementalencoder(PyObject *self, PyObject *args)
  {
         const char *encoding, *errors = NULL;
         if (!PyArg_ParseTuple(args, "s|s:test_incrementalencoder",
@@ -496,8 +496,8 @@ PyObject *codec_incrementalencoder(PyObject *self, PyObject *args)
         return PyCodec_IncrementalEncoder(encoding, errors);
  }
  
-static
-PyObject *codec_incrementaldecoder(PyObject *self, PyObject *args)
+static PyObject *
+codec_incrementaldecoder(PyObject *self, PyObject *args)
  {
         const char *encoding, *errors = NULL;
         if (!PyArg_ParseTuple(args, "s|s:test_incrementaldecoder",
@@ -660,6 +660,44 @@ test_thread_state(PyObject *self, PyObject *args)
  }
  #endif
  
+/* Some tests of PyString_FromFormat().  This needs more tests.
+ * PyString_FromFormat() also needs docs.
+ */
+static PyObject *
+test_string_from_format(PyObject *self, PyObject *args)
+{
+       PyObject *result;
+       char *msg;
+
+#define CHECK_1_FORMAT(FORMAT, TYPE)                   \
+       result = PyString_FromFormat(FORMAT, (TYPE)1);  \
+       if (result == NULL)                             \
+               return NULL;                            \
+       if (strcmp(PyString_AsString(result), "1")) {   \
+               msg = FORMAT " failed at 1";            \
+               goto Fail;                              \
+       }                                               \
+       Py_DECREF(result)
+
+       CHECK_1_FORMAT("%d", int);
+       CHECK_1_FORMAT("%ld", long);
+       /* The z width modifier was added in Python 2.5. */
+       CHECK_1_FORMAT("%zd", Py_ssize_t);
+
+       /* The u type code was added in Python 2.5. */
+       CHECK_1_FORMAT("%u", unsigned int);
+       CHECK_1_FORMAT("%lu", unsigned long);
+       CHECK_1_FORMAT("%zu", size_t);
+
+       Py_RETURN_NONE;
+
+ Fail:
+       Py_XDECREF(result);
+       return raiseTestError("test_string_from_format", msg);
+
+#undef CHECK_1_FORMAT
+}
+
  static PyMethodDef TestMethods[] = {
         {"raise_exception",     raise_exception,                 METH_VARARGS},
         {"test_config",         (PyCFunction)test_config,        METH_NOARGS},
@@ -669,6 +707,7 @@ static PyMethodDef TestMethods[] = {
         {"test_long_numbits",   (PyCFunction)test_long_numbits,  METH_NOARGS},
         {"test_k_code",         (PyCFunction)test_k_code,        METH_NOARGS},
         {"test_null_strings",   (PyCFunction)test_null_strings,  METH_NOARGS},
+       {"test_string_from_format", (PyCFunction)test_string_from_format, METH_NOARGS},
  
         {"getargs_b",           getargs_b,                       METH_VARARGS},
         {"getargs_B",           getargs_B,                       METH_VARARGS},
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 01a4bb4c9a30d4ee04c4515fca964619767f8cd4..536caeff972fa64cd0e6e7102eea053f3a32ef45 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -176,14 +176,11 @@ PyString_FromFormatV(const char *format, va_list vargs)
                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
                                 ;
  
-                       /* skip the 'l' in %ld, since it doesn't change the
-                          width.  although only %d is supported (see
-                          "expand" section below), others can be easily
-                          added */
-                       if (*f == 'l' && *(f+1) == 'd')
-                               ++f;
-                       /* likewise for %zd */
-                       if (*f == 'z' && *(f+1) == 'd')
+                       /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
+                        * they don't affect the amount of space we reserve.
+                        */
+                       if ((*f == 'l' || *f == 'z') &&
+                                       (f[1] == 'd' || f[1] == 'u'))
                                 ++f;
  
                         switch (*f) {
@@ -193,7 +190,7 @@ PyString_FromFormatV(const char *format, va_list vargs)
                         case '%':
                                 n++;
                                 break;
-                       case 'd': case 'i': case 'x':
+                       case 'd': case 'u': case 'i': case 'x':
                                 (void) va_arg(count, int);
                                 /* 20 bytes is enough to hold a 64-bit
                                    integer.  Decimal takes the most space.
@@ -255,14 +252,14 @@ PyString_FromFormatV(const char *format, va_list vargs)
                         }
                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
                                 f++;
-                       /* handle the long flag, but only for %ld.  others
-                          can be added when necessary. */
-                       if (*f == 'l' && *(f+1) == 'd') {
+                       /* handle the long flag, but only for %ld and %lu.
+                          others can be added when necessary. */
+                       if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
                                 longflag = 1;
                                 ++f;
                         }
                         /* handle the size_t flag. */
-                       if (*f == 'z' && *(f+1) == 'd') {
+                       if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
                                 size_tflag = 1;
                                 ++f;
                         }
@@ -281,6 +278,18 @@ PyString_FromFormatV(const char *format, va_list vargs)
                                         sprintf(s, "%d", va_arg(vargs, int));
                                 s += strlen(s);
                                 break;
+                       case 'u':
+                               if (longflag)
+                                       sprintf(s, "%lu",
+                                               va_arg(vargs, unsigned long));
+                               else if (size_tflag)
+                                       sprintf(s, "%" PY_FORMAT_SIZE_T "u",
+                                               va_arg(vargs, size_t));
+                               else
+                                       sprintf(s, "%u",
+                                               va_arg(vargs, unsigned int));
+                               s += strlen(s);
+                               break;
                         case 'i':
                                 sprintf(s, "%i", va_arg(vargs, int));
                                 s += strlen(s);
author	Tim Peters <tim.peters@gmail.com>
	Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Sat, 13 May 2006 23:28:20 +0000 (23:28 +0000)
Doc/api/concrete.tex		patch \| blob \| history
Doc/api/exceptions.tex		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/_testcapimodule.c		patch \| blob \| history
Objects/stringobject.c		patch \| blob \| history