]> granicus.if.org Git - python/commitdiff
Cut disused recode_encoding logic in _PyBytes_DecodeEscape. (GH-16013)
authorGreg Price <gnprice@gmail.com>
Thu, 12 Sep 2019 18:12:22 +0000 (11:12 -0700)
committerBenjamin Peterson <benjamin@python.org>
Thu, 12 Sep 2019 18:12:22 +0000 (19:12 +0100)
All call sites pass NULL for `recode_encoding`, so this path is
completely untested.  That's been true since before Python 3.0.
It adds significant complexity to this logic, so it's best to
take it out.

All call sites now have a literal NULL, and that's been true since
commit 768921cf3 eliminated a conditional (`foo ? bar : NULL`) at
the call site in Python/ast.c where we're parsing a bytes literal.
But even before then, that condition `foo` had been a constant
since unadorned string literals started meaning Unicode, in commit
572dbf8f1 aka v3.0a1~1035 .

The `unicode` parameter is already unused, so mark it as unused too.
The code that acted on it was also taken out before Python 3.0, in
commit 8d30cc014 aka v3.0a1~1031 .

The function (PyBytes_DecodeEscape) is exposed in the API, but it's
never been documented.

Include/bytesobject.h
Include/longobject.h
Objects/bytesobject.c
Python/ast.c

index 3fde4a221fdb18b5e50e5eb3936675f553ed00ea..fc9981e56d277416da6828635d3e8f0ad8037fb2 100644 (file)
@@ -77,9 +77,7 @@ PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
 #ifndef Py_LIMITED_API
 /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
 PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
-                                             const char *, Py_ssize_t,
-                                             const char *,
-                                             const char **);
+                                             const char *, const char **);
 #endif
 
 /* Macro, trading safety for speed */
index 1e7a58d994b8a486e23a4f71cab0ff90d5560d2f..87b4d017d3234e5e84ecc040f44f7f652fa85dc2 100644 (file)
@@ -74,7 +74,7 @@ PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);
 #endif
 
 /* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
-   _PyBytes_DecodeEscapeRecode(), etc. */
+   _PyBytes_DecodeEscape(), etc. */
 #ifndef Py_LIMITED_API
 PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
 #endif
index e1f5ee2f62f336e595604ddddb6c644d0a66dd24..4b2a77b4b8c16f1c18ffc6d766d52c77c28a2d22 100644 (file)
@@ -1077,52 +1077,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
     return NULL;
 }
 
-/* Unescape a backslash-escaped string. If unicode is non-zero,
-   the string is a u-literal. If recode_encoding is non-zero,
-   the string is UTF-8 encoded and should be re-encoded in the
-   specified encoding.  */
-
-static char *
-_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
-                            const char *errors, const char *recode_encoding,
-                            _PyBytesWriter *writer, char *p)
-{
-    PyObject *u, *w;
-    const char* t;
-
-    t = *s;
-    /* Decode non-ASCII bytes as UTF-8. */
-    while (t < end && (*t & 0x80))
-        t++;
-    u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
-    if (u == NULL)
-        return NULL;
-
-    /* Recode them in target encoding. */
-    w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
-    Py_DECREF(u);
-    if  (w == NULL)
-        return NULL;
-    assert(PyBytes_Check(w));
-
-    /* Append bytes to output buffer. */
-    writer->min_size--;   /* subtract 1 preallocated byte */
-    p = _PyBytesWriter_WriteBytes(writer, p,
-                                  PyBytes_AS_STRING(w),
-                                  PyBytes_GET_SIZE(w));
-    Py_DECREF(w);
-    if (p == NULL)
-        return NULL;
-
-    *s = t;
-    return p;
-}
-
+/* Unescape a backslash-escaped string. */
 PyObject *_PyBytes_DecodeEscape(const char *s,
                                 Py_ssize_t len,
                                 const char *errors,
-                                Py_ssize_t unicode,
-                                const char *recode_encoding,
                                 const char **first_invalid_escape)
 {
     int c;
@@ -1142,17 +1100,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
     end = s + len;
     while (s < end) {
         if (*s != '\\') {
-            if (!(recode_encoding && (*s & 0x80))) {
-                *p++ = *s++;
-            }
-            else {
-                /* non-ASCII character and need to recode */
-                p = _PyBytes_DecodeEscapeRecode(&s, end,
-                                                errors, recode_encoding,
-                                                &writer, p);
-                if (p == NULL)
-                    goto failed;
-            }
+            *p++ = *s++;
             continue;
         }
 
@@ -1241,12 +1189,11 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
 PyObject *PyBytes_DecodeEscape(const char *s,
                                 Py_ssize_t len,
                                 const char *errors,
-                                Py_ssize_t unicode,
-                                const char *recode_encoding)
+                                Py_ssize_t Py_UNUSED(unicode),
+                                const char *Py_UNUSED(recode_encoding))
 {
     const char* first_invalid_escape;
-    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
-                                             recode_encoding,
+    PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
                                              &first_invalid_escape);
     if (result == NULL)
         return NULL;
index e6f71671c18e75c344fd3265db8799827fcb2f81..05147a49fc1dc6770302d1f2d82e043dc88d437e 100644 (file)
@@ -4766,7 +4766,7 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
                           size_t len)
 {
     const char *first_invalid_escape;
-    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
+    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
                                              &first_invalid_escape);
     if (result == NULL)
         return NULL;