From 2e2f52a4a0fd560d37a11a7383a4d52b63ca6cfc Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Mon, 21 Dec 2020 16:03:02 +0100 Subject: [PATCH] patch 8.2.2178: Python 3: non-utf8 character cannot be handled MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Problem: Python 3: non-utf8 character cannot be handled. Solution: Change the string decode. (Björn Linse, closes #1053) --- src/if_py_both.h | 12 +++++++----- src/if_python.c | 4 ++++ src/if_python3.c | 11 +++++++---- src/testdir/test_python2.vim | 7 +++++++ src/testdir/test_python3.vim | 7 +++++++ src/version.c | 2 ++ 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/if_py_both.h b/src/if_py_both.h index 7b748b25e..2903b0ba9 100644 --- a/src/if_py_both.h +++ b/src/if_py_both.h @@ -130,10 +130,11 @@ StringToChars(PyObject *obj, PyObject **todecref) { PyObject *bytes; - if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL))) + if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, + ERRORS_ENCODE_ARG))) return NULL; - if(PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1 + if (PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1 || str == NULL) { Py_DECREF(bytes); @@ -4243,7 +4244,8 @@ StringToLine(PyObject *obj) } else if (PyUnicode_Check(obj)) { - if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL))) + if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, + ERRORS_ENCODE_ARG))) return NULL; if (PyBytes_AsStringAndSize(bytes, &str, &len) == -1 @@ -6290,11 +6292,11 @@ _ConvertFromPyObject(PyObject *obj, typval_T *tv, PyObject *lookup_dict) PyObject *bytes; char_u *str; - bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL); + bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, ERRORS_ENCODE_ARG); if (bytes == NULL) return -1; - if(PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1) + if (PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1) return -1; if (str == NULL) return -1; diff --git a/src/if_python.c 
b/src/if_python.c index 6338a5b8d..7f90ede15 100644 --- a/src/if_python.c +++ b/src/if_python.c @@ -69,6 +69,10 @@ # undef PY_SSIZE_T_CLEAN #endif +// these are NULL for Python 2 +#define ERRORS_DECODE_ARG NULL +#define ERRORS_ENCODE_ARG ERRORS_DECODE_ARG + #undef main // Defined in python.h - aargh #undef HAVE_FCNTL_H // Clash with os_win32.h diff --git a/src/if_python3.c b/src/if_python3.c index a51be2949..ea4fd7dd8 100644 --- a/src/if_python3.c +++ b/src/if_python3.c @@ -81,12 +81,15 @@ // Python 3 does not support CObjects, always use Capsules #define PY_USE_CAPSULE +#define ERRORS_DECODE_ARG CODEC_ERROR_HANDLER +#define ERRORS_ENCODE_ARG ERRORS_DECODE_ARG + #define PyInt Py_ssize_t #ifndef PyString_Check # define PyString_Check(obj) PyUnicode_Check(obj) #endif #define PyString_FromString(repr) \ - PyUnicode_Decode(repr, STRLEN(repr), ENC_OPT, NULL) + PyUnicode_Decode(repr, STRLEN(repr), ENC_OPT, ERRORS_DECODE_ARG) #define PyString_FromFormat PyUnicode_FromFormat #ifndef PyInt_Check # define PyInt_Check(obj) PyLong_Check(obj) @@ -1088,8 +1091,8 @@ DoPyCommand(const char *cmd, rangeinitializer init_range, runner run, void *arg) // PyRun_SimpleString expects a UTF-8 string. Wrong encoding may cause // SyntaxError (unicode error). 
cmdstr = PyUnicode_Decode(cmd, strlen(cmd), - (char *)ENC_OPT, CODEC_ERROR_HANDLER); - cmdbytes = PyUnicode_AsEncodedString(cmdstr, "utf-8", CODEC_ERROR_HANDLER); + (char *)ENC_OPT, ERRORS_DECODE_ARG); + cmdbytes = PyUnicode_AsEncodedString(cmdstr, "utf-8", ERRORS_ENCODE_ARG); Py_XDECREF(cmdstr); run(PyBytes_AsString(cmdbytes), arg, &pygilstate); @@ -1745,7 +1748,7 @@ LineToString(const char *str) } *p = '\0'; - result = PyUnicode_Decode(tmp, len, (char *)ENC_OPT, CODEC_ERROR_HANDLER); + result = PyUnicode_Decode(tmp, len, (char *)ENC_OPT, ERRORS_DECODE_ARG); vim_free(tmp); return result; diff --git a/src/testdir/test_python2.vim b/src/testdir/test_python2.vim index afefd10fe..cf35a50b3 100644 --- a/src/testdir/test_python2.vim +++ b/src/testdir/test_python2.vim @@ -3775,4 +3775,11 @@ func Test_python_keyboard_interrupt() close! endfunc +func Test_python_non_utf8_string() + smap <Esc>@ <A-@> + python vim.command('redir => _tmp_smaps | smap | redir END') + python vim.eval('_tmp_smaps').splitlines() + sunmap <Esc>@ +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/testdir/test_python3.vim b/src/testdir/test_python3.vim index 78fe5d4c4..1a86a7e97 100644 --- a/src/testdir/test_python3.vim +++ b/src/testdir/test_python3.vim @@ -4008,4 +4008,11 @@ func Test_python3_iter_ref() call assert_equal(1, g:options_iter_ref_count_increase) endfunc +func Test_python3_non_utf8_string() + smap <Esc>@ <A-@> + py3 vim.command('redir => _tmp_smaps | smap | redir END') + py3 vim.eval('_tmp_smaps').splitlines() + sunmap <Esc>@ +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c index 05f08e5c7..aa6967d85 100644 --- a/src/version.c +++ b/src/version.c @@ -750,6 +750,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 2178, /**/ 2177, /**/ -- 2.40.0