From 32a26443052a734f7c24434497a0c9a2210607eb Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Mon, 16 Mar 2020 13:09:16 +0100 Subject: [PATCH] Fix #79200: Some iconv functions cut Windows-1258 To cater to potentially state-dependent encodings, we have to reset the conversion descriptor into its initial shift state to properly finish the conversion. Furthermore, state-dependent encodings may not show progress when comparing `in_left` before and after the conversion; instead, we have to check whether `out_left` has decreased. Also, we have to cater to the fact that the final, potentially state-resetting call does not signal failure, but we still have to break out of the respective loops afterwards. --- NEWS | 3 ++ ext/iconv/iconv.c | 85 +++++++++++++++++++---------------- ext/iconv/tests/bug79200.phpt | 18 ++++++++ 3 files changed, 68 insertions(+), 38 deletions(-) create mode 100644 ext/iconv/tests/bug79200.phpt diff --git a/NEWS b/NEWS index 10b5b91fe5..51bd45a428 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,9 @@ PHP NEWS . Fixed bug #79396 (DateTime hour incorrect during DST jump forward). (Nate Brunette) +- Iconv: + . Fixed bug #79200 (Some iconv functions cut Windows-1258). (cmb) + - SimpleXML: . Fixed bug #61597 (SXE properties may lack attributes and content). 
(cmb) diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c index a152187e6c..02580fff8a 100644 --- a/ext/iconv/iconv.c +++ b/ext/iconv/iconv.c @@ -739,6 +739,7 @@ static php_iconv_err_t _php_iconv_strlen(size_t *pretval, const char *str, size_ size_t out_left; size_t cnt; + int more; *pretval = (size_t)-1; @@ -758,25 +759,23 @@ static php_iconv_err_t _php_iconv_strlen(size_t *pretval, const char *str, size_ errno = 0; out_left = 0; + more = nbytes > 0; - for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0; cnt+=2) { - size_t prev_in_left; + for (in_p = str, in_left = nbytes, cnt = 0; more;) { out_p = buf; out_left = sizeof(buf); - prev_in_left = in_left; + more = in_left > 0; - if (iconv(cd, (char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { - if (prev_in_left == in_left) { - break; - } + iconv(cd, more ? (char **)&in_p : NULL, more ? &in_left : NULL, (char **) &out_p, &out_left); + if (out_left == sizeof(buf)) { + break; + } else { + ZEND_ASSERT((sizeof(buf) - out_left) % GENERIC_SUPERSET_NBYTES == 0); + cnt += (sizeof(buf) - out_left) / GENERIC_SUPERSET_NBYTES; } } - if (out_left > 0) { - cnt -= out_left / GENERIC_SUPERSET_NBYTES; - } - #if ICONV_SUPPORTS_ERRNO switch (errno) { case EINVAL: @@ -825,6 +824,7 @@ static php_iconv_err_t _php_iconv_substr(smart_str *pretval, size_t cnt; size_t total_len; + int more; err = _php_iconv_strlen(&total_len, str, nbytes, enc); if (err != PHP_ICONV_ERR_SUCCESS) { @@ -879,18 +879,17 @@ static php_iconv_err_t _php_iconv_substr(smart_str *pretval, cd2 = (iconv_t)NULL; errno = 0; + more = nbytes > 0 && len > 0; - for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0 && len > 0; ++cnt) { - size_t prev_in_left; + for (in_p = str, in_left = nbytes, cnt = 0; more; ++cnt) { out_p = buf; out_left = sizeof(buf); - prev_in_left = in_left; + more = in_left > 0 && len > 0; - if (iconv(cd1, (char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { - if (prev_in_left == in_left) { - break; - } + 
iconv(cd1, more ? (char **)&in_p : NULL, more ? &in_left : NULL, (char **) &out_p, &out_left); + if (out_left == sizeof(buf)) { + break; } if ((zend_long)cnt >= offset) { @@ -978,6 +977,8 @@ static php_iconv_err_t _php_iconv_strpos(size_t *pretval, size_t ndl_buf_left; size_t match_ofs; + int more; + size_t iconv_ret; *pretval = (size_t)-1; @@ -1010,37 +1011,38 @@ static php_iconv_err_t _php_iconv_strpos(size_t *pretval, ndl_buf_p = ZSTR_VAL(ndl_buf); ndl_buf_left = ZSTR_LEN(ndl_buf); match_ofs = (size_t)-1; + more = haystk_nbytes > 0; - for (in_p = haystk, in_left = haystk_nbytes, cnt = 0; in_left > 0; ++cnt) { - size_t prev_in_left; + for (in_p = haystk, in_left = haystk_nbytes, cnt = 0; more; ++cnt) { out_p = buf; out_left = sizeof(buf); - prev_in_left = in_left; + more = in_left > 0; - if (iconv(cd, (char **)&in_p, &in_left, (char **) &out_p, &out_left) == (size_t)-1) { - if (prev_in_left == in_left) { + iconv_ret = iconv(cd, more ? (char **)&in_p : NULL, more ? &in_left : NULL, (char **) &out_p, &out_left); + if (out_left == sizeof(buf)) { + break; + } #if ICONV_SUPPORTS_ERRNO - switch (errno) { - case EINVAL: - err = PHP_ICONV_ERR_ILLEGAL_CHAR; - break; + if (iconv_ret == (size_t)-1) { + switch (errno) { + case EINVAL: + err = PHP_ICONV_ERR_ILLEGAL_CHAR; + break; - case EILSEQ: - err = PHP_ICONV_ERR_ILLEGAL_SEQ; - break; + case EILSEQ: + err = PHP_ICONV_ERR_ILLEGAL_SEQ; + break; - case E2BIG: - break; + case E2BIG: + break; - default: - err = PHP_ICONV_ERR_UNKNOWN; - break; - } -#endif - break; + default: + err = PHP_ICONV_ERR_UNKNOWN; + break; } } +#endif if (offset >= 0) { if (cnt >= (size_t)offset) { if (_php_iconv_memequal(buf, ndl_buf_p, sizeof(buf))) { @@ -2012,6 +2014,13 @@ static php_iconv_err_t _php_iconv_mime_decode(smart_str *pretval, const char *st *next_pos = p1; } + if (cd != (iconv_t)(-1)) { + _php_iconv_appendl(pretval, NULL, 0, cd); + } + if (cd_pl != (iconv_t)(-1)) { + _php_iconv_appendl(pretval, NULL, 0, cd_pl); + } + smart_str_0(pretval); 
out: if (cd != (iconv_t)(-1)) { diff --git a/ext/iconv/tests/bug79200.phpt b/ext/iconv/tests/bug79200.phpt new file mode 100644 index 0000000000..e8d86086a3 --- /dev/null +++ b/ext/iconv/tests/bug79200.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79200 (Some iconv functions cut Windows-1258) +--SKIPIF-- + +--FILE-- + +--EXPECT-- +string(9) "test test" +int(9) +int(0) +string(9) "test test" -- 2.40.0