granicus.if.org Git - php/commitdiff
Fix #72330: CSV fields incorrectly split if escape char followed by UTF chars
author Christoph M. Becker <cmb@php.net>
Thu, 21 Jul 2016 16:36:12 +0000 (18:36 +0200)
committer Christoph M. Becker <cmb@php.net>
Thu, 21 Jul 2016 16:37:24 +0000 (18:37 +0200)
We must not forget to properly reset the state for multibyte characters
following an escape character.

NEWS
ext/standard/file.c
ext/standard/tests/file/bug72330.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index fb08c0213e29ec9d76511df2bd796396912fe87d..c2cae0af647921649b8683a6c8fe481867880447 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -44,6 +44,10 @@ PHP                                                                        NEWS
   . Fixed bug #72222 (ReflectionClass::export doesn't handle array constants).
     (Nikita Nefedov)
 
+- Standard:
+  . Fixed bug #72330 (CSV fields incorrectly split if escape char followed by
+    UTF chars). (cmb)
+
 - SPL:
   . Fixed bug #72122 (IteratorIterator breaks '@' error suppression). (kinglozzer)
 
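diff --git a/ext/standard/file.c b/ext/standard/file.c
index f8c4e0450bc39aacd445c929905ce87d4c74846b..d8471fff1c64b3b8c1905969a42041cf456ba98e 100644 (file)
--- a/ext/standard/file.c
+++ b/ext/standard/file.c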
@@ -2219,6 +2219,7 @@ PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, char
                                                                memcpy(tptr, hunk_begin, bptr - hunk_begin);
                                                                tptr += (bptr - hunk_begin);
                                                                hunk_begin = bptr;
+                                                               state = 0;
                                                                break;
                                                        default:
                                                                bptr += inc_len;
diff --git a/ext/standard/tests/file/bug72330.phpt b/ext/standard/tests/file/bug72330.phpt
new file mode 100644 (file)
index 0000000..843032a
--- /dev/null
+++ b/ext/standard/tests/file/bug72330.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Bug #72330 (CSV fields incorrectly split if escape char followed by UTF chars)
+--SKIPIF--
+<?php
+if (setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8") === false) {
+    die('skip available locales not usable'); // none of the listed UTF-8 locales could be set; output starting with "skip" asks run-tests to skip this test
+}
+?>
+--FILE--
+<?php
+setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8"); // same UTF-8 locale list as the SKIPIF check
+
+$utf_1 = chr(0xD1) . chr(0x81); // UTF-8 bytes D1 81 = U+0441 (Cyrillic small letter es)
+$utf_2   = chr(0xD8) . chr(0x80); // UTF-8 bytes D8 80 = U+0600
+
+$string = '"first #' . $utf_1 . $utf_2 . '";"second"'; // escape char '#' is immediately followed by two multibyte characters inside the enclosed first field
+$fields = str_getcsv($string, ';', '"', "#"); // delimiter ';', enclosure '"', escape '#'
+var_dump($fields); // must be exactly two fields; the first keeps the '#' and both multibyte characters
+?>
+--EXPECT--
+array(2) {
+  [0]=>
+  string(11) "first #с؀"
+  [1]=>
+  string(6) "second"
+}