From f2c2a4be9e466f14677089efe33e20ca0b146809 Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker" <cmb@php.net>
Date: Thu, 21 Jul 2016 18:36:12 +0200
Subject: [PATCH] Fix #72330: CSV fields incorrectly split if escape char
 followed by UTF chars

We must not forget to properly reset the state for multibyte characters
following an escape character.
---
 NEWS                                  |  4 ++++
 ext/standard/file.c                   |  1 +
 ext/standard/tests/file/bug72330.phpt | 26 ++++++++++++++++++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 ext/standard/tests/file/bug72330.phpt

diff --git a/NEWS b/NEWS
index fb08c0213e..c2cae0af64 100644
--- a/NEWS
+++ b/NEWS
@@ -44,6 +44,10 @@ PHP                                                                        NEWS
   . Fixed bug #72222 (ReflectionClass::export doesn't handle array constants).
     (Nikita Nefedov)
 
+- Standard:
+  . Fixed bug #72330 (CSV fields incorrectly split if escape char followed by
+    UTF chars). (cmb)
+
 - SPL:
   . Fixed bug #72122 (IteratorIterator breaks '@' error suppression). (kinglozzer)
 
diff --git a/ext/standard/file.c b/ext/standard/file.c
index f8c4e0450b..d8471fff1c 100644
--- a/ext/standard/file.c
+++ b/ext/standard/file.c
@@ -2219,6 +2219,7 @@ PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, char
 								memcpy(tptr, hunk_begin, bptr - hunk_begin);
 								tptr += (bptr - hunk_begin);
 								hunk_begin = bptr;
+								state = 0;
 								break;
 							default:
 								bptr += inc_len;
diff --git a/ext/standard/tests/file/bug72330.phpt b/ext/standard/tests/file/bug72330.phpt
new file mode 100644
index 0000000000..843032ae2d
--- /dev/null
+++ b/ext/standard/tests/file/bug72330.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Bug #72330 (CSV fields incorrectly split if escape char followed by UTF chars)
+--SKIPIF--
+<?php
+if (setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8") === false) {
+    die('skip available locales not usable');
+}
+?>
+--FILE--
+<?php
+setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8");
+
+$utf_1 = chr(0xD1) . chr(0x81); // U+0441
+$utf_2   = chr(0xD8) . chr(0x80); // U+0600
+
+$string = '"first #' . $utf_1 . $utf_2 . '";"second"';
+$fields = str_getcsv($string, ';', '"', "#");
+var_dump($fields);
+?>
+--EXPECT--
+array(2) {
+  [0]=>
+  string(11) "first #с؀"
+  [1]=>
+  string(6) "second"
+}
-- 
2.40.0