granicus.if.org Git - php/commitdiff
Fix #65732: grapheme_*() is not Unicode compliant on CR LF sequence
author Christoph M. Becker <cmbecker69@gmx.de>
Fri, 19 Aug 2016 17:05:33 +0000 (19:05 +0200)
committer Christoph M. Becker <cmbecker69@gmx.de>
Sat, 20 Aug 2016 01:01:35 +0000 (03:01 +0200)
According to the Unicode specification (at least as of 5.1), a CRLF sequence
is considered to be a single grapheme. We cater to that special case by
letting grapheme_ascii_check() fail when a CRLF sequence is present. While it
would be trivial to make grapheme_ascii_check() account for CRLF as far as
grapheme_strlen() is concerned, grapheme_substr() and grapheme_strrpos()
would be much harder to handle, so we accept the slight performance penalty
if CRLF is involved.

NEWS
ext/intl/grapheme/grapheme_util.c
ext/intl/tests/bug65732.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 013d85f84b13c5b8e64e0ccd636b3f447bffc010..e1d7f0441628359c9760297110651fd6614ae016 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,10 @@ PHP                                                                        NEWS
 - IMAP:
   . Fixed bug #72852 (imap_mail null dereference). (Anatol)
 
+- Intl:
+  . Fixed bug #65732 (grapheme_*() is not Unicode compliant on CR LF
+    sequence). (cmb)
+
 - JSON:
   . Fixed bug #72787 (json_decode reads out of bounds). (Jakub Zelenka)
 
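diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index c752b02372e2cb3dcba2a387b4bbb604afb0efff..350ba66255894b9cd3367a37f673c5c970215920 100644 (file)
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c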
@@ -221,7 +221,7 @@ int grapheme_ascii_check(const unsigned char *day, int32_t len)
 {
        int ret_len = len;
        while ( len-- ) {
-       if ( *day++ > 0x7f )
+       if ( *day++ > 0x7f || (*day == '\n' && *(day - 1) == '\r') )
                return -1;
        }
 
diff --git a/ext/intl/tests/bug65732.phpt b/ext/intl/tests/bug65732.phpt
new file mode 100644 (file)
index 0000000..b49f884
--- /dev/null
+++ b/ext/intl/tests/bug65732.phpt
@@ -0,0 +1,19 @@
+--TEST--
+Bug #65732 (grapheme_*() is not Unicode compliant on CR LF sequence)
+--SKIPIF--
+<?php
+if (!extension_loaded('intl')) die('skip intl extension not available');
+?>
+--FILE--
+<?php
+var_dump(grapheme_strlen("\r\n"));
+var_dump(grapheme_substr(implode("\r\n", ['abc', 'def', 'ghi']), 5));
+var_dump(grapheme_strrpos("a\r\nb", 'b'));
+?>
+==DONE==
+--EXPECT--
+int(1)
+string(7) "ef
+ghi"
+int(2)
+==DONE==