granicus.if.org Git - php/commitdiff
Move utf8_encode and utf8_decode to ext/standard
author Andrea Faulds <ajf@ajf.me>
Thu, 13 Oct 2016 22:33:33 +0000 (23:33 +0100)
committer Andrea Faulds <ajf@ajf.me>
Mon, 17 Oct 2016 14:39:02 +0000 (15:39 +0100)
ext/standard/basic_functions.c
ext/standard/php_string.h
ext/standard/string.c
ext/standard/tests/strings/bug43957.phpt [moved from ext/xml/tests/bug43957.phpt with 58% similarity]
ext/standard/tests/strings/bug49687.phpt [moved from ext/xml/tests/bug49687.phpt with 70% similarity]
ext/standard/tests/strings/utf8.phpt [moved from ext/xml/tests/xml006.phpt with 85% similarity]
ext/standard/tests/strings/utf8_decode_error.phpt [moved from ext/xml/tests/utf8_decode_error.phpt with 88% similarity]
ext/standard/tests/strings/utf8_decode_variation1.phpt [moved from ext/xml/tests/utf8_decode_variation1.phpt with 95% similarity]
ext/standard/tests/strings/utf8_encode_error.phpt [moved from ext/xml/tests/utf8_encode_error.phpt with 88% similarity]
ext/standard/tests/strings/utf8_encode_variation1.phpt [moved from ext/xml/tests/utf8_encode_variation1.phpt with 95% similarity]
ext/xml/xml.c

index 13e8a4e6eb80e65be06fe3ba75eb531f5ac8ce41..d528e5190834c78ee01246eda123b9be1759e875 100644 (file)
@@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
        ZEND_ARG_INFO(0, length)
        ZEND_ARG_INFO(0, case_sensitivity)
 ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
+       ZEND_ARG_INFO(0, data)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
+       ZEND_ARG_INFO(0, data)
+ZEND_END_ARG_INFO()
 /* }}} */
 /* {{{ syslog.c */
 #ifdef HAVE_SYSLOG_H
@@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */
        PHP_FE(str_split,                                                                                                               arginfo_str_split)
        PHP_FE(strpbrk,                                                                                                                 arginfo_strpbrk)
        PHP_FE(substr_compare,                                                                                                  arginfo_substr_compare)
+       PHP_FE(utf8_encode,                                                                                                     arginfo_utf8_encode)
+       PHP_FE(utf8_decode,                                                                                                     arginfo_utf8_decode)
 
 #ifdef HAVE_STRCOLL
        PHP_FE(strcoll,                                                                                                                 arginfo_strcoll)
index 14b66e7e131499c7facfad53688c301c422fcb69..6fc75871216f1836dbec8208ab397f939c83ae4c 100644 (file)
@@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count);
 PHP_FUNCTION(str_split);
 PHP_FUNCTION(strpbrk);
 PHP_FUNCTION(substr_compare);
+PHP_FUNCTION(utf8_encode);
+PHP_FUNCTION(utf8_decode);
 #ifdef HAVE_STRCOLL
 PHP_FUNCTION(strcoll);
 #endif
index fa59ddd06f7af2309496c2393c0df6e04cacfd3e..4389e1070266fe88dec58d5cd5ff381dae465907 100644 (file)
@@ -64,6 +64,8 @@
 
 /* For str_getcsv() support */
 #include "ext/standard/file.h"
+/* For php_next_utf8_char() */
+#include "ext/standard/html.h"
 
 #define STR_PAD_LEFT                   0
 #define STR_PAD_RIGHT                  1
@@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare)
 }
 /* }}} */
 
+/* {{{ php_utf8_encode() - Returns a new zend_string holding the UTF-8 encoding of the len ISO-8859-1 bytes at s. */
+static zend_string *php_utf8_encode(const char *s, size_t len)
+{
+       size_t pos = len; /* number of input bytes still to convert */
+       zend_string *str;
+       unsigned char c;
+
+       str = zend_string_safe_alloc(len, 2, 0, 0); /* sized for the worst case of 2 output bytes per input byte (assumes safe_alloc computes len * 2 + 0 -- confirm) */
+       ZSTR_LEN(str) = 0; /* the length field doubles as the write cursor below */
+       while (pos > 0) {
+               /* Each input byte is one Latin-1 character, and the lower 256 codepoints
+                * of Unicode are identical to Latin-1, so the byte value is the codepoint. */
+               c = (unsigned char)(*s);
+               if (c < 0x80) {
+                       ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c; /* ASCII range: copied through as a single byte */
+               /* Bytes >= 0x80 take the two-byte form below (lead 0xc0 | top two bits,
+                * trail 0x80 | low six bits); no codepoint below 256 needs a longer one. */
+               } else {
+                       ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
+                       ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
+               }
+               pos--;
+               s++;
+       }
+       ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; /* NUL-terminate after the last byte written */
+       str = zend_string_truncate(str, ZSTR_LEN(str), 0); /* shrink the allocation to the bytes actually written */
+       return str;
+}
+/* }}} */
+
+/* {{{ php_utf8_decode() - Returns a new zend_string with one byte per character decoded from the len UTF-8 bytes at s; undecodable or > 0xFF characters become '?'. */
+static zend_string *php_utf8_decode(const char *s, size_t len)
+{
+       size_t pos = 0; /* read offset into s, updated through the &pos argument below */
+       unsigned int c;
+       zend_string *str;
+
+       str = zend_string_alloc(len, 0); /* one byte is written per loop pass; len is enough provided every pass advances pos by at least 1 -- NOTE(review): confirm against php_next_utf8_char */
+       ZSTR_LEN(str) = 0; /* the length field doubles as the write cursor below */
+       while (pos < len) {
+               int status = FAILURE;
+               c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); /* pos and status are passed by address; the helper is expected to advance pos and set status */
+
+               /* The lower 256 codepoints of Unicode are identical to Latin-1, so a
+                * decoded codepoint up to 0xFF is stored as-is; a failed decode or any
+                * codepoint above 0xFF is replaced with a literal '?'. */
+               if (status == FAILURE || c > 0xFFU) {
+                       c = '?';
+               }
+
+               ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
+       }
+       ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; /* NUL-terminate after the last byte written */
+       if (ZSTR_LEN(str) < len) { /* output came out shorter than the input: release the unused tail */
+               str = zend_string_truncate(str, ZSTR_LEN(str), 0);
+       }
+
+       return str;
+}
+/* }}} */
+
+
+/* {{{ proto string utf8_encode(string data)
+   Encodes an ISO-8859-1 string to UTF-8 and returns the encoded copy */
+PHP_FUNCTION(utf8_encode)
+{
+       char *arg;      /* bytes of the "data" argument */
+       size_t arg_len; /* length of arg in bytes */
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
+               return; /* argument parsing failed: leave without building a result string */
+       }
+
+       RETURN_STR(php_utf8_encode(arg, arg_len)); /* the zend_string built by the helper becomes the return value */
+}
+/* }}} */
+
+/* {{{ proto string utf8_decode(string data)
+   Converts a UTF-8 encoded string to ISO-8859-1 and returns the converted copy */
+PHP_FUNCTION(utf8_decode)
+{
+       char *arg;      /* bytes of the "data" argument */
+       size_t arg_len; /* length of arg in bytes */
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
+               return; /* argument parsing failed: leave without building a result string */
+       }
+
+       RETURN_STR(php_utf8_decode(arg, arg_len)); /* the zend_string built by the helper becomes the return value */
+}
+/* }}} */
+
 /*
  * Local variables:
  * tab-width: 4
similarity index 58%
rename from ext/xml/tests/bug43957.phpt
rename to ext/standard/tests/strings/bug43957.phpt
index f11d15627be823f425dfcf1071af0def89b2288c..0380787b73c1da6565d7a25634f9f63295acfdb5 100644 (file)
@@ -1,10 +1,5 @@
 --TEST--
 Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string)
---SKIPIF--
-<?php
-require_once("skipif.inc");
-if (!extension_loaded('xml')) die ("skip xml extension not available");
-?>
 --FILE--
 <?php
   echo utf8_decode('abc'.chr(0xe0));
similarity index 70%
rename from ext/xml/tests/bug49687.phpt
rename to ext/standard/tests/strings/bug49687.phpt
index 3ff19cee7e0a2f878bb303111aa3ed83095008a4..99e8dc3ec6592f47e330a1dc2fe2160bbb8fdfbe 100644 (file)
@@ -1,10 +1,5 @@
 --TEST--\r
 Bug #49687 Several utf8_decode deficiencies and vulnerabilities\r
---SKIPIF--\r
-<?php\r
-require_once("skipif.inc");\r
-if (!extension_loaded('xml')) die ("skip xml extension not available");\r
-?>\r
 --FILE--\r
 <?php\r
 \r
similarity index 85%
rename from ext/xml/tests/xml006.phpt
rename to ext/standard/tests/strings/utf8.phpt
index c714e85913979a2623b1440671aba22621cfee0c..aea04fdecd855fb1cb180ca21a7d74a90039b6e2 100644 (file)
@@ -1,7 +1,5 @@
 --TEST--
 UTF-8<->ISO Latin 1 encoding/decoding test
---SKIPIF--
-<?php include("skipif.inc"); ?>
 --FILE--
 <?php
 printf("%s -> %s\n", urlencode("æ"), urlencode(utf8_encode("æ")));
similarity index 88%
rename from ext/xml/tests/utf8_decode_error.phpt
rename to ext/standard/tests/strings/utf8_decode_error.phpt
index 8735fd82f6cec9184c00a0db138c352df9d5ded7..911cc15cfcb843b9199a58d7224c7b4f6e17fe79 100644 (file)
@@ -1,16 +1,10 @@
 --TEST--
 Test utf8_decode() function : error conditions 
---SKIPIF--
-<?php 
-if (!extension_loaded("xml")) {
-       print "skip - XML extension not loaded"; 
-}       
-?>
 --FILE--
 <?php
 /* Prototype  : proto string utf8_decode(string data)
  * Description: Converts a UTF-8 encoded string to ISO-8859-1 
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
  * Alias to functions: 
  */
 
similarity index 95%
rename from ext/xml/tests/utf8_decode_variation1.phpt
rename to ext/standard/tests/strings/utf8_decode_variation1.phpt
index 4b9679a89576f15f232d2f732f70f2315f49c1b5..f564b87da02a90c98967b8e21c602ecdb58be627 100644 (file)
@@ -1,16 +1,10 @@
 --TEST--
 Test utf8_decode() function : usage variations  - different types for data
---SKIPIF--
-<?php 
-if (!extension_loaded("xml")) {
-       print "skip - XML extension not loaded"; 
-}       
-?>
 --FILE--
 <?php
 /* Prototype  : proto string utf8_decode(string data)
  * Description: Converts a UTF-8 encoded string to ISO-8859-1 
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
  * Alias to functions: 
  */
 
similarity index 88%
rename from ext/xml/tests/utf8_encode_error.phpt
rename to ext/standard/tests/strings/utf8_encode_error.phpt
index a82f98ff3b2d23c4a26f6cf5adb83b23c2a8a512..e12f0978b6bc515a5beb82d53beb91dec27f1795 100644 (file)
@@ -1,16 +1,10 @@
 --TEST--
 Test utf8_encode() function : error conditions
---SKIPIF--
-<?php 
-if (!extension_loaded("xml")) {
-       print "skip - XML extension not loaded"; 
-}       
-?>
 --FILE--
 <?php
 /* Prototype  : proto string utf8_encode(string data)
  * Description: Encodes an ISO-8859-1 string to UTF-8 
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
  * Alias to functions: 
  */
 
similarity index 95%
rename from ext/xml/tests/utf8_encode_variation1.phpt
rename to ext/standard/tests/strings/utf8_encode_variation1.phpt
index 04b956c42216090d99bee5cf47d8081f29445cd6..fa4b79976e5d502138b64ecd7584117f88542515 100644 (file)
@@ -1,16 +1,10 @@
 --TEST--
 Test utf8_encode() function : usage variations  - <type here specifics of this variation>
---SKIPIF--
-<?php 
-if (!extension_loaded("xml")) {
-       print "skip - XML extension not loaded"; 
-}       
-?>
 --FILE--
 <?php
 /* Prototype  : proto string utf8_encode(string data)
  * Description: Encodes an ISO-8859-1 string to UTF-8 
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
  * Alias to functions: 
  */
 
index f0da47dc5b4309ba5cdf6535b2f39e92774ffde0..f8d72523a00ee5ae301bff89d30974dd46976775 100644 (file)
@@ -212,14 +212,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2)
        ZEND_ARG_INFO(0, option)
 ZEND_END_ARG_INFO()
 
-ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
-       ZEND_ARG_INFO(0, data)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
-       ZEND_ARG_INFO(0, data)
-ZEND_END_ARG_INFO()
-
 const zend_function_entry xml_functions[] = {
        PHP_FE(xml_parser_create,                                       arginfo_xml_parser_create)
        PHP_FE(xml_parser_create_ns,                            arginfo_xml_parser_create_ns)
@@ -243,8 +235,6 @@ const zend_function_entry xml_functions[] = {
        PHP_FE(xml_parser_free,                                         arginfo_xml_parser_free)
        PHP_FE(xml_parser_set_option,                           arginfo_xml_parser_set_option)
        PHP_FE(xml_parser_get_option,                           arginfo_xml_parser_get_option)
-       PHP_FE(utf8_encode,                                             arginfo_utf8_encode)
-       PHP_FE(utf8_decode,                                             arginfo_utf8_decode)
        PHP_FE_END
 };
 
@@ -1667,46 +1657,6 @@ PHP_FUNCTION(xml_parser_get_option)
 }
 /* }}} */
 
-/* {{{ proto string utf8_encode(string data) 
-   Encodes an ISO-8859-1 string to UTF-8 */
-PHP_FUNCTION(utf8_encode)
-{
-       char *arg;
-       size_t arg_len;
-       zend_string *encoded;
-
-       if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
-               return;
-       }
-
-       encoded = xml_utf8_encode(arg, arg_len, (XML_Char*)"ISO-8859-1");
-       if (encoded == NULL) {
-               RETURN_FALSE;
-       }
-       RETURN_STR(encoded);
-}
-/* }}} */
-
-/* {{{ proto string utf8_decode(string data) 
-   Converts a UTF-8 encoded string to ISO-8859-1 */
-PHP_FUNCTION(utf8_decode)
-{
-       char *arg;
-       size_t arg_len;
-       zend_string *decoded;
-
-       if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
-               return;
-       }
-
-       decoded = xml_utf8_decode((XML_Char*)arg, arg_len, (XML_Char*)"ISO-8859-1");
-       if (decoded == NULL) {
-               RETURN_FALSE;
-       }
-       RETURN_STR(decoded);
-}
-/* }}} */
-
 #endif
 
 /*