granicus.if.org Git - php/commitdiff
Optimize php_unicode_convert_case (cuts mbstring case conversion time ~15%)
author Alex Dowad <alexinbeijing@gmail.com>
Sat, 27 Jun 2020 22:25:13 +0000 (00:25 +0200)
committer Alex Dowad <alexinbeijing@gmail.com>
Mon, 31 Aug 2020 21:17:25 +0000 (23:17 +0200)
This function uses various subfunctions to convert the case of Unicode wchars.
Previously, these subfunctions would store the case-converted characters in
a buffer, and the parent function would then pass them (one codepoint at a
time) to the next filter in the filter chain.

Rather than passing around that buffer, it's better for the subfunctions to
pass the case-converted codepoints directly to the next filter in the filter
chain. This speeds things up nicely.

ext/mbstring/php_unicode.c

index dc0c3f08319998e0205215e183d1203bcc6544cd..37a996b8beb89e7e5dc431e57d36abeadfd4ec2f 100644 (file)
@@ -249,53 +249,60 @@ static inline unsigned php_unicode_tofold_simple(unsigned code, enum mbfl_no_enc
        return code;
 }
 
-static inline unsigned php_unicode_tolower_full(
-               unsigned code, enum mbfl_no_encoding enc, unsigned *out) {
+static inline void php_unicode_tolower_full(unsigned code, enum mbfl_no_encoding enc,
+       mbfl_convert_filter* next_filter) {
        code = php_unicode_tolower_raw(code, enc);
        if (UNEXPECTED(code > 0xffffff)) {
                unsigned len = code >> 24;
                const unsigned *p = &_uccase_extra_table[code & 0xffffff];
-               memcpy(out, p + 1, len * sizeof(unsigned));
-               return len;
+               while (len--) {
+                       (next_filter->filter_function)(*++p, next_filter);
+               }
+       } else {
+               (next_filter->filter_function)(code, next_filter);
        }
-       *out = code;
-       return 1;
 }
-static inline unsigned php_unicode_toupper_full(
-               unsigned code, enum mbfl_no_encoding enc, unsigned *out) {
+
+static inline void php_unicode_toupper_full(unsigned code, enum mbfl_no_encoding enc,
+       mbfl_convert_filter* next_filter) {
        code = php_unicode_toupper_raw(code, enc);
        if (UNEXPECTED(code > 0xffffff)) {
                unsigned len = code >> 24;
                const unsigned *p = &_uccase_extra_table[code & 0xffffff];
-               memcpy(out, p + 1, len * sizeof(unsigned));
-               return len;
+               while (len--) {
+                       (next_filter->filter_function)(*++p, next_filter);
+               }
+       } else {
+               (next_filter->filter_function)(code, next_filter);
        }
-       *out = code;
-       return 1;
 }
-static inline unsigned php_unicode_totitle_full(
-               unsigned code, enum mbfl_no_encoding enc, unsigned *out) {
+
+static inline void php_unicode_totitle_full(unsigned code, enum mbfl_no_encoding enc,
+       mbfl_convert_filter* next_filter) {
        code = php_unicode_totitle_raw(code, enc);
        if (UNEXPECTED(code > 0xffffff)) {
                unsigned len = code >> 24;
                const unsigned *p = &_uccase_extra_table[code & 0xffffff];
-               memcpy(out, p + 1, len * sizeof(unsigned));
-               return len;
+               while (len--) {
+                       (next_filter->filter_function)(*++p, next_filter);
+               }
+       } else {
+               (next_filter->filter_function)(code, next_filter);
        }
-       *out = code;
-       return 1;
 }
-static inline unsigned php_unicode_tofold_full(
-               unsigned code, enum mbfl_no_encoding enc, unsigned *out) {
+
+static inline void php_unicode_tofold_full(unsigned code, enum mbfl_no_encoding enc,
+       mbfl_convert_filter* next_filter) {
        code = php_unicode_tofold_raw(code, enc);
        if (UNEXPECTED(code > 0xffffff)) {
                unsigned len = code >> 24;
                const unsigned *p = &_uccase_extra_table[code & 0xffffff];
-               memcpy(out, p + 1, len * sizeof(unsigned));
-               return len;
+               while (len--) {
+                       (next_filter->filter_function)(*++p, next_filter);
+               }
+       } else {
+               (next_filter->filter_function)(code, next_filter);
        }
-       *out = code;
-       return 1;
 }
 
 struct convert_case_data {
@@ -308,8 +315,7 @@ struct convert_case_data {
 static int convert_case_filter(int c, void *void_data)
 {
        struct convert_case_data *data = (struct convert_case_data *) void_data;
-       unsigned out[3];
-       unsigned len, i;
+       unsigned code;
 
        /* Handle invalid characters early, as we assign special meaning to
         * codepoints above 0xffffff. */
@@ -320,30 +326,30 @@ static int convert_case_filter(int c, void *void_data)
 
        switch (data->case_mode) {
                case PHP_UNICODE_CASE_UPPER_SIMPLE:
-                       out[0] = php_unicode_toupper_simple(c, data->no_encoding);
-                       len = 1;
+                       code = php_unicode_toupper_simple(c, data->no_encoding);
+                       (data->next_filter->filter_function)(code, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_UPPER:
-                       len = php_unicode_toupper_full(c, data->no_encoding, out);
+                       php_unicode_toupper_full(c, data->no_encoding, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_LOWER_SIMPLE:
-                       out[0] = php_unicode_tolower_simple(c, data->no_encoding);
-                       len = 1;
+                       code = php_unicode_tolower_simple(c, data->no_encoding);
+                       (data->next_filter->filter_function)(code, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_LOWER:
-                       len = php_unicode_tolower_full(c, data->no_encoding, out);
+                       php_unicode_tolower_full(c, data->no_encoding, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_FOLD:
-                       len = php_unicode_tofold_full(c, data->no_encoding, out);
+                       php_unicode_tofold_full(c, data->no_encoding, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_FOLD_SIMPLE:
-                       out[0] = php_unicode_tofold_simple(c, data->no_encoding);
-                       len = 1;
+                       code = php_unicode_tofold_simple(c, data->no_encoding);
+                       (data->next_filter->filter_function)(code, data->next_filter);
                        break;
 
                case PHP_UNICODE_CASE_TITLE_SIMPLE:
@@ -351,17 +357,17 @@ static int convert_case_filter(int c, void *void_data)
                {
                        if (data->title_mode) {
                                if (data->case_mode == PHP_UNICODE_CASE_TITLE_SIMPLE) {
-                                       out[0] = php_unicode_tolower_simple(c, data->no_encoding);
-                                       len = 1;
+                                       code = php_unicode_tolower_simple(c, data->no_encoding);
+                                       (data->next_filter->filter_function)(code, data->next_filter);
                                } else {
-                                       len = php_unicode_tolower_full(c, data->no_encoding, out);
+                                       php_unicode_tolower_full(c, data->no_encoding, data->next_filter);
                                }
                        } else {
                                if (data->case_mode == PHP_UNICODE_CASE_TITLE_SIMPLE) {
-                                       out[0] = php_unicode_totitle_simple(c, data->no_encoding);
-                                       len = 1;
+                                       code = php_unicode_totitle_simple(c, data->no_encoding);
+                                       (data->next_filter->filter_function)(code, data->next_filter);
                                } else {
-                                       len = php_unicode_totitle_full(c, data->no_encoding, out);
+                                       php_unicode_totitle_full(c, data->no_encoding, data->next_filter);
                                }
                        }
                        if (!php_unicode_is_case_ignorable(c)) {
@@ -372,9 +378,6 @@ static int convert_case_filter(int c, void *void_data)
                EMPTY_SWITCH_DEFAULT_CASE()
        }
 
-       for (i = 0; i < len; i++) {
-               (*data->next_filter->filter_function)(out[i], data->next_filter);
-       }
        return 0;
 }