From 9011a70c7f83ce5fc70ac9b6aef523ddda9e84fa Mon Sep 17 00:00:00 2001 From: Xinchen Hui Date: Sun, 23 Mar 2014 23:38:32 +0800 Subject: [PATCH] Refactor mbstring (incompleted) --- ext/mbstring/mbstring.c | 30 +++++++--- ext/mbstring/php_mbregex.c | 116 ++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 68 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a7a39155e7..0b963f9f99 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -898,7 +898,7 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0); } -static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) +static size_t php_mb_zend_encoding_converter(zend_string **to, zend_string *from, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) { mbfl_string string, result; mbfl_buffer_converter *convd; @@ -910,8 +910,8 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng mbfl_string_init(&result); string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding; string.no_language = MBSTRG(language); - string.val = (unsigned char*)from; - string.len = from_length; + string.val = (unsigned char*)from->val; + string.len = from->len; /* initialize converter */ convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); @@ -934,8 +934,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng return (size_t)-1; } - *to = result.val; - *to_length = result.len; + *to = STR_INIT(result.val, result.len, 0); mbfl_buffer_converter_delete(convd); @@ -2173,6 +2172,7 @@ PHP_FUNCTION(mb_output_handler) mbfl_buffer_converter_result(MBSTRG(outconv), &result); //???? RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */ + efree(result.val); /* delete the converter if it is the last feed. */ if (last_feed) { @@ -2476,6 +2476,7 @@ PHP_FUNCTION(mb_strstr) if (ret != NULL) { //??? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2485,6 +2486,7 @@ PHP_FUNCTION(mb_strstr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2538,6 +2540,7 @@ PHP_FUNCTION(mb_strrchr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2547,6 +2550,7 @@ PHP_FUNCTION(mb_strrchr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2602,6 +2606,7 @@ PHP_FUNCTION(mb_stristr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2611,6 +2616,7 @@ PHP_FUNCTION(mb_stristr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2657,6 +2663,7 @@ PHP_FUNCTION(mb_strrichr) if (ret != NULL) { //??? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2666,6 +2673,7 @@ PHP_FUNCTION(mb_strrichr) if (ret != NULL) { //???? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } @@ -2789,7 +2797,8 @@ PHP_FUNCTION(mb_substr) } //???? - RETURN_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + efree(ret->val); } /* }}} */ @@ -2857,7 +2866,8 @@ PHP_FUNCTION(mb_strcut) } //???? - RETURN_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + efree(ret->val); } /* }}} */ @@ -2951,6 +2961,7 @@ PHP_FUNCTION(mb_strimwidth) } //???? RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + efree(ret->val); } /* }}} */ @@ -3567,6 +3578,7 @@ PHP_FUNCTION(mb_convert_variables) while (n < argc || stack_level > 0) { if (stack_level <= 0) { var = &args[n++]; + ZVAL_DEREF(var); if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { target_hash = HASH_OF(var); if (target_hash != NULL) { @@ -3648,6 +3660,7 @@ detect_end: while (n < argc || stack_level > 0) { if (stack_level <= 0) { var = &args[n++]; + ZVAL_DEREF(var); if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { target_hash = HASH_OF(var); if (target_hash != NULL) { @@ -3686,6 +3699,7 @@ detect_end: zval_ptr_dtor(hash_entry); //??? ZVAL_STRINGL(hash_entry, (char *)ret->val, ret->len); + efree(ret->val); } } } @@ -3698,6 +3712,7 @@ detect_end: zval_ptr_dtor(var); //???? ZVAL_STRINGL(var, (char *)ret->val, ret->len); + efree(ret->val); } } } @@ -3788,6 +3803,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) if (ret != NULL) { //??? RETVAL_STRINGL((char *)ret->val, ret->len); + efree(ret->val); } else { RETVAL_FALSE; } diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 4b9213780f..8d84e41054 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -43,7 +43,7 @@ struct _zend_mb_regex_globals { OnigEncoding default_mbctype; OnigEncoding current_mbctype; HashTable ht_rc; - zval *search_str; + zval search_str; zval *search_str_val; unsigned int search_pos; php_mb_regex_t *search_re; @@ -55,8 +55,7 @@ struct _zend_mb_regex_globals { #define MBREX(g) (MBSTRG(mb_regex_globals)->g) /* {{{ static void php_mb_regex_free_cache() */ -static void php_mb_regex_free_cache(zval *el) -{ +static void php_mb_regex_free_cache(zval *el) { onig_free((php_mb_regex_t *)Z_PTR_P(el)); } /* }}} */ @@ -67,7 +66,7 @@ static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC) pglobals->default_mbctype = ONIG_ENCODING_EUC_JP; pglobals->current_mbctype = ONIG_ENCODING_EUC_JP; zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1); - pglobals->search_str = (zval*) NULL; + ZVAL_UNDEF(&pglobals->search_str); pglobals->search_re = (php_mb_regex_t*)NULL; pglobals->search_pos = 0; pglobals->search_regs = (OnigRegion*)NULL; @@ -139,9 +138,9 @@ PHP_RSHUTDOWN_FUNCTION(mb_regex) { MBREX(current_mbctype) = MBREX(default_mbctype); - if (MBREX(search_str) != NULL) { - zval_ptr_dtor(MBREX(search_str)); - MBREX(search_str) = NULL; + if (!ZVAL_IS_UNDEF(&MBREX(search_str))) { + zval_ptr_dtor(&MBREX(search_str)); + ZVAL_UNDEF(&MBREX(search_str)); } MBREX(search_pos) = 0; @@ -448,22 +447,21 @@ const char *php_mb_regex_get_default_mbctype(TSRMLS_D) static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC) { int err_code = 0; - int found = 0; - php_mb_regex_t *retval = NULL, **rc = NULL; + php_mb_regex_t *retval = NULL, *rc = NULL; OnigErrorInfo err_info; OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; - found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc); - if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) { + rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen); + if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) { if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { onig_error_code_to_str(err_str, err_code, err_info); php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str); retval = NULL; goto out; } - zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL); - } else if (found == SUCCESS) { - retval = *rc; + zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval); + } else if (rc) { + retval = rc; } out: return retval; @@ -671,7 +669,7 @@ PHP_FUNCTION(mb_regex_encoding) RETURN_FALSE; } - RETURN_STRING((char *)retval, 1); + RETURN_STRING((char *)retval); } else if (argc == 1) { mbctype = _php_mb_regex_name2mbctype(encoding); @@ -689,7 +687,7 @@ PHP_FUNCTION(mb_regex_encoding) /* {{{ _php_mb_regex_ereg_exec */ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) { - zval **arg_pattern, *array; + zval *arg_pattern, *array; char *string; int string_len; php_mb_regex_t *re; @@ -700,7 +698,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) array = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { RETURN_FALSE; } @@ -710,22 +708,22 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) } /* compile the regular expression from the supplied regex */ - if (Z_TYPE_PP(arg_pattern) != IS_STRING) { + if (Z_TYPE_P(arg_pattern) != IS_STRING) { /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) { + if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) { convert_to_long_ex(arg_pattern); /* get rid of decimal places */ } convert_to_string_ex(arg_pattern); /* don't bother doing an extended regex with just a number */ } - if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) { + if (!Z_STRVAL_P(arg_pattern) || Z_STRLEN_P(arg_pattern) == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern"); RETVAL_FALSE; goto out; } - re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC); + re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC); if (re == NULL) { RETVAL_FALSE; goto out; @@ -786,7 +784,7 @@ PHP_FUNCTION(mb_eregi) /* {{{ _php_mb_regex_ereg_replace_exec */ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable) { - zval **arg_pattern_zval; + zval *arg_pattern_zval; char *arg_pattern; int arg_pattern_len; @@ -804,8 +802,8 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp php_mb_regex_t *re; OnigSyntaxType *syntax; OnigRegion *regs = NULL; - smart_str out_buf = { 0 }; - smart_str eval_buf = { 0 }; + smart_str out_buf = {0}; + smart_str eval_buf = {0}; smart_str *pbuf; int i, err, eval, n; OnigUChar *pos; @@ -830,7 +828,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp int option_str_len = 0; if (!is_callable) { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s", + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zss|s", &arg_pattern_zval, &replace, &replace_len, &string, &string_len, @@ -838,7 +836,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp RETURN_FALSE; } } else { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s", + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zfs|s", &arg_pattern_zval, &arg_replace_fci, &arg_replace_fci_cache, &string, &string_len, @@ -854,13 +852,13 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp syntax = MBREX(regex_default_syntax); } } - if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) { - arg_pattern = Z_STRVAL_PP(arg_pattern_zval); - arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval); + if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) { + arg_pattern = Z_STRVAL_P(arg_pattern_zval); + arg_pattern_len = Z_STRLEN_P(arg_pattern_zval); } else { /* FIXME: this code is not multibyte aware! */ convert_to_long_ex(arg_pattern_zval); - pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval); + pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval); pat_buf[1] = '\0'; arg_pattern = pat_buf; @@ -940,9 +938,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp /* null terminate buffer */ smart_str_0(&eval_buf); /* do eval */ - if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) { + if (zend_eval_stringl(eval_buf.s->val, eval_buf.s->len, &v, description TSRMLS_CC) == FAILURE) { efree(description); - php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c); + php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.s->val); /* zend_error() does not return in this case */ } @@ -950,33 +948,31 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp convert_to_string(&v); smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v)); /* Clean up */ - eval_buf.len = 0; + eval_buf.s->len = 0; zval_dtor(&v); } else if (is_callable) { - zval *retval_ptr; - zval **args[1]; - zval *subpats; + zval args[1]; + zval subpats, retval; int i; - MAKE_STD_ZVAL(subpats); - array_init(subpats); - + array_init(&subpats); for (i = 0; i < regs->num_regs; i++) { - add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1); + add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1); } - args[0] = &subpats; + ZVAL_COPY_VALUE(&args[0], &subpats); /* null terminate buffer */ smart_str_0(&eval_buf); arg_replace_fci.param_count = 1; arg_replace_fci.params = args; - arg_replace_fci.retval_ptr_ptr = &retval_ptr; - if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) { - convert_to_string_ex(&retval_ptr); - smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); - eval_buf.len = 0; - zval_ptr_dtor(&retval_ptr); + arg_replace_fci.retval = &retval; + if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && + !ZVAL_IS_UNDEF(&retval)) { + convert_to_string_ex(&retval); + smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval)); + eval_buf.s->len = 0; + zval_ptr_dtor(&retval); } else { efree(description); if (!EG(exception)) { @@ -1017,7 +1013,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp RETVAL_FALSE; } else { smart_str_appendc(&out_buf, '\0'); - RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0); + RETVAL_STR(out_buf.s); } } /* }}} */ @@ -1206,9 +1202,9 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) pos = MBREX(search_pos); str = NULL; len = 0; - if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){ - str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str)); - len = Z_STRLEN_P(MBREX(search_str)); + if (!ZVAL_IS_UNDEF(&MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){ + str = (OnigUChar *)Z_STRVAL(MBREX(search_str)); + len = Z_STRLEN(MBREX(search_str)); } if (MBREX(search_re) == NULL) { @@ -1338,20 +1334,18 @@ PHP_FUNCTION(mb_ereg_search_init) } } - if (MBREX(search_str) != NULL) { + if (!ZVAL_IS_NULL(&MBREX(search_str))) { zval_ptr_dtor(&MBREX(search_str)); - MBREX(search_str) = (zval *)NULL; } - MBREX(search_str) = arg_str; - Z_ADDREF_P(MBREX(search_str)); + ZVAL_COPY(&MBREX(search_str), arg_str); SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str)); MBREX(search_pos) = 0; if (MBREX(search_regs) != NULL) { onig_region_free(MBREX(search_regs), 1); - MBREX(search_regs) = (OnigRegion *) NULL; + MBREX(search_regs) = NULL; } RETURN_TRUE; @@ -1365,11 +1359,11 @@ PHP_FUNCTION(mb_ereg_search_getregs) int n, i, len, beg, end; OnigUChar *str; - if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) { + if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) { array_init(return_value); - str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str)); - len = Z_STRLEN_P(MBREX(search_str)); + str = (OnigUChar *)Z_STRVAL(MBREX(search_str)); + len = Z_STRLEN(MBREX(search_str)); n = MBREX(search_regs)->num_regs; for (i = 0; i < n; i++) { beg = MBREX(search_regs)->beg[i]; @@ -1404,7 +1398,7 @@ PHP_FUNCTION(mb_ereg_search_setpos) return; } - if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) { + if (position < 0 || (!ZVAL_IS_UNDEF(&MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN(MBREX(search_str)))) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range"); MBREX(search_pos) = 0; RETURN_FALSE; @@ -1454,7 +1448,7 @@ PHP_FUNCTION(mb_regex_set_options) } _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax); - RETVAL_STRING(buf, 1); + RETVAL_STRING(buf); } /* }}} */ -- 2.40.0