From 45ee78e0403b973e7df4cbcd2ce0c485efa590ea Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Fri, 6 Oct 2017 12:08:55 +0300 Subject: [PATCH] mb_convert_variables() refactored to use simple recursion. Fixed incorrect recursion protection (previous implementation kept protection flag or apply counter in non-zero state). --- ext/mbstring/mbstring.c | 274 ++++++++++++++++------------------------ 1 file changed, 111 insertions(+), 163 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 6d1103aff7..445bcd3d98 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3681,24 +3681,112 @@ PHP_FUNCTION(mb_convert_kana) } /* }}} */ -#define PHP_MBSTR_STACK_BLOCK_SIZE 32 +static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */ +{ + mbfl_string string; + HashTable *ht; + zval *entry; + + ZVAL_DEREF(var); + if (Z_TYPE_P(var) == IS_STRING) { + string.val = (unsigned char *)Z_STRVAL_P(var); + string.len = Z_STRLEN_P(var); + if (mbfl_encoding_detector_feed(identd, &string)) { + return 1; /* complete detecting */ + } + } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { + if (Z_REFCOUNTED_P(var)) { + if (Z_IS_RECURSIVE_P(var)) { + *recursion_error = 1; + return 0; + } + Z_PROTECT_RECURSION_P(var); + } + + ht = HASH_OF(var); + if (ht != NULL) { + ZEND_HASH_FOREACH_VAL_IND(ht, entry) { + if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) { + if (Z_REFCOUNTED_P(var)) { + Z_UNPROTECT_RECURSION_P(var); + } + return 1; + } else if (*recursion_error) { + if (Z_REFCOUNTED_P(var)) { + Z_UNPROTECT_RECURSION_P(var); + } + return 0; + } + } ZEND_HASH_FOREACH_END(); + } + + if (Z_REFCOUNTED_P(var)) { + Z_UNPROTECT_RECURSION_P(var); + } + } + return 0; +} /* }}} */ + +static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */ +{ + mbfl_string string, result, *ret; + HashTable *ht; + zval *entry, *orig_var; + + orig_var = var; + ZVAL_DEREF(var); + if (Z_TYPE_P(var) == IS_STRING) { + string.val = (unsigned char *)Z_STRVAL_P(var); + string.len = Z_STRLEN_P(var); + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + if (ret != NULL) { + zval_ptr_dtor(orig_var); + // TODO: avoid reallocation ??? + ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len); + efree(ret->val); + } + } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { + SEPARATE_ZVAL_NOREF(var); + if (Z_REFCOUNTED_P(var)) { + if (Z_IS_RECURSIVE_P(var)) { + return 1; + } + Z_PROTECT_RECURSION_P(var); + } + + ht = HASH_OF(var); + if (ht != NULL) { + ZEND_HASH_FOREACH_VAL_IND(ht, entry) { + if (mb_recursive_convert_variable(convd, entry)) { + if (Z_REFCOUNTED_P(var)) { + Z_UNPROTECT_RECURSION_P(var); + } + return 1; + } + } ZEND_HASH_FOREACH_END(); + } + + if (Z_REFCOUNTED_P(var)) { + Z_UNPROTECT_RECURSION_P(var); + } + } + return 0; +} /* }}} */ /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...]) Converts the string resource in variables to desired encoding */ PHP_FUNCTION(mb_convert_variables) { - zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc; - HashTable *target_hash; - mbfl_string string, result, *ret; + zval *args, *zfrom_enc; + mbfl_string string, result; const mbfl_encoding *from_encoding, *to_encoding; mbfl_encoding_detector *identd; mbfl_buffer_converter *convd; - int n, argc, stack_level, stack_max; + int n, argc; size_t to_enc_len; size_t elistsz; const mbfl_encoding **elist; char *to_enc; - void *ptmp; int recursion_error = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { @@ -3739,92 +3827,25 @@ PHP_FUNCTION(mb_convert_variables) } else { /* auto detect */ from_encoding = NULL; - stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; - stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0); - stack_level = 0; identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); if (identd != NULL) { n = 0; - while (n < argc || stack_level > 0) { - if (stack_level <= 0) { - var = &args[n++]; - ZVAL_DEREF(var); - SEPARATE_ZVAL_NOREF(var); - if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { - target_hash = HASH_OF(var); - if (target_hash != NULL) { - zend_hash_internal_pointer_reset(target_hash); - } - } - } else { - stack_level--; - var = &stack[stack_level]; - } - if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { - target_hash = HASH_OF(var); - if (target_hash != NULL) { - while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) { - if (Z_REFCOUNTED_P(var)) { - if (GC_IS_RECURSIVE(target_hash)) { - recursion_error = 1; - goto detect_end; - } - GC_PROTECT_RECURSION(target_hash); - } - zend_hash_move_forward(target_hash); - if (Z_TYPE_P(hash_entry) == IS_INDIRECT) { - hash_entry = Z_INDIRECT_P(hash_entry); - } - ZVAL_DEREF(hash_entry); - if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) { - if (stack_level >= stack_max) { - stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; - ptmp = erealloc(stack, sizeof(zval) * stack_max); - stack = (zval *)ptmp; - } - ZVAL_COPY_VALUE(&stack[stack_level], var); - stack_level++; - var = hash_entry; - target_hash = HASH_OF(var); - if (target_hash != NULL) { - zend_hash_internal_pointer_reset(target_hash); - continue; - } - } else if (Z_TYPE_P(hash_entry) == IS_STRING) { - string.val = (unsigned char *)Z_STRVAL_P(hash_entry); - string.len = Z_STRLEN_P(hash_entry); - if (mbfl_encoding_detector_feed(identd, &string)) { - goto detect_end; /* complete detecting */ - } - } - } - } - } else if (Z_TYPE_P(var) == IS_STRING) { - string.val = (unsigned char *)Z_STRVAL_P(var); - string.len = Z_STRLEN_P(var); - if (mbfl_encoding_detector_feed(identd, &string)) { - goto detect_end; /* complete detecting */ - } + while (n < argc) { + if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) { + break; } + n++; } -detect_end: from_encoding = mbfl_encoding_detector_judge(identd); mbfl_encoding_detector_delete(identd); - } - if (recursion_error) { - while(stack_level-- && (var = &stack[stack_level])) { - if (Z_REFCOUNTED_P(var)) { - Z_UNPROTECT_RECURSION_P(var); + if (recursion_error) { + if (elist != NULL) { + efree((void *)elist); } + php_error_docref(NULL, E_WARNING, "Cannot handle recursive references"); + RETURN_FALSE; } - efree(stack); - if (elist != NULL) { - efree((void *)elist); - } - php_error_docref(NULL, E_WARNING, "Cannot handle recursive references"); - RETURN_FALSE; } - efree(stack); if (!from_encoding) { php_error_docref(NULL, E_WARNING, "Unable to detect encoding"); @@ -3848,98 +3869,25 @@ detect_end: /* convert */ if (convd != NULL) { - stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; - stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0); - stack_level = 0; n = 0; - while (n < argc || stack_level > 0) { - if (stack_level <= 0) { - var = &args[n++]; - ZVAL_DEREF(var); - SEPARATE_ZVAL_NOREF(var); - if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { - target_hash = HASH_OF(var); - if (target_hash != NULL) { - zend_hash_internal_pointer_reset(target_hash); - } - } - } else { - stack_level--; - var = &stack[stack_level]; - } - if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { - target_hash = HASH_OF(var); - if (target_hash != NULL) { - while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) { - zend_hash_move_forward(target_hash); - if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) { - hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr); - } - hash_entry = hash_entry_ptr; - ZVAL_DEREF(hash_entry); - if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) { - if (Z_REFCOUNTED_P(hash_entry)) { - if (Z_IS_RECURSIVE_P(hash_entry)) { - recursion_error = 1; - goto conv_end; - } - Z_PROTECT_RECURSION_P(hash_entry); - } - if (stack_level >= stack_max) { - stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; - ptmp = erealloc(stack, sizeof(zval) * stack_max); - stack = (zval *)ptmp; - } - ZVAL_COPY_VALUE(&stack[stack_level], var); - stack_level++; - var = hash_entry; - SEPARATE_ZVAL(hash_entry); - target_hash = HASH_OF(var); - if (target_hash != NULL) { - zend_hash_internal_pointer_reset(target_hash); - continue; - } - } else if (Z_TYPE_P(hash_entry) == IS_STRING) { - string.val = (unsigned char *)Z_STRVAL_P(hash_entry); - string.len = Z_STRLEN_P(hash_entry); - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - if (ret != NULL) { - zval_ptr_dtor(hash_entry_ptr); - // TODO: avoid reallocation ??? - ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len); - efree(ret->val); - } - } - } - } - } else if (Z_TYPE_P(var) == IS_STRING) { - string.val = (unsigned char *)Z_STRVAL_P(var); - string.len = Z_STRLEN_P(var); - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - if (ret != NULL) { - zval_ptr_dtor(var); - // TODO: avoid reallocation ??? - ZVAL_STRINGL(var, (char *)ret->val, ret->len); - efree(ret->val); - } + while (n < argc) { + zval *zv = &args[n]; + + ZVAL_DEREF(zv); + recursion_error = mb_recursive_convert_variable(convd, zv); + if (recursion_error) { + break; } + n++; } -conv_end: MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); if (recursion_error) { - while(stack_level-- && (var = &stack[stack_level])) { - if (Z_REFCOUNTED_P(var)) { - Z_UNPROTECT_RECURSION_P(var); - } - } - efree(stack); php_error_docref(NULL, E_WARNING, "Cannot handle recursive references"); RETURN_FALSE; } - efree(stack); } if (from_encoding) { -- 2.50.1