From ab93d8c621645e05d6a6a431d52ac64eda956673 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Wed, 24 Nov 2010 05:41:23 +0000 Subject: [PATCH] Added multibyte support by default. Previously php had to be compiled with --enable-zend-multibyte. Now it can be enabled or disabled through the zend.multibyte directive in php.ini --- NEWS | 3 + UPGRADING | 5 + Zend/Zend.m4 | 14 -- Zend/tests/declare_001.phpt | 4 +- Zend/tests/declare_002.phpt | 28 ++++ .../multibyte/multibyte_encoding_001.phpt | 1 + .../multibyte/multibyte_encoding_002.phpt | 1 + .../multibyte/multibyte_encoding_003.phpt | Bin 469 -> 465 bytes .../multibyte/multibyte_encoding_004.phpt | 1 + .../multibyte/multibyte_encoding_005.phpt | 1 + .../multibyte/multibyte_encoding_006.phpt | Bin 599 -> 624 bytes Zend/zend.c | 3 +- Zend/zend_compile.c | 59 +++---- Zend/zend_globals.h | 8 +- Zend/zend_highlight.c | 5 - Zend/zend_language_scanner.h | 2 - Zend/zend_language_scanner.l | 150 ++++++++---------- Zend/zend_multibyte.c | 43 ++++- Zend/zend_multibyte.h | 4 - ext/mbstring/mbstring.c | 61 +++---- ext/mbstring/mbstring.h | 2 - ext/mbstring/tests/zend_multibyte-10.phpt | 3 +- ext/mbstring/tests/zend_multibyte-11.phpt | 3 +- ext/phar/tests/zip/notphar.phpt | 1 + ext/standard/info.c | 6 +- main/main.c | 5 - win32/build/config.w32 | 6 - 27 files changed, 197 insertions(+), 222 deletions(-) create mode 100644 Zend/tests/declare_002.phpt diff --git a/NEWS b/NEWS index 661ef51812..b936b88270 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,9 @@ PHP NEWS - Changed third parameter of preg_match_all() to optional. FR #53238. (Adam) - General improvements: + . Added multibyte support by default. Previously php had to be compiled + with --enable-zend-multibyte. Now it can be enabled or disabled through the + zend.multibyte directive in php.ini (Dmitry) . Added scalar typehints to the parser and the reflection API. (Ilia, Derick) . Added support for Traits. (Stefan) . Added closure $this support back. 
(Stas) diff --git a/UPGRADING b/UPGRADING index d8bf42c747..6296984ff3 100755 --- a/UPGRADING +++ b/UPGRADING @@ -235,6 +235,11 @@ UPGRADE NOTES - PHP X.Y - Added session.upload_progress.enabled, session.upload_progress.cleanup, session.upload_progress.prefix, session.upload_progress.name, session.upload_progress.freq, session.upload_progress.min_freq. +- Added zend.multibyte directive as a replacement for the PHP compile-time + configuration option --enable-zend-multibyte. Now ZE always contains code for + multibyte support, which may be enabled or disabled by zend.multibyte. It doesn't + make a lot of sense to enable this option without ext/mbstring, because + most of the functionality is implemented by mbstring's callbacks. ==================== 11. Syntax additions diff --git a/Zend/Zend.m4 b/Zend/Zend.m4 index c56b769a99..ececf783b5 100644 --- a/Zend/Zend.m4 +++ b/Zend/Zend.m4 @@ -176,13 +176,6 @@ AC_ARG_ENABLE(inline-optimization, ZEND_INLINE_OPTIMIZATION=yes ]) -AC_ARG_ENABLE(zend-multibyte, -[ --enable-zend-multibyte Compile with zend multibyte support], [ - ZEND_MULTIBYTE=$enableval -],[ - ZEND_MULTIBYTE=no -]) - AC_MSG_CHECKING([virtual machine dispatch method]) AC_MSG_RESULT($PHP_ZEND_VM) @@ -195,9 +188,6 @@ AC_MSG_RESULT($ZEND_INLINE_OPTIMIZATION) AC_MSG_CHECKING(whether to enable Zend debugging) AC_MSG_RESULT($ZEND_DEBUG) -AC_MSG_CHECKING(whether to enable Zend multibyte) -AC_MSG_RESULT($ZEND_MULTIBYTE) - case $PHP_ZEND_VM in SWITCH) AC_DEFINE(ZEND_VM_KIND,ZEND_VM_KIND_SWITCH,[virtual machine dispatch method]) @@ -232,10 +222,6 @@ if test "$ZEND_MAINTAINER_ZTS" = "yes"; then LIBZEND_CPLUSPLUS_CHECKS fi -if test "$ZEND_MULTIBYTE" = "yes"; then - AC_DEFINE(ZEND_MULTIBYTE, 1, [ ]) -fi - changequote({,}) if test -n "$GCC" && test "$ZEND_INLINE_OPTIMIZATION" != "yes"; then INLINE_CFLAGS=`echo $ac_n "$CFLAGS $ac_c" | sed s/-O[0-9s]*//` diff --git a/Zend/tests/declare_001.phpt b/Zend/tests/declare_001.phpt index 14c9a453d6..f55566758b 100644 --- 
a/Zend/tests/declare_001.phpt +++ b/Zend/tests/declare_001.phpt @@ -2,8 +2,8 @@ Testing declare statement with several type values --SKIPIF-- --FILE-- diff --git a/Zend/tests/declare_002.phpt b/Zend/tests/declare_002.phpt new file mode 100644 index 0000000000..1babdf338b --- /dev/null +++ b/Zend/tests/declare_002.phpt @@ -0,0 +1,28 @@ +--TEST-- +Testing declare statement with several type values +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +Warning: Unsupported encoding [%d] in %sdeclare_002.php on line 3 + +Warning: Unsupported encoding [%f] in %sdeclare_002.php on line 4 + +Warning: Unsupported encoding [] in %sdeclare_002.php on line 5 + +Fatal error: Cannot use constants as encoding in %sdeclare_002.php on line 7 diff --git a/Zend/tests/multibyte/multibyte_encoding_001.phpt b/Zend/tests/multibyte/multibyte_encoding_001.phpt index 19b6064cfa..8c8a1b855e 100755 --- a/Zend/tests/multibyte/multibyte_encoding_001.phpt +++ b/Zend/tests/multibyte/multibyte_encoding_001.phpt @@ -10,6 +10,7 @@ if (!extension_loaded("mbstring")) { } ?> --INI-- +zend.multibyte=1 mbstring.internal_encoding=SJIS --FILE-- --INI-- +zend.multibyte=1 mbstring.internal_encoding=iso-8859-1 --FILE-- (7ZfQRHIs;%K --INI-- +zend.multibyte=1 mbstring.script_encoding=Shift_JIS mbstring.internal_encoding=Shift_JIS --FILE-- diff --git a/Zend/tests/multibyte/multibyte_encoding_005.phpt b/Zend/tests/multibyte/multibyte_encoding_005.phpt index 3946238e50..bf88e7ddac 100644 --- a/Zend/tests/multibyte/multibyte_encoding_005.phpt +++ b/Zend/tests/multibyte/multibyte_encoding_005.phpt @@ -10,6 +10,7 @@ if (!extension_loaded("mbstring")) { } ?> --INI-- +zend.multibyte=1 mbstring.encoding_translation = On mbstring.script_encoding=Shift_JIS mbstring.internal_encoding=UTF-8 diff --git a/Zend/tests/multibyte/multibyte_encoding_006.phpt b/Zend/tests/multibyte/multibyte_encoding_006.phpt index da5b8a22b87a5f2f9bb5899cfc31e55080aa7a97..680bf00f76120e849ba23a26001663a71e6ce6ed 100644 GIT binary patch delta 35 
qcmcc4@_}VTB%_$8pQo-aS5<0Wie7GMPDy4`Wl5^7;pQ+#Mn(YI1q*Bd delta 11 Scmeysa-C&EB;({F#(w}EzXaX@ diff --git a/Zend/zend.c b/Zend/zend.c index a9cfc8f15d..d091593ec4 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -92,9 +92,8 @@ static ZEND_INI_MH(OnUpdateGCEnabled) /* {{{ */ ZEND_INI_BEGIN() ZEND_INI_ENTRY("error_reporting", NULL, ZEND_INI_ALL, OnUpdateErrorReporting) STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1", ZEND_INI_ALL, OnUpdateGCEnabled, gc_enabled, zend_gc_globals, gc_globals) -#ifdef ZEND_MULTIBYTE + STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, OnUpdateBool, multibyte, zend_compiler_globals, compiler_globals) STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, detect_unicode, zend_compiler_globals, compiler_globals) -#endif ZEND_INI_END() diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index ca9dfe1d42..5abb81e8fd 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -27,10 +27,7 @@ #include "zend_API.h" #include "zend_exceptions.h" #include "tsrm_virtual_cwd.h" - -#ifdef ZEND_MULTIBYTE #include "zend_multibyte.h" -#endif /* ZEND_MULTIBYTE */ #define CONSTANT_EX(op_array, op) \ (op_array)->literals[op].constant @@ -149,14 +146,12 @@ static void build_runtime_defined_function_key(zval *result, const char *name, i /* NULL, name length, filename length, last accepting char position length */ result->value.str.len = 1+name_length+strlen(filename)+char_pos_len; -#ifdef ZEND_MULTIBYTE + /* must be binary safe */ result->value.str.val = (char *) safe_emalloc(result->value.str.len, 1, 1); result->value.str.val[0] = '\0'; sprintf(result->value.str.val+1, "%s%s%s", name, filename, char_pos_buf); -#else - zend_spprintf(&result->value.str.val, 0, "%c%s%s%s", '\0', name, filename, char_pos_buf); -#endif /* ZEND_MULTIBYTE */ + result->type = IS_STRING; Z_SET_REFCOUNT_P(result, 1); } @@ -202,7 +197,6 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */ init_compiler_declarables(TSRMLS_C); 
zend_stack_init(&CG(context_stack)); -#ifdef ZEND_MULTIBYTE CG(script_encoding_list) = NULL; CG(script_encoding_list_size) = 0; CG(internal_encoding) = NULL; @@ -210,7 +204,6 @@ void zend_init_compiler_data_structures(TSRMLS_D) /* {{{ */ CG(encoding_converter) = NULL; CG(encoding_oddlen) = NULL; CG(encoding_declared) = 0; -#endif /* ZEND_MULTIBYTE */ } /* }}} */ @@ -249,11 +242,9 @@ void shutdown_compiler(TSRMLS_D) /* {{{ */ zend_llist_destroy(&CG(open_files)); zend_stack_destroy(&CG(context_stack)); -#ifdef ZEND_MULTIBYTE if (CG(script_encoding_list)) { efree(CG(script_encoding_list)); } -#endif /* ZEND_MULTIBYTE */ } /* }}} */ @@ -5835,11 +5826,7 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */ if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->u.constant.value.str.len, "ticks", sizeof("ticks")-1)) { convert_to_long(&val->u.constant); CG(declarables).ticks = val->u.constant; -#ifdef ZEND_MULTIBYTE } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { - zend_encoding *new_encoding, *old_encoding; - zend_encoding_filter old_input_filter; - if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) == IS_CONSTANT) { zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding"); } @@ -5863,31 +5850,31 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) /* {{{ */ zend_error(E_COMPILE_ERROR, "Encoding declaration pragma must be the very first statement in the script"); } } - CG(encoding_declared) = 1; - convert_to_string(&val->u.constant); - new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val); - if (!new_encoding) { - zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.constant.value.str.val); - } else { - old_input_filter = LANG_SCNG(input_filter); - old_encoding = LANG_SCNG(script_encoding); - zend_multibyte_set_filter(new_encoding TSRMLS_CC); - - /* need to re-scan if input filter changed */ - if 
(old_input_filter != LANG_SCNG(input_filter) || - ((old_input_filter == zend_multibyte_script_encoding_filter) && - (new_encoding != old_encoding))) { - zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC); + if (CG(multibyte)) { + zend_encoding *new_encoding, *old_encoding; + zend_encoding_filter old_input_filter; + + CG(encoding_declared) = 1; + + convert_to_string(&val->u.constant); + new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val); + if (!new_encoding) { + zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.constant.value.str.val); + } else { + old_input_filter = LANG_SCNG(input_filter); + old_encoding = LANG_SCNG(script_encoding); + zend_multibyte_set_filter(new_encoding TSRMLS_CC); + + /* need to re-scan if input filter changed */ + if (old_input_filter != LANG_SCNG(input_filter) || + ((old_input_filter == zend_multibyte_script_encoding_filter) && + (new_encoding != old_encoding))) { + zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC); + } } } - efree(val->u.constant.value.str.val); -#else /* !ZEND_MULTIBYTE */ - } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) { - /* Do not generate any kind of warning for encoding declares */ - /* zend_error(E_COMPILE_WARNING, "Declare encoding [%s] not supported", val->u.constant.value.str.val); */ zval_dtor(&val->u.constant); -#endif /* ZEND_MULTIBYTE */ } else { zend_error(E_COMPILE_WARNING, "Unsupported declare '%s'", var->u.constant.value.str.val); zval_dtor(&val->u.constant); diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index c66ef64c4d..1f6b8d4f1f 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -35,10 +35,7 @@ #include "zend_objects_API.h" #include "zend_modules.h" #include "zend_float.h" - -#ifdef ZEND_MULTIBYTE #include "zend_multibyte.h" -#endif /* ZEND_MULTIBYTE */ /* Define ZTS if you want a thread-safe Zend */ /*#undef ZTS*/ 
@@ -150,9 +147,9 @@ struct _zend_compiler_globals { HashTable interned_strings; -#ifdef ZEND_MULTIBYTE zend_encoding **script_encoding_list; size_t script_encoding_list_size; + zend_bool multibyte; zend_bool detect_unicode; zend_bool encoding_declared; @@ -162,7 +159,6 @@ struct _zend_compiler_globals { zend_encoding_detector encoding_detector; zend_encoding_converter encoding_converter; zend_encoding_oddlen encoding_oddlen; -#endif /* ZEND_MULTIBYTE */ #ifdef ZTS zval ***static_members_table; @@ -308,7 +304,6 @@ struct _zend_php_scanner_globals { int yy_state; zend_stack state_stack; -#ifdef ZEND_MULTIBYTE /* original (unfiltered) script */ unsigned char *script_org; size_t script_org_size; @@ -322,7 +317,6 @@ struct _zend_php_scanner_globals { zend_encoding_filter output_filter; zend_encoding *script_encoding; zend_encoding *internal_encoding; -#endif /* ZEND_MULTIBYTE */ }; #endif /* ZEND_GLOBALS_H */ diff --git a/Zend/zend_highlight.c b/Zend/zend_highlight.c index 171789b83b..58c83868b1 100644 --- a/Zend/zend_highlight.c +++ b/Zend/zend_highlight.c @@ -57,8 +57,6 @@ ZEND_API void zend_html_putc(char c) ZEND_API void zend_html_puts(const char *s, uint len TSRMLS_DC) { const char *ptr=s, *end=s+len; - -#ifdef ZEND_MULTIBYTE char *filtered; int filtered_len; @@ -67,7 +65,6 @@ ZEND_API void zend_html_puts(const char *s, uint len TSRMLS_DC) ptr = filtered; end = filtered + filtered_len; } -#endif /* ZEND_MULTIBYTE */ while (ptrfilename = zend_get_compiled_filename(TSRMLS_C); lex_state->lineno = CG(zend_lineno); -#ifdef ZEND_MULTIBYTE - lex_state->script_org = SCNG(script_org); - lex_state->script_org_size = SCNG(script_org_size); - lex_state->script_filtered = SCNG(script_filtered); - lex_state->script_filtered_size = SCNG(script_filtered_size); - lex_state->input_filter = SCNG(input_filter); - lex_state->output_filter = SCNG(output_filter); - lex_state->script_encoding = SCNG(script_encoding); - lex_state->internal_encoding = SCNG(internal_encoding); -#endif /* 
ZEND_MULTIBYTE */ + if (CG(multibyte)) { + lex_state->script_org = SCNG(script_org); + lex_state->script_org_size = SCNG(script_org_size); + lex_state->script_filtered = SCNG(script_filtered); + lex_state->script_filtered_size = SCNG(script_filtered_size); + lex_state->input_filter = SCNG(input_filter); + lex_state->output_filter = SCNG(output_filter); + lex_state->script_encoding = SCNG(script_encoding); + lex_state->internal_encoding = SCNG(internal_encoding); + } } ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) @@ -209,24 +209,24 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) YYSETCONDITION(lex_state->yy_state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); -#ifdef ZEND_MULTIBYTE - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - SCNG(script_filtered) = NULL; + if (CG(multibyte)) { + if (SCNG(script_org)) { + efree(SCNG(script_org)); + SCNG(script_org) = NULL; + } + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_org) = lex_state->script_org; + SCNG(script_org_size) = lex_state->script_org_size; + SCNG(script_filtered) = lex_state->script_filtered; + SCNG(script_filtered_size) = lex_state->script_filtered_size; + SCNG(input_filter) = lex_state->input_filter; + SCNG(output_filter) = lex_state->output_filter; + SCNG(script_encoding) = lex_state->script_encoding; + SCNG(internal_encoding) = lex_state->internal_encoding; } - SCNG(script_org) = lex_state->script_org; - SCNG(script_org_size) = lex_state->script_org_size; - SCNG(script_filtered) = lex_state->script_filtered; - SCNG(script_filtered_size) = lex_state->script_filtered_size; - SCNG(input_filter) = lex_state->input_filter; - SCNG(output_filter) = lex_state->output_filter; - SCNG(script_encoding) = lex_state->script_encoding; - 
SCNG(internal_encoding) = lex_state->internal_encoding; -#endif /* ZEND_MULTIBYTE */ } ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) @@ -269,32 +269,32 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) SCNG(yy_start) = NULL; if (size != -1) { -#ifdef ZEND_MULTIBYTE - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { - return FAILURE; - } + if (CG(multibyte)) { + if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { + return FAILURE; + } - SCNG(yy_in) = NULL; + SCNG(yy_in) = NULL; - zend_multibyte_set_filter(NULL TSRMLS_CC); + zend_multibyte_set_filter(NULL TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) == NULL) { - zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " - "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + if (!SCNG(input_filter)) { + SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); + memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); + SCNG(script_filtered_size) = SCNG(script_org_size); + } else { + SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + if (SCNG(script_filtered) == NULL) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); + } } + SCNG(yy_start) = SCNG(script_filtered) - offset; + yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); + } 
else { + SCNG(yy_start) = buf - offset; + yy_scan_buffer(buf, size TSRMLS_CC); } - SCNG(yy_start) = SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - SCNG(yy_start) = buf - offset; - yy_scan_buffer(buf, size TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ } else { zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); } @@ -437,25 +437,25 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D SCNG(yy_in)=NULL; SCNG(yy_start) = NULL; -#ifdef ZEND_MULTIBYTE - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) = str->value.str.len; + if (CG(multibyte)) { + SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); + SCNG(script_org_size) = str->value.str.len; - zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); + zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); + if (!SCNG(input_filter)) { + SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); + memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); + SCNG(script_filtered_size) = SCNG(script_org_size); + } else { + SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + } + + yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); } - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); -#else /* !ZEND_MULTIBYTE */ - 
yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ - zend_set_compiled_filename(filename TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; @@ -466,7 +466,6 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) { size_t offset = SCNG(yy_cursor) - SCNG(yy_start); -#ifdef ZEND_MULTIBYTE if (SCNG(input_filter)) { size_t original_offset = offset, length = 0; do { unsigned char *p = NULL; @@ -482,7 +481,6 @@ ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) } } while (original_offset != length); } -#endif return offset; } @@ -524,7 +522,6 @@ zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) BEGIN(ST_IN_SCRIPTING); compiler_result = zendparse(TSRMLS_C); -#ifdef ZEND_MULTIBYTE if (SCNG(script_org)) { efree(SCNG(script_org)); SCNG(script_org) = NULL; @@ -533,7 +530,6 @@ zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ if (compiler_result==1) { CG(active_op_array) = original_active_op_array; @@ -571,7 +567,6 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight return FAILURE; } zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE if (SCNG(script_org)) { efree(SCNG(script_org)); SCNG(script_org) = NULL; @@ -580,7 +575,6 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_destroy_file_handle(&file_handle TSRMLS_CC); zend_restore_lexical_state(&original_lex_state TSRMLS_CC); return SUCCESS; @@ -600,7 +594,6 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ } BEGIN(INITIAL); zend_highlight(syntax_highlighter_ini TSRMLS_CC); -#ifdef ZEND_MULTIBYTE if (SCNG(script_org)) { 
efree(SCNG(script_org)); SCNG(script_org) = NULL; @@ -609,16 +602,11 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; } -#endif /* ZEND_MULTIBYTE */ zend_restore_lexical_state(&original_lex_state TSRMLS_CC); zval_dtor(str); return SUCCESS; } -END_EXTERN_C() -#ifdef ZEND_MULTIBYTE - -BEGIN_EXTERN_C() ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) { size_t original_offset, offset, free_flag, new_len, length; @@ -728,11 +716,6 @@ ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ zendlval->value.str.len = yyleng; \ } -#else /* ZEND_MULTIBYTE */ -# define zend_copy_value(zendlval, yytext, yyleng) \ - zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \ - zendlval->value.str.len = yyleng; -#endif /* ZEND_MULTIBYTE */ static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) { @@ -836,7 +819,6 @@ static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quo s++; } *t = 0; -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { size_t sz = 0; s = zendlval->value.str.val; @@ -844,7 +826,6 @@ static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quo zendlval->value.str.len = sz; efree(s); } -#endif /* ZEND_MULTIBYTE */ } @@ -1690,7 +1671,6 @@ inline_char_handler: inline_html: yyleng = YYCURSOR - SCNG(yy_text); -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { int readsize; size_t sz = 0; @@ -1703,10 +1683,6 @@ inline_html: zendlval->value.str.val = (char *) estrndup(yytext, yyleng); zendlval->value.str.len = yyleng; } -#else /* !ZEND_MULTIBYTE */ - zendlval->value.str.val = (char *) estrndup(yytext, yyleng); - zendlval->value.str.len = yyleng; -#endif zendlval->type = IS_STRING; HANDLE_NEWLINES(yytext, yyleng); return T_INLINE_HTML; 
@@ -1913,7 +1889,6 @@ inline_html: } *t = 0; -#ifdef ZEND_MULTIBYTE if (SCNG(output_filter)) { size_t sz = 0; s = zendlval->value.str.val; @@ -1921,7 +1896,6 @@ inline_html: zendlval->value.str.len = sz; efree(s); } -#endif /* ZEND_MULTIBYTE */ return T_CONSTANT_ENCAPSED_STRING; } diff --git a/Zend/zend_multibyte.c b/Zend/zend_multibyte.c index d47fa6d83a..33d8688b37 100644 --- a/Zend/zend_multibyte.c +++ b/Zend/zend_multibyte.c @@ -24,7 +24,6 @@ #include "zend_operators.h" #include "zend_multibyte.h" -#ifdef ZEND_MULTIBYTE static size_t zend_multibyte_encoding_filter(unsigned char **to, size_t *to_length, const char *to_encoding, const unsigned char *from, size_t from_length, const char *from_encoding TSRMLS_DC); size_t sjis_input_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC); size_t sjis_output_filter(unsigned char **buf, size_t *length, const unsigned char *sjis, size_t sjis_length TSRMLS_DC); @@ -1080,6 +1079,7 @@ static zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) zend_encoding *script_encoding = NULL; int bom_size; unsigned char *script; + unsigned char *pos1, *pos2; if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { return NULL; @@ -1115,7 +1115,45 @@ static zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) } /* script contains NULL bytes -> auto-detection */ - if (memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size))) { + if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { + /* check if the NULL byte is after the __HALT_COMPILER(); */ + pos2 = LANG_SCNG(script_org); + + while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { + pos2 = memchr(pos2, '_', pos1 - pos2); + if (!pos2) break; + pos2++; + if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { + pos2 += sizeof("_HALT_COMPILER")-1; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == '(') { + pos2++; + while 
(*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ')') { + pos2++; + while (*pos2 == ' ' || + *pos2 == '\t' || + *pos2 == '\r' || + *pos2 == '\n') { + pos2++; + } + if (*pos2 == ';') { + return NULL; + } + } + } + } + } /* make best effort if BOM is missing */ return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); } @@ -1168,7 +1206,6 @@ static zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *sc return NULL; } -#endif /* ZEND_MULTIBYTE */ /* * Local variables: diff --git a/Zend/zend_multibyte.h b/Zend/zend_multibyte.h index 56a70a36ce..02421a8ab5 100644 --- a/Zend/zend_multibyte.h +++ b/Zend/zend_multibyte.h @@ -22,8 +22,6 @@ #ifndef ZEND_MULTIBYTE_H #define ZEND_MULTIBYTE_H -#ifdef ZEND_MULTIBYTE - #define BOM_UTF32_BE "\x00\x00\xfe\xff" #define BOM_UTF32_LE "\xff\xfe\x00\x00" #define BOM_UTF16_BE "\xfe\xff" @@ -67,8 +65,6 @@ ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, si ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC); END_EXTERN_C() -#endif /* ZEND_MULTIBYTE */ - #endif /* ZEND_MULTIBYTE_H */ /* diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 72fe44b2a9..6c3a3260eb 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -77,9 +77,7 @@ #include "php_mbregex.h" #endif -#ifdef ZEND_MULTIBYTE #include "zend_multibyte.h" -#endif /* ZEND_MULTIBYTE */ #if HAVE_ONIG #include "php_onig_compat.h" @@ -98,12 +96,10 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring) static PHP_GINIT_FUNCTION(mbstring); static PHP_GSHUTDOWN_FUNCTION(mbstring); -#ifdef ZEND_MULTIBYTE static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC); static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC); static char* 
php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC); static int php_mb_set_zend_encoding(TSRMLS_D); -#endif /* }}} */ /* {{{ php_mb_default_identify_list */ @@ -1126,12 +1122,14 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } /* }}} */ -#ifdef ZEND_MULTIBYTE /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) { int *list, size; + if (!CG(multibyte)) { + return FAILURE; + } if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { if (MBSTRG(script_encoding_list) != NULL) { free(MBSTRG(script_encoding_list)); @@ -1150,7 +1148,6 @@ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) return SUCCESS; } /* }}} */ -#endif /* ZEND_MULTIBYTE */ /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) @@ -1249,9 +1246,7 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) -#ifdef ZEND_MULTIBYTE PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) -#endif /* ZEND_MULTIBYTE */ PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) @@ -1278,10 +1273,8 @@ static PHP_GINIT_FUNCTION(mbstring) mbstring_globals->language = mbfl_no_language_uni; mbstring_globals->internal_encoding = mbfl_no_encoding_invalid; mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; -#ifdef ZEND_MULTIBYTE 
mbstring_globals->script_encoding_list = NULL; mbstring_globals->script_encoding_list_size = 0; -#endif /* ZEND_MULTIBYTE */ mbstring_globals->http_output_encoding = mbfl_no_encoding_pass; mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass; mbstring_globals->http_input_identify = mbfl_no_encoding_invalid; @@ -1319,11 +1312,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) if (mbstring_globals->http_input_list) { free(mbstring_globals->http_input_list); } -#ifdef ZEND_MULTIBYTE if (mbstring_globals->script_encoding_list) { free(mbstring_globals->script_encoding_list); } -#endif /* ZEND_MULTIBYTE */ if (mbstring_globals->detect_order_list) { free(mbstring_globals->detect_order_list); } @@ -1441,10 +1432,10 @@ PHP_RINIT_FUNCTION(mbstring) #if HAVE_MBREGEX PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif -#ifdef ZEND_MULTIBYTE - zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); - php_mb_set_zend_encoding(TSRMLS_C); -#endif /* ZEND_MULTIBYTE */ + if (CG(multibyte)) { + zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); + php_mb_set_zend_encoding(TSRMLS_C); + } return SUCCESS; } @@ -1568,12 +1559,10 @@ PHP_FUNCTION(mb_internal_encoding) RETURN_FALSE; } else { MBSTRG(current_internal_encoding) = no_encoding; -#ifdef ZEND_MULTIBYTE /* TODO: make independent from mbstring.encoding_translation? 
*/ - if (MBSTRG(encoding_translation)) { + if (CG(multibyte) && MBSTRG(encoding_translation)) { zend_multibyte_set_internal_encoding(name TSRMLS_CC); } -#endif /* ZEND_MULTIBYTE */ RETURN_TRUE; } } @@ -4116,9 +4105,7 @@ PHP_FUNCTION(mb_get_info) zval *row1, *row2; const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); enum mbfl_no_encoding *entry; -#ifdef ZEND_MULTIBYTE zval *row3; -#endif /* ZEND_MULTIBYTE */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { RETURN_FALSE; @@ -4201,22 +4188,22 @@ PHP_FUNCTION(mb_get_info) } else { add_assoc_string(return_value, "strict_detection", "Off", 1); } -#ifdef ZEND_MULTIBYTE - entry = MBSTRG(script_encoding_list); - n = MBSTRG(script_encoding_list_size); - if(n > 0) { - MAKE_STD_ZVAL(row3); - array_init(row3); - while (n > 0) { - if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { - add_next_index_string(row3, name, 1); + if (CG(multibyte)) { + entry = MBSTRG(script_encoding_list); + n = MBSTRG(script_encoding_list_size); + if(n > 0) { + MAKE_STD_ZVAL(row3); + array_init(row3); + while (n > 0) { + if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { + add_next_index_string(row3, name, 1); + } + entry++; + n--; } - entry++; - n--; + add_assoc_zval(return_value, "script_encoding", row3); } - add_assoc_zval(return_value, "script_encoding", row3); } -#endif /* ZEND_MULTIBYTE */ } else if (!strcasecmp("internal_encoding", typ)) { if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { RETVAL_STRING(name, 1); @@ -4303,8 +4290,7 @@ PHP_FUNCTION(mb_get_info) RETVAL_STRING("Off", 1); } } else { -#ifdef ZEND_MULTIBYTE - if (!strcasecmp("script_encoding", typ)) { + if (CG(multibyte) && !strcasecmp("script_encoding", typ)) { entry = MBSTRG(script_encoding_list); n = MBSTRG(script_encoding_list_size); if(n > 0) { @@ -4320,7 +4306,6 @@ PHP_FUNCTION(mb_get_info) } return; } -#endif /* ZEND_MULTIBYTE */ RETURN_FALSE; } } @@ -4706,7 +4691,6 
@@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int } /* }}} */ -#ifdef ZEND_MULTIBYTE /* {{{ php_mb_set_zend_encoding() */ static int php_mb_set_zend_encoding(TSRMLS_D) { @@ -4864,7 +4848,6 @@ static size_t php_mb_oddlen(const unsigned char *string, size_t length, const ch return mbfl_oddlen(&mb_string); } /* }}} */ -#endif /* ZEND_MULTIBYTE */ #endif /* HAVE_MBSTRING */ diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 8e9a5806e1..77f1c9d5ef 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -167,10 +167,8 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) enum mbfl_no_language language; enum mbfl_no_encoding internal_encoding; enum mbfl_no_encoding current_internal_encoding; -#ifdef ZEND_MULTIBYTE enum mbfl_no_encoding *script_encoding_list; int script_encoding_list_size; -#endif /* ZEND_MULTIBYTE */ enum mbfl_no_encoding http_output_encoding; enum mbfl_no_encoding current_http_output_encoding; enum mbfl_no_encoding http_input_identify; diff --git a/ext/mbstring/tests/zend_multibyte-10.phpt b/ext/mbstring/tests/zend_multibyte-10.phpt index c3543c937a..435c339752 100644 --- a/ext/mbstring/tests/zend_multibyte-10.phpt +++ b/ext/mbstring/tests/zend_multibyte-10.phpt @@ -11,6 +11,7 @@ if (ini_set("mbstring.script_encoding","SJIS") != "SJIS") { --EXPECTF-- -Fatal error: Encoding declaration pragma must be the very first statement in the script in %s on line 3 +ok diff --git a/ext/mbstring/tests/zend_multibyte-11.phpt b/ext/mbstring/tests/zend_multibyte-11.phpt index 84e16ae467..b79e4339ba 100644 --- a/ext/mbstring/tests/zend_multibyte-11.phpt +++ b/ext/mbstring/tests/zend_multibyte-11.phpt @@ -11,7 +11,8 @@ if (ini_set("mbstring.script_encoding","SJIS") != "SJIS") { --EXPECTF-- -Fatal error: Encoding declaration pragma must be the very first statement in the script in %s on line 3 +ok diff --git a/ext/phar/tests/zip/notphar.phpt b/ext/phar/tests/zip/notphar.phpt index 3450c84bbd..c0648c4953 100644 --- 
a/ext/phar/tests/zip/notphar.phpt +++ b/ext/phar/tests/zip/notphar.phpt @@ -4,6 +4,7 @@ Phar: a non-executable zip with no stub named .phar.zip --INI-- phar.readonly=1 +detect_unicode=0 --FILE--