From a304a0fc04b8cdadda4d39550b23970fa3882868 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Mon, 20 Dec 2010 03:16:09 +0000 Subject: [PATCH] - Avoid allocating extra buffers. This makes parsing with zend.multibyte enabled as fast as with it disabled. --- Zend/zend_language_scanner.h | 2 - Zend/zend_language_scanner.l | 174 ++++++++++------------------------- 2 files changed, 50 insertions(+), 126 deletions(-) diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index a3d125bede..978fa89660 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -56,9 +56,7 @@ int zend_compare_file_handles(zend_file_handle *fh1, zend_file_handle *fh2); ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC); -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC); -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC); ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC); END_EXTERN_C() diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 1d54b53ad8..7f9857e053 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -207,10 +207,6 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -462,31 +458,23 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) if 
(size != -1) { if (CG(multibyte)) { - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { - return FAILURE; - } - - SCNG(yy_in) = NULL; + SCNG(script_org) = buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; zend_multibyte_set_filter(NULL TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) == NULL) { + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); } + buf = SCNG(script_filtered); + size = SCNG(script_filtered_size); } - SCNG(yy_start) = SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); - } else { - SCNG(yy_start) = (unsigned char *)buf - offset; - yy_scan_buffer(buf, size TSRMLS_CC); } + SCNG(yy_start) = (unsigned char *)buf - offset; + yy_scan_buffer(buf, size TSRMLS_CC); } else { zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); } @@ -615,6 +603,9 @@ zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC) ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) { + char *buf; + size_t size; + /* enforce two trailing NULLs for flex...
*/ if (IS_INTERNED(str->value.str.val)) { char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); @@ -626,28 +617,31 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); - SCNG(yy_in)=NULL; + SCNG(yy_in) = NULL; SCNG(yy_start) = NULL; + buf = str->value.str.val; + size = str->value.str.len; + if (CG(multibyte)) { - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) = str->value.str.len; + SCNG(script_org) = buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = SCNG(script_filtered); + size = SCNG(script_filtered_size); } - - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); - } else { - yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); } + yy_scan_buffer(buf, size TSRMLS_CC); + zend_set_compiled_filename(filename TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; @@ -659,11 +653,11 @@ ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) { size_t offset = SCNG(yy_cursor) - SCNG(yy_start); if (SCNG(input_filter)) { - 
size_t original_offset = offset, length = 0; do { + size_t original_offset = offset, length = 0; + do { unsigned char *p = NULL; - SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC); - if (!p) { - break; + if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { + return (size_t)-1; } efree(p); if (length > original_offset) { @@ -714,10 +708,6 @@ zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) BEGIN(ST_IN_SCRIPTING); compiler_result = zendparse(TSRMLS_C); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -759,10 +749,6 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight return FAILURE; } zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -786,10 +772,6 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ } BEGIN(INITIAL); zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -801,8 +783,8 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) { - size_t original_offset, offset, free_flag, new_len, length; - unsigned char *p; + size_t original_offset, offset, length; + unsigned char *new_yy_start; /* calculate current position */ offset = original_offset = YYCURSOR - SCNG(yy_start); @@ -818,84 +800,28 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter /* convert and set */ if 
(!SCNG(input_filter)) { + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_filtered_size) = 0; length = SCNG(script_org_size) - offset; - p = SCNG(script_org) + offset; - free_flag = 0; + new_yy_start = SCNG(script_org) + offset; } else { - SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC); - free_flag = 1; - } - - new_len = original_offset + length; - - if (new_len > YYLIMIT - SCNG(yy_start)) { - unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len); - SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); - SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); - SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); - SCNG(yy_start) = new_yy_start; - SCNG(script_filtered) = new_yy_start; - SCNG(script_filtered_size) = new_len; - } - - SCNG(yy_limit) = SCNG(yy_start) + new_len; - memmove(SCNG(yy_start) + original_offset, p, length); - - if (free_flag) { - efree(p); - } -} - - -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) -{ - size_t n; - - if (CG(interactive) == 0) { - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; + if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); } - n = len; - return n; - } - - /* interactive */ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - } - SCNG(script_org) = NULL; - SCNG(script_org_size) = 0; - - /* TODO: support widechars */ - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; + 
SCNG(script_filtered) = new_yy_start; + SCNG(script_filtered_size) = length; } - n = len; - - SCNG(script_org_size) = n; - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - return n; -} + SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); + SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); + SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); + SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); - -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) -{ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - SCNG(script_org_size) = n; - - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - *(SCNG(script_org)+SCNG(script_org_size)) = '\0'; - - return 0; + SCNG(yy_start) = new_yy_start; } -- 2.40.0