From: Andrei Zmievski Date: Wed, 24 Aug 2005 20:42:09 +0000 (+0000) Subject: - Require declare(encoding=...) to come before any opcodes. Read source X-Git-Tag: PRE_NEW_OCI8_EXTENSION~99 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d0d6a1f16f8231eb56f9eedc2c6fc91edae00e3f;p=php - Require declare(encoding=...) to come before any opcodes. Read source comments for more info. - Op arrays now know which script encoding they were compiled from. - Use this information to intelligently convert inline HTML blocks to the output encoding. Currently it opens and closes a new converter for each block, but we can optimize it. --- diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 7e4aed5d75..3927f36094 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -156,6 +156,7 @@ void init_compiler(TSRMLS_D) zend_init_compiler_data_structures(TSRMLS_C); zend_init_rsrc_list(TSRMLS_C); zend_hash_init(&CG(filenames_table), 5, NULL, (dtor_func_t) free_estring, 0); + zend_hash_init(&CG(script_encodings_table), 5, NULL, (dtor_func_t) free_estring, 0); zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) zend_file_handle_dtor, 0); CG(unclean_shutdown) = 0; } @@ -170,6 +171,7 @@ void shutdown_compiler(TSRMLS_D) zend_stack_destroy(&CG(object_stack)); zend_stack_destroy(&CG(declare_stack)); zend_stack_destroy(&CG(list_stack)); + zend_hash_destroy(&CG(script_encodings_table)); zend_hash_destroy(&CG(filenames_table)); zend_llist_destroy(&CG(open_files)); } @@ -215,6 +217,34 @@ ZEND_API zend_bool zend_is_compiling(TSRMLS_D) } +ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC) +{ + char **pp, *p; + int length = strlen(new_script_enc); + + if (zend_hash_find(&CG(script_encodings_table), new_script_enc, length+1, (void **) &pp) == SUCCESS) { + CG(script_encoding) = *pp; + return *pp; + } + p = estrndup(new_script_enc, length); + zend_hash_update(&CG(script_encodings_table), new_script_enc, length+1, &p, 
sizeof(char *), (void **) &pp); + CG(script_encoding) = p; + return p; +} + + +ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC) +{ + CG(script_encoding) = original_script_enc; +} + + +ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D) +{ + return CG(script_encoding); +} + + static zend_uint get_temporary_variable(zend_op_array *op_array) { return (op_array->T)++ * sizeof(temp_variable); @@ -3764,14 +3794,30 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC) convert_to_long(&val->u.constant); CG(declarables).ticks = val->u.constant; } else if (UG(unicode) && ZEND_U_EQUAL(Z_TYPE(var->u.constant), Z_UNIVAL(var->u.constant), Z_UNILEN(var->u.constant), "encoding", sizeof("encoding")-1)) { + UErrorCode status = U_ZERO_ERROR; if (val->u.constant.type == IS_CONSTANT) { zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding"); } + /* + * Check that the pragma comes before any opcodes. If the compilation + * got as far as this, the previous portion of the script must have been + * parseable according to the .ini script_encoding setting. We still + * want to tell them to put declare() at the top. + */ + if (CG(active_op_array)->last > 0) { + zend_error(E_COMPILE_ERROR, "Encoding declaration pragma has to be the very first statement in the script"); + } convert_to_string(&val->u.constant); if (zend_prepare_scanner_converters(Z_STRVAL(val->u.constant), 1 TSRMLS_CC) == FAILURE) { zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", Z_STRVAL(val->u.constant)); } + zend_set_compiled_script_encoding((char*)ucnv_getName(LANG_SCNG(output_conv), &status) TSRMLS_CC); + /* + * Because we require declare(encoding=...) to be the very first thing, + * we can safely cache the script encoding in the op array here. 
+ */ + CG(active_op_array)->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C); efree(val->u.constant.value.str.val); } zval_dtor(&var->u.constant); diff --git a/Zend/zend_compile.h b/Zend/zend_compile.h index 09877c4f47..a75a76c8f8 100644 --- a/Zend/zend_compile.h +++ b/Zend/zend_compile.h @@ -209,6 +209,7 @@ struct _zend_op_array { zend_bool uses_this; char *filename; + char *script_encoding; zend_uint line_start; zend_uint line_end; char *doc_comment; @@ -329,6 +330,9 @@ ZEND_API void zend_restore_compiled_filename(char *original_compiled_filename TS ZEND_API char *zend_get_compiled_filename(TSRMLS_D); ZEND_API int zend_get_compiled_lineno(TSRMLS_D); ZEND_API int zend_get_scanned_file_offset(TSRMLS_D); +ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC); +ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC); +ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D); ZEND_API char* zend_get_compiled_variable_name(zend_op_array *op_array, zend_uint var, int* name_len); diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 1182b4a833..7900e45268 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -133,6 +133,9 @@ struct _zend_compiler_globals { zend_uchar literal_type; + HashTable script_encodings_table; + char *script_encoding; + #ifdef ZTS HashTable *global_function_table; HashTable *global_class_table; diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index a7778d8d02..40813e1237 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -28,6 +28,7 @@ typedef struct _zend_lex_state { zend_file_handle *in; uint lineno; char *filename; + char *script_encoding; UConverter *input_conv; /* converter for flex input */ UConverter *output_conv; /* converter for data from flex output */ diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index a3c40e3c8e..3d98eb1f2e 100644 --- a/Zend/zend_language_scanner.l +++ 
b/Zend/zend_language_scanner.l @@ -209,6 +209,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) BEGIN(lex_state->state); CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); + zend_restore_compiled_script_encoding(lex_state->script_encoding TSRMLS_CC); if (SCNG(input_conv)) { ucnv_close(SCNG(input_conv)); @@ -758,6 +759,7 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) } zend_set_compiled_filename(file_path TSRMLS_CC); + zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC); if (CG(start_lineno)) { CG(zend_lineno) = CG(start_lineno); @@ -875,6 +877,7 @@ zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC) ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) { const char *encoding; + UErrorCode status = U_ZERO_ERROR; if (Z_TYPE_P(str) == IS_UNICODE) { convert_to_string_with_converter(str, UG(utf8_conv)); @@ -895,6 +898,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC); zend_set_compiled_filename(filename TSRMLS_CC); + zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; return SUCCESS; @@ -1937,7 +1941,7 @@ NEWLINE ("\r"|"\n"|"\r\n") if (func_name) { len += u_strlen((UChar*)func_name); } else { - func_name = EMPTY_STR; + func_name = (char*)EMPTY_STR; } zendlval->value.str.len = len; Z_USTRVAL_P(zendlval) = eumalloc(len+1); diff --git a/Zend/zend_opcode.c b/Zend/zend_opcode.c index ce19bbca04..b2e7a53bf3 100644 --- a/Zend/zend_opcode.c +++ b/Zend/zend_opcode.c @@ -77,6 +77,7 @@ void init_op_array(zend_op_array *op_array, zend_uchar type, int initial_ops_siz op_array->function_name = NULL; op_array->filename = zend_get_compiled_filename(TSRMLS_C); + op_array->script_encoding = 
zend_get_compiled_script_encoding(TSRMLS_C); op_array->doc_comment = NULL; op_array->doc_comment_len = 0; diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index 81362d5f67..b99d79700f 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -894,9 +894,13 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY) /* Convert inline HTML blocks to the output encoding, but only if necessary. */ if (opline->extended_value && strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status), - ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) { + EG(active_op_array)->script_encoding)) { zval z_conv; - zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); + UConverter *script_enc_conv = NULL; + if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) { + zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding); /* script_encoding is the op array's char * encoding name, so it is printed as a string */ + } + zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); z_conv.type = IS_BINARY; if (U_SUCCESS(status)) { zend_print_variable(&z_conv); @@ -904,6 +908,7 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY) zend_error(E_WARNING, "Could not convert inline HTML for output"); } zval_dtor(&z_conv); + ucnv_close(script_enc_conv); } else { zend_print_variable(z); } diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 4db7b67aa8..8fbeedbdf6 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -1357,9 +1357,14 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS) /* Convert inline HTML blocks to the output encoding, but only if necessary. 
*/ if (opline->extended_value && strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status), - ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) { + EG(active_op_array)->script_encoding)) { zval z_conv; - zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); + UConverter *script_enc_conv = NULL; + if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) { + zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding); /* script_encoding is the op array's char * encoding name, so it is printed as a string */ + } + printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len); /* NOTE(review): debug trace written with printf; the zend_vm_def.h hunk of this patch has no such line -- confirm and drop when regenerating */ + zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); z_conv.type = IS_BINARY; if (U_SUCCESS(status)) { zend_print_variable(&z_conv); @@ -1367,6 +1372,7 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS) zend_error(E_WARNING, "Could not convert inline HTML for output"); } zval_dtor(&z_conv); + ucnv_close(script_enc_conv); } else { zend_print_variable(z); } @@ -3864,9 +3870,14 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS) /* Convert inline HTML blocks to the output encoding, but only if necessary. 
*/ if (opline->extended_value && strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status), - ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) { + EG(active_op_array)->script_encoding)) { zval z_conv; - zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); + UConverter *script_enc_conv = NULL; + if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) { + zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding); /* script_encoding is the op array's char * encoding name, so it is printed as a string */ + } + printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len); /* NOTE(review): debug trace written with printf; the zend_vm_def.h hunk of this patch has no such line -- confirm and drop when regenerating */ + zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); z_conv.type = IS_BINARY; if (U_SUCCESS(status)) { zend_print_variable(&z_conv); @@ -3874,6 +3885,7 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS) zend_error(E_WARNING, "Could not convert inline HTML for output"); } zval_dtor(&z_conv); + ucnv_close(script_enc_conv); } else { zend_print_variable(z); } @@ -6907,9 +6919,14 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS) /* Convert inline HTML blocks to the output encoding, but only if necessary. 
*/ if (opline->extended_value && strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status), - ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) { + EG(active_op_array)->script_encoding)) { zval z_conv; - zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); + UConverter *script_enc_conv = NULL; + if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) { + zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding); /* script_encoding is the op array's char * encoding name, so it is printed as a string */ + } + printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len); /* NOTE(review): debug trace written with printf; the zend_vm_def.h hunk of this patch has no such line -- confirm and drop when regenerating */ + zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); z_conv.type = IS_BINARY; if (U_SUCCESS(status)) { zend_print_variable(&z_conv); @@ -6917,6 +6934,7 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS) zend_error(E_WARNING, "Could not convert inline HTML for output"); } zval_dtor(&z_conv); + ucnv_close(script_enc_conv); } else { zend_print_variable(z); } @@ -19602,9 +19620,14 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) /* Convert inline HTML blocks to the output encoding, but only if necessary. 
*/ if (opline->extended_value && strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status), - ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) { + EG(active_op_array)->script_encoding)) { zval z_conv; - zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); + UConverter *script_enc_conv = NULL; + if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) { + zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding); /* script_encoding is the op array's char * encoding name, so it is printed as a string */ + } + printf("converting %d bytes of T_INLINE_HTML\n", z->value.str.len); /* NOTE(review): debug trace written with printf; the zend_vm_def.h hunk of this patch has no such line -- confirm and drop when regenerating */ + zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status); z_conv.type = IS_BINARY; if (U_SUCCESS(status)) { zend_print_variable(&z_conv); @@ -19612,6 +19635,7 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) zend_error(E_WARNING, "Could not convert inline HTML for output"); } zval_dtor(&z_conv); + ucnv_close(script_enc_conv); } else { zend_print_variable(z); }