]> granicus.if.org Git - php/commitdiff
- Require declare(encoding=...) to come before any opcodes. Read source
author Andrei Zmievski <andrei@php.net>
Wed, 24 Aug 2005 20:42:09 +0000 (20:42 +0000)
committer Andrei Zmievski <andrei@php.net>
Wed, 24 Aug 2005 20:42:09 +0000 (20:42 +0000)
  comments for more info.
- Op arrays now know which script encoding they were compiled from.
- Use this information to intelligently convert inline HTML blocks to
  the output encoding. Currently it opens and closes a new converter for
  each block, but we can optimize it.

Zend/zend_compile.c
Zend/zend_compile.h
Zend/zend_globals.h
Zend/zend_language_scanner.h
Zend/zend_language_scanner.l
Zend/zend_opcode.c
Zend/zend_vm_def.h
Zend/zend_vm_execute.h

index 7e4aed5d7578918d3bcac5189040bb16e0800a0c..3927f36094961900527ee8ab9e8429933c92fda1 100644 (file)
@@ -156,6 +156,7 @@ void init_compiler(TSRMLS_D)
        zend_init_compiler_data_structures(TSRMLS_C);
        zend_init_rsrc_list(TSRMLS_C);
        zend_hash_init(&CG(filenames_table), 5, NULL, (dtor_func_t) free_estring, 0);
+       zend_hash_init(&CG(script_encodings_table), 5, NULL, (dtor_func_t) free_estring, 0);
        zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) zend_file_handle_dtor, 0);
        CG(unclean_shutdown) = 0;
 }
@@ -170,6 +171,7 @@ void shutdown_compiler(TSRMLS_D)
        zend_stack_destroy(&CG(object_stack));
        zend_stack_destroy(&CG(declare_stack));
        zend_stack_destroy(&CG(list_stack));
+       zend_hash_destroy(&CG(script_encodings_table));
        zend_hash_destroy(&CG(filenames_table));
        zend_llist_destroy(&CG(open_files));
 }
@@ -215,6 +217,34 @@ ZEND_API zend_bool zend_is_compiling(TSRMLS_D)
 }
 
 
+/*
+ * Records new_script_enc as the encoding of the script being compiled and
+ * returns the copy of that name kept in CG(script_encodings_table).
+ *
+ * Each distinct name is stored in the table only once, so calling this again
+ * with an equal name returns the same pointer that was returned before.
+ *
+ * The callers pass the result of ucnv_getName(), which may be NULL on error.
+ * A NULL name is ignored: the current encoding is left untouched and
+ * returned, instead of being handed to strlen().
+ */
+ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC)
+{
+       char **pp, *p;
+       int length;
+
+       if (new_script_enc == NULL) {
+               return CG(script_encoding);
+       }
+       length = strlen(new_script_enc);
+
+       /* Reuse the stored copy when this encoding name has been seen already. */
+       if (zend_hash_find(&CG(script_encodings_table), new_script_enc, length+1, (void **) &pp) == SUCCESS) {
+               CG(script_encoding) = *pp;
+               return *pp;
+       }
+       /* First occurrence: the table keeps its own copy of the name (the table is created with free_estring as its destructor). */
+       p = estrndup(new_script_enc, length);
+       zend_hash_update(&CG(script_encodings_table), new_script_enc, length+1, &p, sizeof(char *), (void **) &pp);
+       CG(script_encoding) = p;
+       return p;
+}
+
+
+/*
+ * Makes original_script_enc the current compiled-script encoding again.
+ * The pointer is stored as given; no copy is made.
+ */
+ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC)
+{
+       char *restored = original_script_enc;
+
+       CG(script_encoding) = restored;
+}
+
+
+/* Returns the encoding name currently recorded in CG(script_encoding). */
+ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D)
+{
+       char *current = CG(script_encoding);
+
+       return current;
+}
+
+
 static zend_uint get_temporary_variable(zend_op_array *op_array)
 {
        return (op_array->T)++ * sizeof(temp_variable);
@@ -3764,14 +3794,30 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC)
                convert_to_long(&val->u.constant);
                CG(declarables).ticks = val->u.constant;
        } else if (UG(unicode) && ZEND_U_EQUAL(Z_TYPE(var->u.constant), Z_UNIVAL(var->u.constant), Z_UNILEN(var->u.constant), "encoding", sizeof("encoding")-1)) {
+               UErrorCode status = U_ZERO_ERROR;
 
                if (val->u.constant.type == IS_CONSTANT) {
                        zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding");
                }
+               /*
+                * Check that the pragma comes before any opcodes. If the compilation
+                * got as far as this, the previous portion of the script must have been
+                * parseable according to the .ini script_encoding setting. We still
+                * want to tell them to put declare() at the top.
+                */
+               if (CG(active_op_array)->last > 0) {
+                       zend_error(E_COMPILE_ERROR, "Encoding declaration pragma has to be the very first statement in the script");
+               }
                convert_to_string(&val->u.constant);
                if (zend_prepare_scanner_converters(Z_STRVAL(val->u.constant), 1 TSRMLS_CC) == FAILURE) {
                        zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", Z_STRVAL(val->u.constant));
                }
+               zend_set_compiled_script_encoding((char*)ucnv_getName(LANG_SCNG(output_conv), &status) TSRMLS_CC);
+               /*
+                * Because we require declare(encoding=...) to be the very first thing,
+                * we can safely cache the script encoding in the op array here.
+                */
+               CG(active_op_array)->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
                efree(val->u.constant.value.str.val);
        }
        zval_dtor(&var->u.constant);
index 09877c4f477790dd87ed8aaf31db62fc6c453357..a75a76c8f8757bd55df95a1f971b32da8cdaae01 100644 (file)
@@ -209,6 +209,7 @@ struct _zend_op_array {
        zend_bool uses_this;
 
        char *filename;
+       char *script_encoding;
        zend_uint line_start;
        zend_uint line_end;
        char *doc_comment;
@@ -329,6 +330,9 @@ ZEND_API void zend_restore_compiled_filename(char *original_compiled_filename TS
 ZEND_API char *zend_get_compiled_filename(TSRMLS_D);
 ZEND_API int zend_get_compiled_lineno(TSRMLS_D);
 ZEND_API int zend_get_scanned_file_offset(TSRMLS_D);
+ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC);
+ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC);
+ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D);
 
 ZEND_API char* zend_get_compiled_variable_name(zend_op_array *op_array, zend_uint var, int* name_len);
 
index 1182b4a833ed8d54aa47fc8969d5d4b03b011029..7900e452684ecfc59abd6061ad7e9e5206232bb4 100644 (file)
@@ -133,6 +133,9 @@ struct _zend_compiler_globals {
 
        zend_uchar literal_type;
 
+       HashTable script_encodings_table;
+       char *script_encoding;
+
 #ifdef ZTS
        HashTable *global_function_table;
        HashTable *global_class_table;
index a7778d8d02b98ea68c8a763182b882d299aa83de..40813e1237b6bc53af7d5444821d5248adb52820 100644 (file)
@@ -28,6 +28,7 @@ typedef struct _zend_lex_state {
        zend_file_handle *in;
        uint lineno;
        char *filename;
+       char *script_encoding;
 
        UConverter *input_conv;     /* converter for flex input */
        UConverter *output_conv;    /* converter for data from flex output */
index a3c40e3c8e645cec4f156d3bd509b7a236af8149..3d98eb1f2e4538ff92468a49c757b4a48f228810 100644 (file)
@@ -209,6 +209,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
        BEGIN(lex_state->state);
        CG(zend_lineno) = lex_state->lineno;
        zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
+       zend_restore_compiled_script_encoding(lex_state->script_encoding TSRMLS_CC);
 
        if (SCNG(input_conv)) {
                ucnv_close(SCNG(input_conv));
@@ -758,6 +759,7 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
        }
 
        zend_set_compiled_filename(file_path TSRMLS_CC);
+       zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
 
        if (CG(start_lineno)) {
                CG(zend_lineno) = CG(start_lineno);
@@ -875,6 +877,7 @@ zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
 {
        const char *encoding;
+       UErrorCode status = U_ZERO_ERROR;
 
        if (Z_TYPE_P(str) == IS_UNICODE) {
                convert_to_string_with_converter(str, UG(utf8_conv));
@@ -895,6 +898,7 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D
        yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC);
 
        zend_set_compiled_filename(filename TSRMLS_CC);
+       zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
        CG(zend_lineno) = 1;
        CG(increment_lineno) = 0;
        return SUCCESS;
@@ -1937,7 +1941,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
                if (func_name) {
                        len += u_strlen((UChar*)func_name);
                } else {
-                       func_name = EMPTY_STR;
+                       func_name = (char*)EMPTY_STR;
                }
                zendlval->value.str.len = len;
                Z_USTRVAL_P(zendlval) = eumalloc(len+1);
index ce19bbca0438d25487fa2421e09a8bdabbb8a23d..b2e7a53bf3cae5faceec18292950d3d5dd58ec06 100644 (file)
@@ -77,6 +77,7 @@ void init_op_array(zend_op_array *op_array, zend_uchar type, int initial_ops_siz
 
        op_array->function_name = NULL;
        op_array->filename = zend_get_compiled_filename(TSRMLS_C);
+       op_array->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
        op_array->doc_comment = NULL;
        op_array->doc_comment_len = 0;
 
index 81362d5f6725eaf07f82a113195fc7cd8ac6408d..b99d79700ffd56e8af7d72f075b23bd12b36ac2a 100644 (file)
@@ -894,9 +894,14 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY)
                /* Convert inline HTML blocks to the output encoding, but only if necessary. */
                if (opline->extended_value &&
                        strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
-                                  ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+                                  EG(active_op_array)->script_encoding)) {
                        zval z_conv;
-                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+                       UConverter *script_enc_conv = NULL;
+                       /* Per-block converter for the encoding this op array was compiled from; closed below. */
+                       if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+                               zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+                       }
+                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
                        z_conv.type = IS_BINARY;
                        if (U_SUCCESS(status)) {
                                zend_print_variable(&z_conv);
@@ -904,6 +909,7 @@ ZEND_VM_HANDLER(40, ZEND_ECHO, CONST|TMP|VAR|CV, ANY)
                                zend_error(E_WARNING, "Could not convert inline HTML for output");
                        }
                        zval_dtor(&z_conv);
+                       ucnv_close(script_enc_conv);
                } else {
                        zend_print_variable(z);
                }
index 4db7b67aa8bce1c6c32c1cfb05961565227a927b..8fbeedbdf6672ad5a861b95a42621e7d5a122a6f 100644 (file)
@@ -1357,9 +1357,14 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                /* Convert inline HTML blocks to the output encoding, but only if necessary. */
                if (opline->extended_value &&
                        strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
-                                  ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+                                  EG(active_op_array)->script_encoding)) {
                        zval z_conv;
-                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+                       UConverter *script_enc_conv = NULL;
+                       /* Per-block converter for the encoding this op array was compiled from; closed below. */
+                       if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+                               zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+                       }
+                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
                        z_conv.type = IS_BINARY;
                        if (U_SUCCESS(status)) {
                                zend_print_variable(&z_conv);
@@ -1367,6 +1372,7 @@ static int ZEND_ECHO_SPEC_CONST_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                                zend_error(E_WARNING, "Could not convert inline HTML for output");
                        }
                        zval_dtor(&z_conv);
+                       ucnv_close(script_enc_conv);
                } else {
                        zend_print_variable(z);
                }
@@ -3864,9 +3870,14 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                /* Convert inline HTML blocks to the output encoding, but only if necessary. */
                if (opline->extended_value &&
                        strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
-                                  ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+                                  EG(active_op_array)->script_encoding)) {
                        zval z_conv;
-                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+                       UConverter *script_enc_conv = NULL;
+                       /* Per-block converter for the encoding this op array was compiled from; closed below. */
+                       if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+                               zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+                       }
+                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
                        z_conv.type = IS_BINARY;
                        if (U_SUCCESS(status)) {
                                zend_print_variable(&z_conv);
@@ -3874,6 +3885,7 @@ static int ZEND_ECHO_SPEC_TMP_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                                zend_error(E_WARNING, "Could not convert inline HTML for output");
                        }
                        zval_dtor(&z_conv);
+                       ucnv_close(script_enc_conv);
                } else {
                        zend_print_variable(z);
                }
@@ -6907,9 +6919,14 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                /* Convert inline HTML blocks to the output encoding, but only if necessary. */
                if (opline->extended_value &&
                        strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
-                                  ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+                                  EG(active_op_array)->script_encoding)) {
                        zval z_conv;
-                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+                       UConverter *script_enc_conv = NULL;
+                       /* Per-block converter for the encoding this op array was compiled from; closed below. */
+                       if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+                               zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+                       }
+                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
                        z_conv.type = IS_BINARY;
                        if (U_SUCCESS(status)) {
                                zend_print_variable(&z_conv);
@@ -6917,6 +6934,7 @@ static int ZEND_ECHO_SPEC_VAR_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                                zend_error(E_WARNING, "Could not convert inline HTML for output");
                        }
                        zval_dtor(&z_conv);
+                       ucnv_close(script_enc_conv);
                } else {
                        zend_print_variable(z);
                }
@@ -19602,9 +19620,14 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                /* Convert inline HTML blocks to the output encoding, but only if necessary. */
                if (opline->extended_value &&
                        strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
-                                  ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+                                  EG(active_op_array)->script_encoding)) {
                        zval z_conv;
-                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+                       UConverter *script_enc_conv = NULL;
+                       /* Per-block converter for the encoding this op array was compiled from; closed below. */
+                       if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+                               zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+                       }
+                       zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
                        z_conv.type = IS_BINARY;
                        if (U_SUCCESS(status)) {
                                zend_print_variable(&z_conv);
@@ -19612,6 +19635,7 @@ static int ZEND_ECHO_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS)
                                zend_error(E_WARNING, "Could not convert inline HTML for output");
                        }
                        zval_dtor(&z_conv);
+                       ucnv_close(script_enc_conv);
                } else {
                        zend_print_variable(z);
                }