comments for more info.
- Op arrays now know which script encoding they were compiled from.
- Use this information to intelligently convert inline HTML blocks to
the output encoding. Currently the code opens and closes a new converter
for each block; this can be optimized later.
zend_init_compiler_data_structures(TSRMLS_C);
zend_init_rsrc_list(TSRMLS_C);
zend_hash_init(&CG(filenames_table), 5, NULL, (dtor_func_t) free_estring, 0);
+ zend_hash_init(&CG(script_encodings_table), 5, NULL, (dtor_func_t) free_estring, 0);
zend_llist_init(&CG(open_files), sizeof(zend_file_handle), (void (*)(void *)) zend_file_handle_dtor, 0);
CG(unclean_shutdown) = 0;
}
zend_stack_destroy(&CG(object_stack));
zend_stack_destroy(&CG(declare_stack));
zend_stack_destroy(&CG(list_stack));
+ zend_hash_destroy(&CG(script_encodings_table));
zend_hash_destroy(&CG(filenames_table));
zend_llist_destroy(&CG(open_files));
}
}
+/*
+ * Record new_script_enc as the encoding name of the script being compiled.
+ *
+ * CG(script_encodings_table) keeps one stored copy per distinct name: when
+ * the name is already in the table that copy is reused, otherwise a copy is
+ * made with estrndup() and inserted. In both cases the stored copy is
+ * assigned to CG(script_encoding) and returned to the caller.
+ */
+ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC)
+{
+	char **found = NULL;
+	char *stored;
+	int key_len = strlen(new_script_enc) + 1;	/* hash key includes the terminating NUL */
+
+	if (zend_hash_find(&CG(script_encodings_table), new_script_enc, key_len, (void **) &found) != SUCCESS) {
+		/* Name not seen before: duplicate it and remember the copy in the table. */
+		stored = estrndup(new_script_enc, key_len - 1);
+		zend_hash_update(&CG(script_encodings_table), new_script_enc, key_len, &stored, sizeof(char *), (void **) &found);
+	} else {
+		stored = *found;
+	}
+
+	CG(script_encoding) = stored;
+	return stored;
+}
+
+
+/*
+ * Put a previously saved encoding name back into CG(script_encoding).
+ * The pointer is stored exactly as given; no copy is made here.
+ */
+ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC)
+{
+	char *saved_enc = original_script_enc;
+
+	CG(script_encoding) = saved_enc;
+}
+
+
+/* Return the encoding name currently held in CG(script_encoding). */
+ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D)
+{
+	char *current_enc = CG(script_encoding);
+
+	return current_enc;
+}
+
+
static zend_uint get_temporary_variable(zend_op_array *op_array)
{
return (op_array->T)++ * sizeof(temp_variable);
convert_to_long(&val->u.constant);
CG(declarables).ticks = val->u.constant;
} else if (UG(unicode) && ZEND_U_EQUAL(Z_TYPE(var->u.constant), Z_UNIVAL(var->u.constant), Z_UNILEN(var->u.constant), "encoding", sizeof("encoding")-1)) {
+ UErrorCode status = U_ZERO_ERROR;
if (val->u.constant.type == IS_CONSTANT) {
zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding");
}
+ /*
+ * Check that the pragma comes before any opcodes. If the compilation
+ * got as far as this, the previous portion of the script must have been
+ * parseable according to the .ini script_encoding setting. We still
+ * want to tell them to put declare() at the top.
+ */
+ if (CG(active_op_array)->last > 0) {
+ zend_error(E_COMPILE_ERROR, "Encoding declaration pragma has to be the very first statement in the script");
+ }
convert_to_string(&val->u.constant);
if (zend_prepare_scanner_converters(Z_STRVAL(val->u.constant), 1 TSRMLS_CC) == FAILURE) {
zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", Z_STRVAL(val->u.constant));
}
+ zend_set_compiled_script_encoding((char*)ucnv_getName(LANG_SCNG(output_conv), &status) TSRMLS_CC);
+ /*
+ * Because we require declare(encoding=...) to be the very first thing,
+ * we can safely cache the script encoding in the op array here.
+ */
+ CG(active_op_array)->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
efree(val->u.constant.value.str.val);
}
zval_dtor(&var->u.constant);
zend_bool uses_this;
char *filename;
+ char *script_encoding;
zend_uint line_start;
zend_uint line_end;
char *doc_comment;
ZEND_API char *zend_get_compiled_filename(TSRMLS_D);
ZEND_API int zend_get_compiled_lineno(TSRMLS_D);
ZEND_API int zend_get_scanned_file_offset(TSRMLS_D);
+ZEND_API char *zend_set_compiled_script_encoding(char *new_script_enc TSRMLS_DC);
+ZEND_API void zend_restore_compiled_script_encoding(char *original_script_enc TSRMLS_DC);
+ZEND_API char *zend_get_compiled_script_encoding(TSRMLS_D);
ZEND_API char* zend_get_compiled_variable_name(zend_op_array *op_array, zend_uint var, int* name_len);
zend_uchar literal_type;
+ HashTable script_encodings_table;
+ char *script_encoding;
+
#ifdef ZTS
HashTable *global_function_table;
HashTable *global_class_table;
zend_file_handle *in;
uint lineno;
char *filename;
+ char *script_encoding;
UConverter *input_conv; /* converter for flex input */
UConverter *output_conv; /* converter for data from flex output */
BEGIN(lex_state->state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
+ zend_restore_compiled_script_encoding(lex_state->script_encoding TSRMLS_CC);
if (SCNG(input_conv)) {
ucnv_close(SCNG(input_conv));
}
zend_set_compiled_filename(file_path TSRMLS_CC);
+ zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
if (CG(start_lineno)) {
CG(zend_lineno) = CG(start_lineno);
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
{
const char *encoding;
+ UErrorCode status = U_ZERO_ERROR;
if (Z_TYPE_P(str) == IS_UNICODE) {
convert_to_string_with_converter(str, UG(utf8_conv));
yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC);
zend_set_compiled_filename(filename TSRMLS_CC);
+ zend_set_compiled_script_encoding((char*)ucnv_getName(SCNG(output_conv), &status) TSRMLS_CC);
CG(zend_lineno) = 1;
CG(increment_lineno) = 0;
return SUCCESS;
if (func_name) {
len += u_strlen((UChar*)func_name);
} else {
- func_name = EMPTY_STR;
+ func_name = (char*)EMPTY_STR;
}
zendlval->value.str.len = len;
Z_USTRVAL_P(zendlval) = eumalloc(len+1);
op_array->function_name = NULL;
op_array->filename = zend_get_compiled_filename(TSRMLS_C);
+ op_array->script_encoding = zend_get_compiled_script_encoding(TSRMLS_C);
op_array->doc_comment = NULL;
op_array->doc_comment_len = 0;
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
- ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+ EG(active_op_array)->script_encoding)) {
zval z_conv;
- zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+ /* Set up a converter for the encoding name recorded in the active op array. */
+ UConverter *script_enc_conv = NULL;
+ if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+ /* script_encoding is a char * name, so it has to be formatted with %s. */
+ zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+ }
+ zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
+ /* The converter was set up for this block only; close it again. */
+ ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
- ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+ EG(active_op_array)->script_encoding)) {
zval z_conv;
- zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+ /* Set up a converter for the encoding name recorded in the active op array. */
+ UConverter *script_enc_conv = NULL;
+ if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+ /* script_encoding is a char * name, so it has to be formatted with %s. */
+ zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+ }
+ zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
+ /* The converter was set up for this block only; close it again. */
+ ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
- ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+ EG(active_op_array)->script_encoding)) {
zval z_conv;
- zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+ /* Set up a converter for the encoding name recorded in the active op array. */
+ UConverter *script_enc_conv = NULL;
+ if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+ /* script_encoding is a char * name, so it has to be formatted with %s. */
+ zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+ }
+ zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
+ /* The converter was set up for this block only; close it again. */
+ ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
- ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+ EG(active_op_array)->script_encoding)) {
zval z_conv;
- zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+ /* Set up a converter for the encoding name recorded in the active op array. */
+ UConverter *script_enc_conv = NULL;
+ if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+ /* script_encoding is a char * name, so it has to be formatted with %s. */
+ zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+ }
+ zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
+ /* The converter was set up for this block only; close it again. */
+ ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}
/* Convert inline HTML blocks to the output encoding, but only if necessary. */
if (opline->extended_value &&
strcmp(ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status),
- ucnv_getName(ZEND_U_CONVERTER(UG(script_encoding_conv)), &status))) {
+ EG(active_op_array)->script_encoding)) {
zval z_conv;
- zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), ZEND_U_CONVERTER(UG(script_encoding_conv)), &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
+ /* Set up a converter for the encoding name recorded in the active op array. */
+ UConverter *script_enc_conv = NULL;
+ if (zend_set_converter_encoding(&script_enc_conv, EG(active_op_array)->script_encoding) == FAILURE) {
+ /* script_encoding is a char * name, so it has to be formatted with %s. */
+ zend_error(E_ERROR, "Unsupported encoding [%s]", EG(active_op_array)->script_encoding);
+ }
+ zend_convert_encodings(ZEND_U_CONVERTER(UG(output_encoding_conv)), script_enc_conv, &z_conv.value.str.val, &z_conv.value.str.len, z->value.str.val, z->value.str.len, &status);
z_conv.type = IS_BINARY;
if (U_SUCCESS(status)) {
zend_print_variable(&z_conv);
zend_error(E_WARNING, "Could not convert inline HTML for output");
}
zval_dtor(&z_conv);
+ /* The converter was set up for this block only; close it again. */
+ ucnv_close(script_enc_conv);
} else {
zend_print_variable(z);
}