From: Sara Golemon Date: Wed, 29 Mar 2006 01:20:43 +0000 (+0000) Subject: Another (and hopefully last) major streams commit. X-Git-Tag: RELEASE_1_3~205 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=30a2bd1d114da2c040cba070e400bb4ad8024b60;p=php Another (and hopefully last) major streams commit. This moves unicode conversion to the filter layer (rather than at the lower streams layer) unicode_filter.c has been moved from ext/unicode to main/streams as it's an integral part of the streams unicode conversion process. There are now three ways to set encoding on a stream: (1) By context $ctx = stream_context_create(NULL,array('encoding'=>'latin1')); $fp = fopen('somefile', 'r+t', false, $ctx); (2) By stream_encoding() $fp = fopen('somefile', 'r+'); stream_encoding($fp, 'latin1'); (3) By filter $fp = fopen('somefile', 'r+'); stream_filter_append($fp, 'unicode.from.latin1', STREAM_FILTER_READ); stream_filter_append($fp, 'unicode.to.latin1', STREAM_FILTER_WRITE); Note: Methods 1 and 2 are convenience wrappers around method 3. --- diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 44656974d5..4b377b88cf 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -589,6 +589,7 @@ zend_function_entry basic_functions[] = { PHP_FE(stream_filter_prepend, NULL) PHP_FE(stream_filter_append, NULL) PHP_FE(stream_filter_remove, NULL) + PHP_FE(stream_encoding, NULL) PHP_FE(stream_socket_client, second_and_third_args_force_ref) PHP_FE(stream_socket_server, second_and_third_args_force_ref) PHP_FE(stream_socket_accept, third_arg_force_ref) diff --git a/ext/standard/file.c b/ext/standard/file.c index 042e9caf83..048f23aa9f 100644 --- a/ext/standard/file.c +++ b/ext/standard/file.c @@ -1008,14 +1008,14 @@ PHPAPI PHP_FUNCTION(fgets) php_stream_from_zval(stream, &zstream); - buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? 
IS_UNICODE : IS_STRING, NULL_ZSTR, 0, length, &retlen); + buf.v = php_stream_get_line_ex(stream, stream->readbuf_type, NULL_ZSTR, 0, length, &retlen); if (!buf.v) { RETURN_FALSE; } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { RETURN_UNICODEL(buf.u, retlen, 0); - } else { + } else { /* IS_STRING */ RETURN_STRINGL(buf.s, retlen, 0); } } @@ -1034,7 +1034,7 @@ PHPAPI PHP_FUNCTION(fgetc) PHP_STREAM_TO_ZVAL(stream, arg1); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int buflen = 1; UChar *buf = php_stream_read_unicode_chars(stream, &buflen); @@ -1042,7 +1042,7 @@ PHPAPI PHP_FUNCTION(fgetc) RETURN_FALSE; } RETURN_UNICODEL(buf, buflen, 0); - } else { + } else { /* IS_STRING */ char buf[2]; buf[0] = php_stream_getc(stream); @@ -1068,7 +1068,7 @@ PHPAPI PHP_FUNCTION(fgetss) php_stream_from_zval(stream, &zstream); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { UChar *buf = php_stream_get_line_ex(stream, IS_UNICODE, NULL_ZSTR, 0, length, &retlen); UChar *allowed = NULL; int allowed_len = 0; @@ -1085,7 +1085,7 @@ PHPAPI PHP_FUNCTION(fgetss) retlen = php_u_strip_tags(buf, retlen, &stream->fgetss_state, allowed, allowed_len TSRMLS_CC); RETURN_UNICODEL(buf, retlen, 0); - } else { + } else { /* IS_STRING */ char *buf = php_stream_get_line_ex(stream, IS_STRING, NULL_ZSTR, 0, length, &retlen); char *allowed = NULL; int allowed_len = 0; @@ -1752,7 +1752,7 @@ PHPAPI PHP_FUNCTION(fread) RETURN_FALSE; } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int buflen = len; UChar *buf = php_stream_read_unicode_chars(stream, &buflen); @@ -1761,7 +1761,7 @@ PHPAPI PHP_FUNCTION(fread) } RETURN_UNICODEL(buf, buflen, 0); - } else { + } else { /* IS_STRING */ char *buf = emalloc(len + 1); int buflen = php_stream_read(stream, buf, len); diff --git a/ext/standard/streamsfuncs.c b/ext/standard/streamsfuncs.c index a7c5323931..0dc9823094 100644 
--- a/ext/standard/streamsfuncs.c +++ b/ext/standard/streamsfuncs.c @@ -489,11 +489,11 @@ PHP_FUNCTION(stream_get_meta_data) add_assoc_zval(return_value, "write_filters", newval); } - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { int readbuf_len = u_countChar32(stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos); add_assoc_long(return_value, "unread_bytes", UBYTES(stream->writepos - stream->readpos)); add_assoc_long(return_value, "unread_chars", readbuf_len); - } else { + } else { /* IS_STRING */ add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos); add_assoc_long(return_value, "unread_chars", stream->writepos - stream->readpos); } @@ -1275,7 +1275,7 @@ PHP_FUNCTION(stream_get_line) php_stream_from_zval(stream, &zstream); - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { UChar *buf; UChar *d = NULL; int dlen = 0; @@ -1294,7 +1294,7 @@ PHP_FUNCTION(stream_get_line) } RETURN_UNICODEL(buf, buf_size, 0); - } else { + } else { /* IS_STRING */ char *buf; char *d = NULL; int dlen = 0; @@ -1462,6 +1462,67 @@ PHP_FUNCTION(stream_socket_enable_crypto) } /* }}} */ +/* {{{ proto void stream_encoding(resource stream[, string encoding]) +Set character set for stream encoding +UTODO: Return current encoding charset +*/ +PHP_FUNCTION(stream_encoding) +{ + zval *zstream; + php_stream *stream; + char *encoding = NULL; + int encoding_len = 0; + int remove_read_tail = 0, remove_write_tail = 0; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r|s", &zstream, &encoding, &encoding_len) == FAILURE) { + return; + } + + php_stream_from_zval(stream, &zstream); + + /* Double check that the target encoding is legal before attempting anything */ + + if (stream->readfilters.tail) { + if (stream->readfilters.tail->fops == &php_unicode_from_string_filter_ops) { + /* Remove the current unicode.from.* filter, + the filter layer will transcode anything in the read buffer 
back to binary + or invalidate the read buffer */ + remove_read_tail = 1; + } else if (stream->readbuf_type == IS_UNICODE) { + /* There's an encoding on the stream already, but then there's filtering happening after that point + It's asking too much for PHP to figure out what the user wants, throw an error back in their face */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream"); + RETURN_FALSE; + } + } + + if (stream->writefilters.tail) { + if (stream->writefilters.tail->fops == &php_unicode_to_string_filter_ops) { + /* Remove the current unicode.to.* filter */ + remove_write_tail = 1; + } else if ((stream->writefilters.tail->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) { + /* conversion to binary is happening, them another filter is doing something + bailout for same reason as read filters */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream"); + RETURN_FALSE; + } + } + + if (remove_read_tail) { + php_stream_filter_remove(stream->readfilters.tail, 1 TSRMLS_CC); + } + if (remove_write_tail) { + php_stream_filter_remove(stream->writefilters.tail, 1 TSRMLS_CC); + } + + /* UTODO: Allow overriding error handling for converters */ + php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char)); + php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL); + + RETURN_TRUE; +} +/* }}} */ + /* * Local variables: * tab-width: 4 diff --git a/ext/standard/streamsfuncs.h b/ext/standard/streamsfuncs.h index 77d5320fa8..bd0e99031a 100644 --- a/ext/standard/streamsfuncs.h +++ b/ext/standard/streamsfuncs.h @@ -53,6 +53,7 @@ PHP_FUNCTION(stream_context_get_default); PHP_FUNCTION(stream_filter_prepend); PHP_FUNCTION(stream_filter_append); PHP_FUNCTION(stream_filter_remove); +PHP_FUNCTION(stream_encoding); PHP_FUNCTION(stream_socket_enable_crypto); PHP_FUNCTION(stream_socket_pair); diff --git a/ext/unicode/config.m4 b/ext/unicode/config.m4 index 
afa089613c..5f102a50d4 100644 --- a/ext/unicode/config.m4 +++ b/ext/unicode/config.m4 @@ -4,4 +4,4 @@ dnl PHP_SUBST(UNICODE_SHARED_LIBADD) AC_DEFINE(HAVE_UNICODE, 1, [ ]) -PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c unicode_iterators.c collator.c, $ext_shared) +PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, $ext_shared) diff --git a/ext/unicode/config.w32 b/ext/unicode/config.w32 index 5321397c1e..abc368440f 100644 --- a/ext/unicode/config.w32 +++ b/ext/unicode/config.w32 @@ -1,5 +1,5 @@ // $Id$ // vim:ft=javascript -EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c collator.c locale.c"); +EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c"); AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension'); diff --git a/ext/unicode/php_unicode.h b/ext/unicode/php_unicode.h index 1de91686f3..cd9f59a6ed 100644 --- a/ext/unicode/php_unicode.h +++ b/ext/unicode/php_unicode.h @@ -67,7 +67,6 @@ PHP_FUNCTION(collator_get_attribute); PHP_METHOD(collator, __construct); void php_init_collation(TSRMLS_D); -extern php_stream_filter_factory php_unicode_filter_factory; #ifdef __cplusplus } // extern "C" diff --git a/ext/unicode/unicode.c b/ext/unicode/unicode.c index e468feabd8..827c36c8e8 100644 --- a/ext/unicode/unicode.c +++ b/ext/unicode/unicode.c @@ -273,10 +273,6 @@ ZEND_GET_MODULE(unicode) /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(unicode) { - if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) { - return FAILURE; - } - php_register_unicode_iterators(TSRMLS_C); php_init_collation(TSRMLS_C); @@ -287,9 +283,6 @@ PHP_MINIT_FUNCTION(unicode) /* {{{ PHP_MSHUTDOWN_FUNCTION */ PHP_MSHUTDOWN_FUNCTION(unicode) { - if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) { - return FAILURE; - } /* add your stuff here */ diff --git a/main/main.c b/main/main.c index ba2dd6cb4c..cc7279b39a 100644 --- a/main/main.c +++ b/main/main.c @@ 
-1611,6 +1611,12 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod return FAILURE; } + /* Initialize unicode filters */ + if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) { + php_printf("PHP: Unable to initialize unicode stream filters.\n"); + return FAILURE; + } + /* initialize registry for images to be used in phpinfo() (this uses configuration parameters from php.ini) */ @@ -1744,6 +1750,7 @@ void php_module_shutdown(TSRMLS_D) zend_shutdown(TSRMLS_C); + /* Destroys filter & transport registries too */ php_shutdown_stream_wrappers(module_number TSRMLS_CC); php_shutdown_info_logos(); diff --git a/main/php_streams.h b/main/php_streams.h index d62a103937..42cf5eb98d 100755 --- a/main/php_streams.h +++ b/main/php_streams.h @@ -206,12 +206,9 @@ struct _php_stream { php_stream_context *context; int flags; /* PHP_STREAM_FLAG_XXX */ - /* unicode */ - UConverter *input_encoding; - UConverter *output_encoding; - /* buffer */ off_t position; /* of underlying stream */ + zend_uchar readbuf_type; zstr readbuf; /* readbuf.s or readbuf.u */ size_t readbuflen; /* Length in units (char or UChar) */ off_t readpos; /* Position in units (char or UChar) */ @@ -252,8 +249,6 @@ END_EXTERN_C() #define php_stream_from_zval_no_verify(xstr, ppzval) (xstr) = (php_stream*)zend_fetch_resource((ppzval) TSRMLS_CC, -1, "stream", NULL, 2, php_file_le_stream(), php_file_le_pstream()) #define PS_ULEN(is_unicode, len) ((is_unicode) ? UBYTES(len) : (len)) -#define php_stream_reads_unicode(stream) ((stream->input_encoding) ? 1 : 0) -#define php_stream_writes_unicode(stream) ((stream->output_encoding) ? 
1 : 0) BEGIN_EXTERN_C() PHPAPI int php_stream_from_persistent_id(const char *persistent_id, php_stream **stream TSRMLS_DC); diff --git a/main/streams/filter.c b/main/streams/filter.c index f074ae7c7b..1e74cda667 100644 --- a/main/streams/filter.c +++ b/main/streams/filter.c @@ -396,50 +396,63 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream chain->tail = filter; filter->chain = chain; - if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) { + if (&(stream->readfilters) == chain) { /* Let's going ahead and wind anything in the buffer through this filter */ php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL }; php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out; - php_stream_filter_status_t status; + php_stream_filter_status_t status = PSFS_FEED_ME; php_stream_bucket *bucket; size_t consumed = 0; - if (stream->input_encoding) { - bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); - } else { - bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); - } - php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); - status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC); + if ((stream->writepos - stream->readpos) > 0) { + if (stream->readbuf_type == IS_UNICODE) { + bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); + } else { + bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); + } + php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); + status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC); - if (stream->readpos + consumed > stream->writepos || 
consumed < 0) { - /* No behaving filter should cause this. */ - status = PSFS_ERR_FATAL; + if (stream->readpos + consumed > stream->writepos || consumed < 0) { + /* No behaving filter should cause this. */ + status = PSFS_ERR_FATAL; + } } - switch (status) { - case PSFS_ERR_FATAL: - /* If this first cycle simply fails then there's something wrong with the filter. - Pull the filter off the chain and leave the read buffer alone. */ - if (chain->head == filter) { - chain->head = NULL; - chain->tail = NULL; - } else { - filter->prev->next = NULL; - chain->tail = filter->prev; - } - php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain."); - break; - case PSFS_FEED_ME: + if (status == PSFS_ERR_FATAL) { + /* If this first cycle simply fails then there's something wrong with the filter. + Pull the filter off the chain and leave the read buffer alone. */ + if (chain->head == filter) { + chain->head = NULL; + chain->tail = NULL; + } else { + filter->prev->next = NULL; + chain->tail = filter->prev; + } + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain."); + } else { + /* This filter addition may change the readbuffer type. 
+ Since all the previously held data is in the bucket brigade, + we can reappropriate the buffer that already exists (if one does) */ + if (stream->readbuf_type == IS_UNICODE && (filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) { + /* Buffer is currently based on unicode characters, but filter only outputs STRING adjust counting */ + stream->readbuf_type = IS_STRING; + stream->readbuflen *= UBYTES(1); + } else if (stream->readbuf_type == IS_STRING && (filter->fops->flags & PSFO_FLAG_OUTPUTS_STRING) == 0) { + /* Buffer is currently based on binary characters, but filter only outputs UNICODE adjust counting */ + stream->readbuf_type = IS_UNICODE; + stream->readbuflen /= UBYTES(1); + } + + if (status == PSFS_FEED_ME) { /* We don't actually need data yet, leave this filter in a feed me state until data is needed. Reset stream's internal read buffer since the filter is "holding" it. */ stream->readpos = 0; stream->writepos = 0; - break; - case PSFS_PASS_ON: + } else if (status == PSFS_PASS_ON) { /* Put any filtered data onto the readbuffer stack. Previously read data has been at least partially consumed. */ stream->readpos += consumed; @@ -454,23 +467,20 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream bucket = brig_outp->head; /* Convert for stream type */ - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different type than bucket contains, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Grow buffer to hold this bucket if need be. 
TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */ if (stream->readbuflen - stream->writepos < bucket->buflen) { stream->readbuflen += bucket->buflen; - stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent); } /* Append to readbuf */ - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen)); } else { memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen); @@ -480,10 +490,9 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream php_stream_bucket_unlink(bucket TSRMLS_CC); php_stream_bucket_delref(bucket TSRMLS_CC); } - break; + } } - - } + } /* end of readfilters specific code */ } PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC) @@ -597,26 +606,23 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS /* Dump any newly flushed data to the read buffer */ if (stream->readpos > stream->chunk_size) { /* Back the buffer up */ - memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos), PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos)); + memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos), PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos)); stream->writepos -= stream->readpos; stream->readpos = 0; } if (flushed_size > (stream->readbuflen - stream->writepos)) { /* Grow the buffer */ - stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, 
PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent); } while ((bucket = inp->head)) { /* Convert if necessary */ - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different type than what's in bucket, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Append to readbuf */ - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen)); } else { memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen); @@ -632,13 +638,8 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS while ((bucket = inp->head)) { /* Convert if necessary */ if (bucket->buf_type == IS_UNICODE) { - if (stream->output_encoding) { - /* Stream has a configured output encoding, convert to appropriate type */ - php_stream_bucket_convert(bucket, IS_STRING, stream->output_encoding); - } else { - /* Stream is binary, write ugly UChars as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); - } + /* Force data to binary, adjusting buflen */ + php_stream_bucket_convert_notranscode(bucket, IS_STRING); } /* Must be binary by this point */ @@ -654,6 +655,9 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC) { + /* UTODO: Figure out a sane way to "defilter" so that unicode 
converters can be swapped around + For now, at least fopen(,'b') + stream_encoding($fp, 'charset') works since there's nothing to remove */ + if (filter->prev) { filter->prev->next = filter->next; } else { @@ -770,6 +774,42 @@ PHPAPI int _php_stream_bucket_convert(php_stream_bucket *bucket, unsigned char t return FAILURE; } +PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC) +{ + int encoding_len = strlen(encoding); + int buflen = sizeof("unicode.from.") + encoding_len - 1; /* might be "to", but "from" is long enough for both */ + char *buf = emalloc(buflen + 1); + php_stream_filter *filter; + zval *filterparams; + + if (writechain) { + memcpy(buf, "unicode.to.", sizeof("unicode.to.") - 1); + memcpy(buf + sizeof("unicode.to.") - 1, encoding, encoding_len + 1); + } else { + memcpy(buf, "unicode.from.", sizeof("unicode.from.") - 1); + memcpy(buf + sizeof("unicode.from.") - 1, encoding, encoding_len + 1); + } + + ALLOC_INIT_ZVAL(filterparams); + array_init(filterparams); + add_assoc_long(filterparams, "error_mode", error_mode); + if (subst) { + add_assoc_unicode(filterparams, "subst_char", subst, 1); + } + filter = php_stream_filter_create(buf, filterparams, php_stream_is_persistent(stream) TSRMLS_CC); + efree(buf); + zval_ptr_dtor(&filterparams); + + if (!filter) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding for charset: %s\n", encoding); + return FAILURE; + } + + php_stream_filter_append(writechain ? 
&stream->writefilters : &stream->readfilters, filter); + + return SUCCESS; +} + /* * Local variables: * tab-width: 4 diff --git a/main/streams/php_stream_filter_api.h b/main/streams/php_stream_filter_api.h index 78736a9096..1c131a3eb6 100644 --- a/main/streams/php_stream_filter_api.h +++ b/main/streams/php_stream_filter_api.h @@ -157,6 +157,7 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC); PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC); PHPAPI php_stream_filter *_php_stream_filter_alloc(php_stream_filter_ops *fops, void *abstract, int persistent STREAMS_DC TSRMLS_DC); +PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC); END_EXTERN_C() #define php_stream_filter_alloc(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_CC TSRMLS_CC) #define php_stream_filter_alloc_rel(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_REL_CC TSRMLS_CC) @@ -165,6 +166,8 @@ END_EXTERN_C() #define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC) #define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC) #define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC) +#define php_stream_encoding_apply(stream, writechain, encoding, error_mode, subst) \ + _php_stream_encoding_apply((stream), (writechain), (encoding), (error_mode), (subst) TSRMLS_CC) #define php_stream_is_filtered(stream) ((stream)->readfilters.head || (stream)->writefilters.head) @@ -179,6 +182,12 @@ PHPAPI int php_stream_filter_register_factory_volatile(const char *filterpattern PHPAPI php_stream_filter *php_stream_filter_create(const char *filtername, zval 
*filterparams, int persistent TSRMLS_DC); END_EXTERN_C() +/* unicode_filter.c exports */ +extern php_stream_filter_ops php_unicode_to_string_filter_ops; +extern php_stream_filter_ops php_unicode_from_string_filter_ops; +extern php_stream_filter_ops php_unicode_tidy_filter_ops; +extern php_stream_filter_factory php_unicode_filter_factory; + /* * Local variables: * tab-width: 4 diff --git a/main/streams/streams.c b/main/streams/streams.c index 281cd69408..4c25c53f2e 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -239,6 +239,7 @@ fprintf(stderr, "stream_alloc: %s:%p persistent=%s\n", ops->label, ret, persiste ret->abstract = abstract; ret->is_persistent = persistent_id ? 1 : 0; ret->chunk_size = FG(def_chunk_size); + ret->readbuf_type = IS_STRING; if (FG(auto_detect_line_endings)) { ret->flags |= PHP_STREAM_FLAG_DETECT_EOL; @@ -483,12 +484,9 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D * stream read buffer */ while (brig_inp->head) { bucket = brig_inp->head; - if (bucket->buf_type != IS_UNICODE && stream->input_encoding) { - /* Stream expects unicode, convert using stream encoding */ - php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding); - } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) { - /* Stream expects binary, filter provided unicode, just take the buffer as is */ - php_stream_bucket_convert_notranscode(bucket, IS_STRING); + if (bucket->buf_type != stream->readbuf_type) { + /* Stream expects different datatype than bucket has, convert slopily */ + php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type); } /* Bucket type now matches stream type */ @@ -496,9 +494,9 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D * TODO: this can fail for persistent streams */ if (stream->readbuflen - stream->writepos < bucket->buflen) { stream->readbuflen += bucket->buflen; - stream->readbuf.v = perealloc(stream->readbuf.v, 
PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent); + stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent); } - memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->input_encoding, bucket->buflen)); + memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, bucket->buflen)); stream->writepos += bucket->buflen; php_stream_bucket_unlink(bucket TSRMLS_CC); @@ -530,46 +528,6 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D } efree(chunk_buf); - } else if (stream->input_encoding) { /* Unfiltered Unicode stream */ - /* is there enough data in the buffer ? */ - if (stream->writepos - stream->readpos < (off_t)size) { - char *binbuf; - UChar *ubuf; - int binbuf_len, ubuf_len; - size_t toread = (size > stream->chunk_size) ? size : stream->chunk_size; - UErrorCode status = U_ZERO_ERROR; - - /* Read stream data into temporary buffer, then convert to unicode - TODO: This can be improved */ - binbuf = emalloc(toread + 1); - binbuf_len = stream->ops->read(stream, binbuf, toread TSRMLS_CC); - if (binbuf_len == (size_t)-1) { - /* Failure */ - efree(binbuf); - return; - } - /* Convert to unicode */ - zend_convert_to_unicode(stream->input_encoding, &ubuf, &ubuf_len, binbuf, binbuf_len, &status); - efree(binbuf); - - /* reduce buffer memory consumption if possible, to avoid a realloc */ - if (stream->readbuf.u && stream->readbuflen - stream->writepos < stream->chunk_size) { - memmove(stream->readbuf.u, stream->readbuf.u + stream->readpos, UBYTES(stream->readbuflen - stream->readpos)); - stream->writepos -= stream->readpos; - stream->readpos = 0; - } - - /* grow the buffer if required - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < ubuf_len) { - stream->readbuflen += ((stream->chunk_size > ubuf_len) ? 
stream->chunk_size : ubuf_len); - stream->readbuf.u = (UChar*)perealloc(stream->readbuf.u, UBYTES(stream->readbuflen), stream->is_persistent); - } - - memcpy(stream->readbuf.u + stream->writepos, ubuf, UBYTES(ubuf_len)); - efree(ubuf); - stream->writepos += ubuf_len; - } } else { /* Unfiltered Binary stream */ /* is there enough data in the buffer ? */ if (stream->writepos - stream->readpos < (off_t)size) { @@ -609,13 +567,13 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS * drain the remainder of the buffer before using the "raw" read mode for * the excess */ if (stream->writepos - stream->readpos > 0) { - toread = PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos); + toread = PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos); if (toread > size) { toread = size; } - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { /* Sloppy read, anyone using php_stream_read() on a unicode stream * had better know what they're doing */ @@ -647,7 +605,7 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS } if (toread > 0) { - if (php_stream_reads_unicode(stream)) { + if (stream->readbuf_type == IS_UNICODE) { /* Sloppy read, anyone using php_stream_read() on a unicode stream * had better know what they're doing */ @@ -685,7 +643,7 @@ PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int size, { size_t toread = 0, didread = 0, string_length = 0; - if (!stream->input_encoding) { + if (stream->readbuf_type != IS_UNICODE) { return -1; } @@ -763,7 +721,7 @@ PHPAPI UChar *_php_stream_read_unicode_chars(php_stream *stream, int *pchars TSR int buflen = size; size_t toread = 0, didread = 0, string_length = 0; - if (!stream->input_encoding) { + if (stream->readbuf_type != IS_UNICODE) { return NULL; } @@ -921,7 +879,7 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS char *readptr, *buf = zbuf.s; if 
(!buf) { - readptr = stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos); + readptr = stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos); avail = stream->writepos - stream->readpos; } else { readptr = zbuf.s; @@ -929,7 +887,7 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS } if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { - if (stream->input_encoding) { + if (stream->readbuf_type == IS_UNICODE) { cr = (char*)u_memchr((UChar*)readptr, '\r', avail); lf = (char*)u_memchr((UChar*)readptr, '\n', avail); } else { @@ -948,10 +906,10 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS eol = lf; } } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { - eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail); + eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail); } else { /* unix (and dos) line endings */ - eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail); + eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail); } return (void*)eol; @@ -967,7 +925,7 @@ PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, si size_t current_buf_size = 0; size_t total_copied = 0; int grow_mode = 0; - int is_unicode = php_stream_reads_unicode(stream); + int is_unicode = stream->readbuf_type == IS_UNICODE; int split_surrogate = 0; zstr bufstart = buf; @@ -1042,8 +1000,8 @@ PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, si * than 8K, we waste 1 byte per additional 8K or so. 
* That seems acceptable to me, to avoid making this code * hard to follow */ - bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->input_encoding, current_buf_size + cpysz + 1)); - buf.s = bufstart.s + PS_ULEN(stream->input_encoding, total_copied); + bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, current_buf_size + cpysz + 1)); + buf.s = bufstart.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, total_copied); current_buf_size += cpysz + 1; } else { if (cpysz >= maxlen - 1) { @@ -1177,7 +1135,7 @@ PHPAPI UChar *php_stream_get_record_unicode(php_stream *stream, size_t maxlen, s size_t toread; int skip = 0; - if (!php_stream_reads_unicode(stream)) { + if (stream->readbuf_type != IS_UNICODE) { return NULL; } @@ -1241,8 +1199,7 @@ PHPAPI UChar *php_stream_get_record_unicode(php_stream *stream, size_t maxlen, s /* Writes a buffer directly to a stream, using multiple of the chunk size */ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr buf, int buflen TSRMLS_DC) { - size_t didwrite = 0, towrite, justwrote, shouldwrite, buflen_orig = buflen; - zstr buf_orig = buf; + size_t didwrite = 0, towrite, justwrote, shouldwrite; char *freeme = NULL; /* if we have a seekable stream we need to ensure that data is written at the @@ -1254,24 +1211,9 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC); } - if (stream->output_encoding && buf_type == IS_UNICODE) { - char *dest; - int destlen, num_conv; - UErrorCode status = U_ZERO_ERROR; - - num_conv = zend_convert_from_unicode(stream->output_encoding, &dest, &destlen, buf.u, buflen, &status); - if (U_FAILURE(status)) { - int32_t offset = u_countChar32(buf.u, num_conv); - - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); - 
} - freeme = buf.s = dest; - buflen = destlen; - } else { - /* Sloppy handling, make it a binary buffer */ - if (buf_type != IS_STRING) { - buflen = UBYTES(buflen); - } + /* Sloppy handling, make it a binary buffer */ + if (buf_type == IS_UNICODE) { + buflen = UBYTES(buflen); } shouldwrite = buflen; @@ -1300,32 +1242,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu } } - - if (stream->output_encoding) { - /* Map didwrite back to the original character count */ - if (didwrite == shouldwrite) { - /* Everything wrote okay, no need to count */ - didwrite = buflen_orig; - } else { - UErrorCode status = U_ZERO_ERROR; - char *t = freeme; - const UChar *p = buf_orig.u; - - switch (ucnv_getType(stream->output_encoding)) { - case UCNV_SBCS: - case UCNV_LATIN_1: - case UCNV_US_ASCII: - /* 1:1 character->byte mapping, didwrite really does mean the number of characters written */ - break; - default: - /* Reconvert into junk buffer to see where conversion stops in source string */ - ucnv_resetFromUnicode(stream->output_encoding); - ucnv_fromUnicode(stream->output_encoding, &t, t + didwrite, &p, p + buflen_orig, NULL, TRUE, &status); - /* p stops at the first unconvertable UChar when t runs out of space */ - didwrite = p - buf_orig.u; - } - } - } else if (buf_type == IS_UNICODE) { + if (buf_type == IS_UNICODE) { /* Was slopily converted */ didwrite /= UBYTES(1); } @@ -2274,50 +2191,15 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio if (stream && strchr(implicit_mode, 't') && UG(unicode)) { if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+')) { char *encoding = (context && context->output_encoding) ? 
context->output_encoding : "utf8"; - UErrorCode status = U_ZERO_ERROR; - - stream->output_encoding = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - switch (status) { - case U_MEMORY_ALLOCATION_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unable to allocate memory for unicode output converter: %s", encoding); - break; - case U_FILE_ACCESS_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Error loading unicode output converter: %s", encoding); - break; - default: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unknown error starting unicode output converter: %s", encoding); - } - } else { - /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ - zend_set_converter_error_mode(stream->output_encoding, ZEND_FROM_UNICODE, UG(from_error_mode)); - zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char)); - } + + /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ + php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char)); } if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) { char *encoding = (context && context->input_encoding) ? 
context->input_encoding : "utf8"; - UErrorCode status = U_ZERO_ERROR; - - stream->input_encoding = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - switch (status) { - case U_MEMORY_ALLOCATION_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unable to allocate memory for unicode input converter: %s", encoding); - break; - case U_FILE_ACCESS_ERROR: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Error loading unicode input converter: %s", encoding); - break; - default: - php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC, - "Unknown error starting unicode input converter: %s", encoding); - } - } - /* UTODO: If/When Input error handling gets implemented, set the options on success */ + + /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ + php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL); } } @@ -2334,6 +2216,7 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio pefree(copy_of_path, persistent); } #endif + return stream; } /* }}} */ diff --git a/ext/unicode/unicode_filter.c b/main/streams/unicode_filter.c similarity index 85% rename from ext/unicode/unicode_filter.c rename to main/streams/unicode_filter.c index 0c5b025ec8..d54d8402e2 100644 --- a/ext/unicode/unicode_filter.c +++ b/main/streams/unicode_filter.c @@ -74,6 +74,7 @@ static php_stream_filter_status_t php_unicode_to_string_filter( destp = destbuf = (char *)pemalloc(destlen, data->is_persistent); ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode); + /* UTODO: Error catching */ new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC); php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); exit_status = PSFS_PASS_ON; @@ -88,6 +89,7 @@ static php_stream_filter_status_t 
php_unicode_to_string_filter( /* Spit it out! */ ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); + /* UTODO: Error catching */ if (dest > d) { php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); @@ -145,6 +147,7 @@ static php_stream_filter_status_t php_unicode_from_string_filter( destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent); ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode); + /* UTODO: Error catching */ new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC); php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); @@ -160,6 +163,7 @@ static php_stream_filter_status_t php_unicode_from_string_filter( /* Spit it out! */ ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); + /* UTODO: Error catching */ if (dest > d) { php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); @@ -220,21 +224,21 @@ static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC) } } -static php_stream_filter_ops php_unicode_to_string_filter_ops = { +php_stream_filter_ops php_unicode_to_string_filter_ops = { php_unicode_to_string_filter, php_unicode_filter_dtor, "unicode.to.*", PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING }; -static php_stream_filter_ops php_unicode_from_string_filter_ops = { +php_stream_filter_ops php_unicode_from_string_filter_ops = { php_unicode_from_string_filter, php_unicode_filter_dtor, "unicode.from.*", PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE }; -static php_stream_filter_ops php_unicode_tidy_filter_ops = { +php_stream_filter_ops php_unicode_tidy_filter_ops = { php_unicode_tidy_filter, php_unicode_filter_dtor, "unicode.tidy.*", @@ -251,7 
+255,10 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval const char *charset, *direction; php_stream_filter_ops *fops; UErrorCode ucnvError = U_ZERO_ERROR; + /* Note: from_error_mode means from unicode to charset. from filter means from charset to unicode */ + uint16_t err_mode = UG(from_error_mode); char to_unicode = 0; + zval **tmpzval; if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) { /* Never happens */ @@ -264,8 +271,9 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval charset = direction + sizeof("to.") - 1; } else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) { fops = &php_unicode_from_string_filter_ops; - to_unicode = 1; charset = direction + sizeof("from.") - 1; + to_unicode = 1; + err_mode = UG(to_error_mode); } else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) { fops = &php_unicode_tidy_filter_ops; charset = direction + sizeof("tidy.") - 1; @@ -303,6 +311,46 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval return NULL; } + if (filterparams && + Z_TYPE_P(filterparams) == IS_ARRAY && + zend_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS && + tmpzval && *tmpzval) { + if (Z_TYPE_PP(tmpzval) == IS_LONG) { + err_mode = Z_LVAL_PP(tmpzval); + } else { + zval copyval = **tmpzval; + zval_copy_ctor(&copyval); + convert_to_long(&copyval); + err_mode = Z_LVAL(copyval); + } + } + + zend_set_converter_error_mode(data->conv, to_unicode ? 
ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode); + if (!to_unicode) { + UChar *freeme = NULL; + UChar *subst_char = UG(from_subst_char); + + if (filterparams && + Z_TYPE_P(filterparams) == IS_ARRAY && + zend_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS && + tmpzval && *tmpzval) { + if (Z_TYPE_PP(tmpzval) == IS_UNICODE) { + subst_char = Z_USTRVAL_PP(tmpzval); + } else { + zval copyval = **tmpzval; + zval_copy_ctor(&copyval); + convert_to_unicode(&copyval); + subst_char = freeme = Z_USTRVAL(copyval); + } + } + + zend_set_converter_subst_char(data->conv, subst_char); + + if (freeme) { + efree(freeme); + } + } + return php_stream_filter_alloc(fops, data, persistent); } diff --git a/win32/build/config.w32 b/win32/build/config.w32 index 2ccb05c866..9ae3bd6491 100644 --- a/win32/build/config.w32 +++ b/win32/build/config.w32 @@ -279,7 +279,7 @@ ADD_SOURCES("main", "main.c snprintf.c spprintf.c fopen_wrappers.c \ php_open_temporary_file.c php_logos.c output.c internal_functions.c php_sprintf.c"); ADD_SOURCES("main/streams", "streams.c cast.c memory.c filter.c plain_wrapper.c \ - userspace.c transports.c xp_socket.c mmap.c"); + userspace.c transports.c xp_socket.c mmap.c unicode_filter.c"); ADD_SOURCES("win32", "crypt_win32.c flock.c glob.c md5crypt.c pwd.c readdir.c \ registry.c select.c sendmail.c time.c wfile.c winutil.c wsyslog.c globals.c");