PHP_FE(stream_filter_prepend, NULL)
PHP_FE(stream_filter_append, NULL)
PHP_FE(stream_filter_remove, NULL)
+ PHP_FE(stream_encoding, NULL)
PHP_FE(stream_socket_client, second_and_third_args_force_ref)
PHP_FE(stream_socket_server, second_and_third_args_force_ref)
PHP_FE(stream_socket_accept, third_arg_force_ref)
php_stream_from_zval(stream, &zstream);
- buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? IS_UNICODE : IS_STRING, NULL_ZSTR, 0, length, &retlen);
+ buf.v = php_stream_get_line_ex(stream, stream->readbuf_type, NULL_ZSTR, 0, length, &retlen);
if (!buf.v) {
RETURN_FALSE;
}
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
RETURN_UNICODEL(buf.u, retlen, 0);
- } else {
+ } else { /* IS_STRING */
RETURN_STRINGL(buf.s, retlen, 0);
}
}
PHP_STREAM_TO_ZVAL(stream, arg1);
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
int buflen = 1;
UChar *buf = php_stream_read_unicode_chars(stream, &buflen);
RETURN_FALSE;
}
RETURN_UNICODEL(buf, buflen, 0);
- } else {
+ } else { /* IS_STRING */
char buf[2];
buf[0] = php_stream_getc(stream);
php_stream_from_zval(stream, &zstream);
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
UChar *buf = php_stream_get_line_ex(stream, IS_UNICODE, NULL_ZSTR, 0, length, &retlen);
UChar *allowed = NULL;
int allowed_len = 0;
retlen = php_u_strip_tags(buf, retlen, &stream->fgetss_state, allowed, allowed_len TSRMLS_CC);
RETURN_UNICODEL(buf, retlen, 0);
- } else {
+ } else { /* IS_STRING */
char *buf = php_stream_get_line_ex(stream, IS_STRING, NULL_ZSTR, 0, length, &retlen);
char *allowed = NULL;
int allowed_len = 0;
RETURN_FALSE;
}
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
int buflen = len;
UChar *buf = php_stream_read_unicode_chars(stream, &buflen);
}
RETURN_UNICODEL(buf, buflen, 0);
- } else {
+ } else { /* IS_STRING */
char *buf = emalloc(len + 1);
int buflen = php_stream_read(stream, buf, len);
add_assoc_zval(return_value, "write_filters", newval);
}
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
int readbuf_len = u_countChar32(stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos);
add_assoc_long(return_value, "unread_bytes", UBYTES(stream->writepos - stream->readpos));
add_assoc_long(return_value, "unread_chars", readbuf_len);
- } else {
+ } else { /* IS_STRING */
add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos);
add_assoc_long(return_value, "unread_chars", stream->writepos - stream->readpos);
}
php_stream_from_zval(stream, &zstream);
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
UChar *buf;
UChar *d = NULL;
int dlen = 0;
}
RETURN_UNICODEL(buf, buf_size, 0);
- } else {
+ } else { /* IS_STRING */
char *buf;
char *d = NULL;
int dlen = 0;
}
/* }}} */
+/* {{{ proto bool stream_encoding(resource stream[, string encoding])
+Set character set for stream encoding.
+Returns TRUE when the conversion filters were applied to both chains, FALSE (with a warning) otherwise.
+UTODO: Return current encoding charset
+*/
+PHP_FUNCTION(stream_encoding)
+{
+	zval *zstream;
+	php_stream *stream;
+	char *encoding = NULL;
+	int encoding_len = 0;
+	int remove_read_tail = 0, remove_write_tail = 0;
+	int write_result, read_result;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r|s", &zstream, &encoding, &encoding_len) == FAILURE) {
+		return;
+	}
+
+	php_stream_from_zval(stream, &zstream);
+
+	if (!encoding) {
+		/* The encoding argument is optional in the signature, but reporting the
+		   current charset is not implemented yet (see UTODO above).  Bail out
+		   before touching the filter chains: a NULL charset must not be handed
+		   to php_stream_encoding_apply(). */
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No encoding specified");
+		RETURN_FALSE;
+	}
+
+	/* Double check that the target encoding is legal before attempting anything */
+
+	if (stream->readfilters.tail) {
+		if (stream->readfilters.tail->fops == &php_unicode_from_string_filter_ops) {
+			/* Remove the current unicode.from.* filter,
+			   the filter layer will transcode anything in the read buffer back to binary
+			   or invalidate the read buffer */
+			remove_read_tail = 1;
+		} else if (stream->readbuf_type == IS_UNICODE) {
+			/* There's an encoding on the stream already, but then there's filtering happening after that point
+			   It's asking too much for PHP to figure out what the user wants, throw an error back in their face */
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream");
+			RETURN_FALSE;
+		}
+	}
+
+	if (stream->writefilters.tail) {
+		if (stream->writefilters.tail->fops == &php_unicode_to_string_filter_ops) {
+			/* Remove the current unicode.to.* filter */
+			remove_write_tail = 1;
+		} else if ((stream->writefilters.tail->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+			/* conversion to binary is happening, then another filter is doing something
+			   bailout for same reason as read filters */
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream");
+			RETURN_FALSE;
+		}
+	}
+
+	/* Both chains were validated above, so only now is it safe to start modifying them */
+	if (remove_read_tail) {
+		php_stream_filter_remove(stream->readfilters.tail, 1 TSRMLS_CC);
+	}
+	if (remove_write_tail) {
+		php_stream_filter_remove(stream->writefilters.tail, 1 TSRMLS_CC);
+	}
+
+	/* UTODO: Allow overriding error handling for converters */
+	/* Write chain (second argument 1) first, then read chain (0), same order as before */
+	write_result = php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char));
+	read_result = php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL);
+
+	if (write_result == FAILURE || read_result == FAILURE) {
+		/* Do not report success when a conversion filter could not be attached */
+		RETURN_FALSE;
+	}
+
+	RETURN_TRUE;
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
PHP_FUNCTION(stream_filter_prepend);
PHP_FUNCTION(stream_filter_append);
PHP_FUNCTION(stream_filter_remove);
+PHP_FUNCTION(stream_encoding);
PHP_FUNCTION(stream_socket_enable_crypto);
PHP_FUNCTION(stream_socket_pair);
PHP_SUBST(UNICODE_SHARED_LIBADD)
AC_DEFINE(HAVE_UNICODE, 1, [ ])
-PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c unicode_iterators.c collator.c, $ext_shared)
+PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, $ext_shared)
// $Id$
// vim:ft=javascript
-EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c collator.c locale.c");
+EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c");
AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
PHP_METHOD(collator, __construct);
void php_init_collation(TSRMLS_D);
-extern php_stream_filter_factory php_unicode_filter_factory;
#ifdef __cplusplus
} // extern "C"
/* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(unicode)
{
- if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
- return FAILURE;
- }
-
php_register_unicode_iterators(TSRMLS_C);
php_init_collation(TSRMLS_C);
/* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(unicode)
{
- if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) {
- return FAILURE;
- }
/* add your stuff here */
/* Nothing left in this function can fail once the filter factory is no longer unregistered here, so report success */
return SUCCESS;
}
+ /* Initialize unicode filters */
+ if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
+ php_printf("PHP: Unable to initialize unicode stream filters.\n");
+ return FAILURE;
+ }
+
/* initialize registry for images to be used in phpinfo()
(this uses configuration parameters from php.ini)
*/
zend_shutdown(TSRMLS_C);
+ /* Destroys filter & transport registries too */
php_shutdown_stream_wrappers(module_number TSRMLS_CC);
php_shutdown_info_logos();
php_stream_context *context;
int flags; /* PHP_STREAM_FLAG_XXX */
- /* unicode */
- UConverter *input_encoding;
- UConverter *output_encoding;
-
/* buffer */
off_t position; /* of underlying stream */
+ zend_uchar readbuf_type;
zstr readbuf; /* readbuf.s or readbuf.u */
size_t readbuflen; /* Length in units (char or UChar) */
off_t readpos; /* Position in units (char or UChar) */
#define php_stream_from_zval_no_verify(xstr, ppzval) (xstr) = (php_stream*)zend_fetch_resource((ppzval) TSRMLS_CC, -1, "stream", NULL, 2, php_file_le_stream(), php_file_le_pstream())
#define PS_ULEN(is_unicode, len) ((is_unicode) ? UBYTES(len) : (len))
-#define php_stream_reads_unicode(stream) ((stream->input_encoding) ? 1 : 0)
-#define php_stream_writes_unicode(stream) ((stream->output_encoding) ? 1 : 0)
BEGIN_EXTERN_C()
PHPAPI int php_stream_from_persistent_id(const char *persistent_id, php_stream **stream TSRMLS_DC);
chain->tail = filter;
filter->chain = chain;
- if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) {
+ if (&(stream->readfilters) == chain) {
/* Let's go ahead and wind anything in the buffer through this filter */
php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL };
php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out;
- php_stream_filter_status_t status;
+ php_stream_filter_status_t status = PSFS_FEED_ME;
php_stream_bucket *bucket;
size_t consumed = 0;
- if (stream->input_encoding) {
- bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
- } else {
- bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
- }
- php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
- status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
+ if ((stream->writepos - stream->readpos) > 0) {
+ if (stream->readbuf_type == IS_UNICODE) {
+ bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
+ } else {
+ bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
+ }
+ php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
+ status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
- if (stream->readpos + consumed > stream->writepos || consumed < 0) {
- /* No behaving filter should cause this. */
- status = PSFS_ERR_FATAL;
+ if (stream->readpos + consumed > stream->writepos || consumed < 0) {
+ /* No behaving filter should cause this. */
+ status = PSFS_ERR_FATAL;
+ }
}
- switch (status) {
- case PSFS_ERR_FATAL:
- /* If this first cycle simply fails then there's something wrong with the filter.
- Pull the filter off the chain and leave the read buffer alone. */
- if (chain->head == filter) {
- chain->head = NULL;
- chain->tail = NULL;
- } else {
- filter->prev->next = NULL;
- chain->tail = filter->prev;
- }
- php_stream_bucket_unlink(bucket TSRMLS_CC);
- php_stream_bucket_delref(bucket TSRMLS_CC);
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain.");
- break;
- case PSFS_FEED_ME:
+ if (status == PSFS_ERR_FATAL) {
+ /* If this first cycle simply fails then there's something wrong with the filter.
+ Pull the filter off the chain and leave the read buffer alone. */
+ if (chain->head == filter) {
+ chain->head = NULL;
+ chain->tail = NULL;
+ } else {
+ filter->prev->next = NULL;
+ chain->tail = filter->prev;
+ }
+ php_stream_bucket_unlink(bucket TSRMLS_CC);
+ php_stream_bucket_delref(bucket TSRMLS_CC);
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain.");
+ } else {
+ /* This filter addition may change the readbuffer type.
+ Since all the previously held data is in the bucket brigade,
+ we can reappropriate the buffer that already exists (if one does) */
+ if (stream->readbuf_type == IS_UNICODE && (filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+ /* Buffer is currently based on unicode characters, but filter only outputs STRING adjust counting */
+ stream->readbuf_type = IS_STRING;
+ stream->readbuflen *= UBYTES(1);
+ } else if (stream->readbuf_type == IS_STRING && (filter->fops->flags & PSFO_FLAG_OUTPUTS_STRING) == 0) {
+ /* Buffer is currently based on binary characters, but filter only outputs UNICODE adjust counting */
+ stream->readbuf_type = IS_UNICODE;
+ stream->readbuflen /= UBYTES(1);
+ }
+
+ if (status == PSFS_FEED_ME) {
/* We don't actually need data yet,
leave this filter in a feed me state until data is needed.
Reset stream's internal read buffer since the filter is "holding" it. */
stream->readpos = 0;
stream->writepos = 0;
- break;
- case PSFS_PASS_ON:
+ } else if (status == PSFS_PASS_ON) {
/* Put any filtered data onto the readbuffer stack.
Previously read data has been at least partially consumed. */
stream->readpos += consumed;
bucket = brig_outp->head;
/* Convert for stream type */
- if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
- /* Stream expects unicode, convert using stream encoding */
- php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
- } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
- /* Stream expects binary, filter provided unicode, just take the buffer as is */
- php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+ if (bucket->buf_type != stream->readbuf_type) {
+ /* Stream expects different type than bucket contains, convert sloppily */
+ php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
}
/* Grow buffer to hold this bucket if need be.
TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */
if (stream->readbuflen - stream->writepos < bucket->buflen) {
stream->readbuflen += bucket->buflen;
- stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent);
+ stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent);
}
/* Append to readbuf */
- if (stream->input_encoding) {
+ if (stream->readbuf_type == IS_UNICODE) {
memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen));
} else {
memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen);
php_stream_bucket_unlink(bucket TSRMLS_CC);
php_stream_bucket_delref(bucket TSRMLS_CC);
}
- break;
+ }
}
-
- }
+ } /* end of readfilters specific code */
}
PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC)
/* Dump any newly flushed data to the read buffer */
if (stream->readpos > stream->chunk_size) {
/* Back the buffer up: slide the unread data down to the start of the read buffer.
   Source and destination are the same buffer and overlap whenever the unread span is
   longer than readpos, so memmove (not memcpy) is required. */
- memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos), PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos));
+ memmove(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos), PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos));
stream->writepos -= stream->readpos;
stream->readpos = 0;
}
if (flushed_size > (stream->readbuflen - stream->writepos)) {
/* Grow the buffer */
- stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent);
+ stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent);
}
while ((bucket = inp->head)) {
/* Convert if necessary */
- if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
- /* Stream expects unicode, convert using stream encoding */
- php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
- } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
- /* Stream expects binary, filter provided unicode, just take the buffer as is */
- php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+ if (bucket->buf_type != stream->readbuf_type) {
+ /* Stream expects different type than what's in bucket, convert sloppily */
+ php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
}
/* Append to readbuf */
- if (stream->input_encoding) {
+ if (stream->readbuf_type == IS_UNICODE) {
memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen));
} else {
memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen);
while ((bucket = inp->head)) {
/* Convert if necessary */
if (bucket->buf_type == IS_UNICODE) {
- if (stream->output_encoding) {
- /* Stream has a configured output encoding, convert to appropriate type */
- php_stream_bucket_convert(bucket, IS_STRING, stream->output_encoding);
- } else {
- /* Stream is binary, write ugly UChars as is */
- php_stream_bucket_convert_notranscode(bucket, IS_STRING);
- }
+ /* Force data to binary, adjusting buflen */
+ php_stream_bucket_convert_notranscode(bucket, IS_STRING);
}
/* Must be binary by this point */
PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC)
{
+ /* UTODO: Figure out a sane way to "defilter" so that unicode converters can be swapped around
+ For now, at least fopen(,'b') + stream_encoding($fp, 'charset') works since there's nothing to remove */
+
if (filter->prev) {
filter->prev->next = filter->next;
} else {
return FAILURE;
}
+/* {{{ _php_stream_encoding_apply
+ * Creates a "unicode.to.<encoding>" filter (writechain != 0) or a
+ * "unicode.from.<encoding>" filter (writechain == 0) and appends it to the
+ * stream's write or read filter chain respectively.
+ * error_mode is handed to the filter as the "error_mode" parameter; subst, when
+ * non-NULL, is handed over as "subst_char".
+ * Returns SUCCESS, or FAILURE (after raising a warning) when no encoding was
+ * given or the filter could not be created. */
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC)
+{
+	int encoding_len;
+	int buflen;
+	char *buf;
+	php_stream_filter *filter;
+	zval *filterparams;
+
+	if (!encoding) {
+		/* strlen(NULL) is undefined behaviour; refuse instead of crashing */
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding: no charset specified");
+		return FAILURE;
+	}
+
+	encoding_len = strlen(encoding);
+	buflen = sizeof("unicode.from.") + encoding_len - 1; /* might be "to", but "from" is long enough for both */
+	buf = emalloc(buflen + 1);
+
+	/* Build the filter name; encoding_len + 1 copies the terminating NUL as well */
+	if (writechain) {
+		memcpy(buf, "unicode.to.", sizeof("unicode.to.") - 1);
+		memcpy(buf + sizeof("unicode.to.") - 1, encoding, encoding_len + 1);
+	} else {
+		memcpy(buf, "unicode.from.", sizeof("unicode.from.") - 1);
+		memcpy(buf + sizeof("unicode.from.") - 1, encoding, encoding_len + 1);
+	}
+
+	/* Parameters read back by the unicode.* filter factory */
+	ALLOC_INIT_ZVAL(filterparams);
+	array_init(filterparams);
+	add_assoc_long(filterparams, "error_mode", error_mode);
+	if (subst) {
+		add_assoc_unicode(filterparams, "subst_char", subst, 1);
+	}
+	filter = php_stream_filter_create(buf, filterparams, php_stream_is_persistent(stream) TSRMLS_CC);
+	efree(buf);
+	zval_ptr_dtor(&filterparams);
+
+	if (!filter) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding for charset: %s\n", encoding);
+		return FAILURE;
+	}
+
+	php_stream_filter_append(writechain ? &stream->writefilters : &stream->readfilters, filter);
+
+	return SUCCESS;
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC);
PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
PHPAPI php_stream_filter *_php_stream_filter_alloc(php_stream_filter_ops *fops, void *abstract, int persistent STREAMS_DC TSRMLS_DC);
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC);
END_EXTERN_C()
#define php_stream_filter_alloc(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_CC TSRMLS_CC)
#define php_stream_filter_alloc_rel(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_REL_CC TSRMLS_CC)
#define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC)
#define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC)
#define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
+#define php_stream_encoding_apply(stream, writechain, encoding, error_mode, subst) \
+ _php_stream_encoding_apply((stream), (writechain), (encoding), (error_mode), (subst) TSRMLS_CC)
#define php_stream_is_filtered(stream) ((stream)->readfilters.head || (stream)->writefilters.head)
PHPAPI php_stream_filter *php_stream_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC);
END_EXTERN_C()
+/* unicode_filter.c exports */
+extern php_stream_filter_ops php_unicode_to_string_filter_ops;
+extern php_stream_filter_ops php_unicode_from_string_filter_ops;
+extern php_stream_filter_ops php_unicode_tidy_filter_ops;
+extern php_stream_filter_factory php_unicode_filter_factory;
+
/*
* Local variables:
* tab-width: 4
ret->abstract = abstract;
ret->is_persistent = persistent_id ? 1 : 0;
ret->chunk_size = FG(def_chunk_size);
+ ret->readbuf_type = IS_STRING;
if (FG(auto_detect_line_endings)) {
ret->flags |= PHP_STREAM_FLAG_DETECT_EOL;
* stream read buffer */
while (brig_inp->head) {
bucket = brig_inp->head;
- if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
- /* Stream expects unicode, convert using stream encoding */
- php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
- } else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
- /* Stream expects binary, filter provided unicode, just take the buffer as is */
- php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+ if (bucket->buf_type != stream->readbuf_type) {
+ /* Stream expects different datatype than bucket has, convert sloppily */
+ php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
}
/* Bucket type now matches stream type */
* TODO: this can fail for persistent streams */
if (stream->readbuflen - stream->writepos < bucket->buflen) {
stream->readbuflen += bucket->buflen;
- stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent);
+ stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent);
}
- memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->input_encoding, bucket->buflen));
+ memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, bucket->buflen));
stream->writepos += bucket->buflen;
php_stream_bucket_unlink(bucket TSRMLS_CC);
}
efree(chunk_buf);
- } else if (stream->input_encoding) { /* Unfiltered Unicode stream */
- /* is there enough data in the buffer ? */
- if (stream->writepos - stream->readpos < (off_t)size) {
- char *binbuf;
- UChar *ubuf;
- int binbuf_len, ubuf_len;
- size_t toread = (size > stream->chunk_size) ? size : stream->chunk_size;
- UErrorCode status = U_ZERO_ERROR;
-
- /* Read stream data into temporary buffer, then convert to unicode
- TODO: This can be improved */
- binbuf = emalloc(toread + 1);
- binbuf_len = stream->ops->read(stream, binbuf, toread TSRMLS_CC);
- if (binbuf_len == (size_t)-1) {
- /* Failure */
- efree(binbuf);
- return;
- }
- /* Convert to unicode */
- zend_convert_to_unicode(stream->input_encoding, &ubuf, &ubuf_len, binbuf, binbuf_len, &status);
- efree(binbuf);
-
- /* reduce buffer memory consumption if possible, to avoid a realloc */
- if (stream->readbuf.u && stream->readbuflen - stream->writepos < stream->chunk_size) {
- memmove(stream->readbuf.u, stream->readbuf.u + stream->readpos, UBYTES(stream->readbuflen - stream->readpos));
- stream->writepos -= stream->readpos;
- stream->readpos = 0;
- }
-
- /* grow the buffer if required
- * TODO: this can fail for persistent streams */
- if (stream->readbuflen - stream->writepos < ubuf_len) {
- stream->readbuflen += ((stream->chunk_size > ubuf_len) ? stream->chunk_size : ubuf_len);
- stream->readbuf.u = (UChar*)perealloc(stream->readbuf.u, UBYTES(stream->readbuflen), stream->is_persistent);
- }
-
- memcpy(stream->readbuf.u + stream->writepos, ubuf, UBYTES(ubuf_len));
- efree(ubuf);
- stream->writepos += ubuf_len;
- }
} else { /* Unfiltered Binary stream */
/* is there enough data in the buffer ? */
if (stream->writepos - stream->readpos < (off_t)size) {
* drain the remainder of the buffer before using the "raw" read mode for
* the excess */
if (stream->writepos - stream->readpos > 0) {
- toread = PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos);
+ toread = PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos);
if (toread > size) {
toread = size;
}
- if (stream->input_encoding) {
+ if (stream->readbuf_type == IS_UNICODE) {
/* Sloppy read, anyone using php_stream_read() on a unicode stream
* had better know what they're doing */
}
if (toread > 0) {
- if (php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type == IS_UNICODE) {
/* Sloppy read, anyone using php_stream_read() on a unicode stream
* had better know what they're doing */
{
size_t toread = 0, didread = 0, string_length = 0;
- if (!stream->input_encoding) {
+ if (stream->readbuf_type != IS_UNICODE) {
return -1;
}
int buflen = size;
size_t toread = 0, didread = 0, string_length = 0;
- if (!stream->input_encoding) {
+ if (stream->readbuf_type != IS_UNICODE) {
return NULL;
}
char *readptr, *buf = zbuf.s;
if (!buf) {
- readptr = stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos);
+ readptr = stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos);
avail = stream->writepos - stream->readpos;
} else {
readptr = zbuf.s;
}
if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
- if (stream->input_encoding) {
+ if (stream->readbuf_type == IS_UNICODE) {
cr = (char*)u_memchr((UChar*)readptr, '\r', avail);
lf = (char*)u_memchr((UChar*)readptr, '\n', avail);
} else {
eol = lf;
}
} else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
- eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail);
+ eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail);
} else {
/* unix (and dos) line endings */
- eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail);
+ eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail);
}
return (void*)eol;
size_t current_buf_size = 0;
size_t total_copied = 0;
int grow_mode = 0;
- int is_unicode = php_stream_reads_unicode(stream);
+ int is_unicode = stream->readbuf_type == IS_UNICODE;
int split_surrogate = 0;
zstr bufstart = buf;
* than 8K, we waste 1 byte per additional 8K or so.
* That seems acceptable to me, to avoid making this code
* hard to follow */
- bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->input_encoding, current_buf_size + cpysz + 1));
- buf.s = bufstart.s + PS_ULEN(stream->input_encoding, total_copied);
+ bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, current_buf_size + cpysz + 1));
+ buf.s = bufstart.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, total_copied);
current_buf_size += cpysz + 1;
} else {
if (cpysz >= maxlen - 1) {
size_t toread;
int skip = 0;
- if (!php_stream_reads_unicode(stream)) {
+ if (stream->readbuf_type != IS_UNICODE) {
return NULL;
}
/* Writes a buffer directly to a stream, using multiple of the chunk size */
static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr buf, int buflen TSRMLS_DC)
{
- size_t didwrite = 0, towrite, justwrote, shouldwrite, buflen_orig = buflen;
- zstr buf_orig = buf;
+ size_t didwrite = 0, towrite, justwrote, shouldwrite;
char *freeme = NULL;
/* if we have a seekable stream we need to ensure that data is written at the
stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC);
}
- if (stream->output_encoding && buf_type == IS_UNICODE) {
- char *dest;
- int destlen, num_conv;
- UErrorCode status = U_ZERO_ERROR;
-
- num_conv = zend_convert_from_unicode(stream->output_encoding, &dest, &destlen, buf.u, buflen, &status);
- if (U_FAILURE(status)) {
- int32_t offset = u_countChar32(buf.u, num_conv);
-
- zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
- }
- freeme = buf.s = dest;
- buflen = destlen;
- } else {
- /* Sloppy handling, make it a binary buffer */
- if (buf_type != IS_STRING) {
- buflen = UBYTES(buflen);
- }
+ /* Sloppy handling, make it a binary buffer */
+ if (buf_type == IS_UNICODE) {
+ buflen = UBYTES(buflen);
}
shouldwrite = buflen;
}
}
-
- if (stream->output_encoding) {
- /* Map didwrite back to the original character count */
- if (didwrite == shouldwrite) {
- /* Everything wrote okay, no need to count */
- didwrite = buflen_orig;
- } else {
- UErrorCode status = U_ZERO_ERROR;
- char *t = freeme;
- const UChar *p = buf_orig.u;
-
- switch (ucnv_getType(stream->output_encoding)) {
- case UCNV_SBCS:
- case UCNV_LATIN_1:
- case UCNV_US_ASCII:
- /* 1:1 character->byte mapping, didwrite really does mean the number of characters written */
- break;
- default:
- /* Reconvert into junk buffer to see where conversion stops in source string */
- ucnv_resetFromUnicode(stream->output_encoding);
- ucnv_fromUnicode(stream->output_encoding, &t, t + didwrite, &p, p + buflen_orig, NULL, TRUE, &status);
- /* p stops at the first unconvertable UChar when t runs out of space */
- didwrite = p - buf_orig.u;
- }
- }
- } else if (buf_type == IS_UNICODE) {
+ if (buf_type == IS_UNICODE) {
/* Was sloppily converted */
didwrite /= UBYTES(1);
}
if (stream && strchr(implicit_mode, 't') && UG(unicode)) {
if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+')) {
char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8";
- UErrorCode status = U_ZERO_ERROR;
-
- stream->output_encoding = ucnv_open(encoding, &status);
- if (U_FAILURE(status)) {
- switch (status) {
- case U_MEMORY_ALLOCATION_ERROR:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Unable to allocate memory for unicode output converter: %s", encoding);
- break;
- case U_FILE_ACCESS_ERROR:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Error loading unicode output converter: %s", encoding);
- break;
- default:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Unknown error starting unicode output converter: %s", encoding);
- }
- } else {
- /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
- zend_set_converter_error_mode(stream->output_encoding, ZEND_FROM_UNICODE, UG(from_error_mode));
- zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char));
- }
+
+ /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
+ php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char));
}
if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {
char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8";
- UErrorCode status = U_ZERO_ERROR;
-
- stream->input_encoding = ucnv_open(encoding, &status);
- if (U_FAILURE(status)) {
- switch (status) {
- case U_MEMORY_ALLOCATION_ERROR:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Unable to allocate memory for unicode input converter: %s", encoding);
- break;
- case U_FILE_ACCESS_ERROR:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Error loading unicode input converter: %s", encoding);
- break;
- default:
- php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
- "Unknown error starting unicode input converter: %s", encoding);
- }
- }
- /* UTODO: If/When Input error handling gets implemented, set the options on success */
+
+ /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
+ php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL);
}
}
pefree(copy_of_path, persistent);
}
#endif
+
return stream;
}
/* }}} */
destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);
ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode);
+ /* UTODO: Error catching */
new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
exit_status = PSFS_PASS_ON;
/* Spit it out! */
ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
+ /* UTODO: Error catching */
if (dest > d) {
php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC);
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent);
ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode);
+ /* UTODO: Error catching */
new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
/* Spit it out! */
ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
+ /* UTODO: Error catching */
if (dest > d) {
php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC);
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
}
}
-static php_stream_filter_ops php_unicode_to_string_filter_ops = {
+php_stream_filter_ops php_unicode_to_string_filter_ops = {
php_unicode_to_string_filter,
php_unicode_filter_dtor,
"unicode.to.*",
PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
};
-static php_stream_filter_ops php_unicode_from_string_filter_ops = {
+php_stream_filter_ops php_unicode_from_string_filter_ops = {
php_unicode_from_string_filter,
php_unicode_filter_dtor,
"unicode.from.*",
PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
};
-static php_stream_filter_ops php_unicode_tidy_filter_ops = {
+php_stream_filter_ops php_unicode_tidy_filter_ops = {
php_unicode_tidy_filter,
php_unicode_filter_dtor,
"unicode.tidy.*",
const char *charset, *direction;
php_stream_filter_ops *fops;
UErrorCode ucnvError = U_ZERO_ERROR;
+ /* Note: from_error_mode means from unicode to charset. from filter means from charset to unicode */
+ uint16_t err_mode = UG(from_error_mode);
char to_unicode = 0;
+ zval **tmpzval;
if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
/* Never happens */
charset = direction + sizeof("to.") - 1;
} else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
fops = &php_unicode_from_string_filter_ops;
- to_unicode = 1;
charset = direction + sizeof("from.") - 1;
+ to_unicode = 1;
+ err_mode = UG(to_error_mode);
} else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
fops = &php_unicode_tidy_filter_ops;
charset = direction + sizeof("tidy.") - 1;
return NULL;
}
+ /* Optional "error_mode" filter parameter overrides the global default chosen above */
+ if (filterparams &&
+ Z_TYPE_P(filterparams) == IS_ARRAY &&
+ zend_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS &&
+ tmpzval && *tmpzval) {
+ if (Z_TYPE_PP(tmpzval) == IS_LONG) {
+ err_mode = Z_LVAL_PP(tmpzval);
+ } else {
+ /* Convert a private copy so the caller's parameter array is left untouched */
+ zval copyval = **tmpzval;
+ zval_copy_ctor(&copyval);
+ convert_to_long(&copyval);
+ err_mode = Z_LVAL(copyval);
+ }
+ }
+
+ zend_set_converter_error_mode(data->conv, to_unicode ? ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode);
+ if (!to_unicode) {
+ /* A substitution character is only configured when converting from unicode */
+ UChar *freeme = NULL;
+ UChar *subst_char = UG(from_subst_char);
+
+ if (filterparams &&
+ Z_TYPE_P(filterparams) == IS_ARRAY &&
+ zend_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS &&
+ tmpzval && *tmpzval) {
+ if (Z_TYPE_PP(tmpzval) == IS_UNICODE) {
+ subst_char = Z_USTRVAL_PP(tmpzval);
+ } else {
+ /* Convert a private copy; its buffer is released via freeme below */
+ zval copyval = **tmpzval;
+ zval_copy_ctor(&copyval);
+ convert_to_unicode(&copyval);
+ subst_char = freeme = Z_USTRVAL(copyval);
+ }
+ }
+
+ zend_set_converter_subst_char(data->conv, subst_char);
+
+ if (freeme) {
+ efree(freeme);
+ }
+ }
+
+
return php_stream_filter_alloc(fops, data, persistent);
}
php_open_temporary_file.c php_logos.c output.c internal_functions.c php_sprintf.c");
ADD_SOURCES("main/streams", "streams.c cast.c memory.c filter.c plain_wrapper.c \
- userspace.c transports.c xp_socket.c mmap.c");
+ userspace.c transports.c xp_socket.c mmap.c unicode_filter.c");
ADD_SOURCES("win32", "crypt_win32.c flock.c glob.c md5crypt.c pwd.c readdir.c \
registry.c select.c sendmail.c time.c wfile.c winutil.c wsyslog.c globals.c");