/*
 * php_http_detect_charset
 *
 * Extract the value of the "charset" parameter from the value part of a
 * Content-Type header, e.g. `text/html; charset="ISO-8859-1"`.  The HTTP
 * wrapper uses the result to pick the matching unicode.from.* read filter.
 *
 * http_header_line: NUL-terminated text starting at (or inside) the header
 *                   value.  Callers computing the offset with
 *                   sizeof("Content-Type: ") must subtract one, since sizeof
 *                   also counts the terminating NUL.
 *
 * Returns a copy of the charset name made with estrndup(), which the caller
 * releases with efree(), or NULL when no usable charset parameter is found
 * (no parameter, unterminated quote, or empty value).
 *
 * Note: this is deliberately a simplistic parser.  It does not understand
 * quoted strings in *other* parameters, so a ';' inside such a string can
 * confuse it.  The worst outcome of that confusion is that no charset (or a
 * bogus one) is reported and the unicode filter is not applied, which is
 * ultimately the fault of the server that sent the odd header.
 */
static inline char *php_http_detect_charset(char *http_header_line)
{
	const char *param;

	if (!http_header_line) {
		return NULL;
	}

	/* Every parameter is introduced by ';' - visit them one at a time */
	for (param = strchr(http_header_line, ';'); param; param = strchr(param + 1, ';')) {
		const char *p = param + 1;
		const char *value;
		size_t value_len;

		/* Optional whitespace between ';' and the parameter name */
		while (*p == ' ' || *p == '\t') {
			p++;
		}

		/* Parameter names are case-insensitive in HTTP media types */
		if (strncasecmp(p, "charset", sizeof("charset") - 1) != 0) {
			continue;
		}
		p += sizeof("charset") - 1;

		/* Tolerate whitespace around '='; anything else (e.g. "charsetx=")
		   means this was a different parameter */
		while (*p == ' ' || *p == '\t') {
			p++;
		}
		if (*p != '=') {
			continue;
		}
		p++;
		while (*p == ' ' || *p == '\t') {
			p++;
		}

		if (*p == '"') {
			const char *closing_quote;

			/* Quoted value: everything up to the matching quote */
			value = p + 1;
			closing_quote = strchr(value, '"');
			if (!closing_quote) {
				/* Bad things, unmatched quote */
				return NULL;
			}
			value_len = (size_t)(closing_quote - value);
		} else {
			/* Unquoted value: runs to whitespace, ';' or end of string */
			value = p;
			value_len = strcspn(value, " \t\r\n;");
		}

		if (value_len == 0) {
			/* An empty name cannot identify an encoding */
			return NULL;
		}

		return estrndup(value, value_len);
	}

	return NULL;
}
transport_len, have_header = 0, request_fulluri = 0; char *protocol_version = NULL; int protocol_version_len = 3; /* Default: "1.0" */ + char *charset = NULL; tmp_line[0] = '\0'; @@ -543,6 +585,11 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path, if (!strncasecmp(http_header_line, "Location: ", 10)) { strlcpy(location, http_header_line + 10, sizeof(location)); } else if (!strncasecmp(http_header_line, "Content-Type: ", 14)) { + + if (UG(unicode) && strchr(mode, 't')) { + charset = php_http_detect_charset(http_header_line + sizeof("Content-type: ")); + } + php_stream_notify_info(context, PHP_STREAM_NOTIFY_MIME_TYPE_IS, http_header_line + 14, 0); } else if (!strncasecmp(http_header_line, "Content-Length: ", 16)) { file_size = atoi(http_header_line + 16); @@ -572,6 +619,11 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path, php_stream_close(stream); stream = NULL; + if (charset) { + efree(charset); + charset = NULL; + } + if (location[0] != '\0') { char new_path[HTTP_HEADER_BLOCK_SIZE]; @@ -684,6 +736,13 @@ out: } + if (charset) { + if (stream && UG(unicode) && strchr(mode, 't')) { + php_stream_encoding_apply(stream, 0, charset, UG(to_error_mode), NULL); + } + efree(charset); + } + return stream; } diff --git a/main/streams/filter.c b/main/streams/filter.c index 1e74cda667..79f3841d6f 100644 --- a/main/streams/filter.c +++ b/main/streams/filter.c @@ -550,6 +550,35 @@ PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TS return preferred ^ inverted; } +PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type TSRMLS_DC) +{ + php_stream_filter *f; + + for (f = chain->head; f; f = f->next) { + if ((type == IS_STRING && (f->fops->flags & PSFO_FLAG_ACCEPTS_STRING) == 0) || + (type == IS_UNICODE && (f->fops->flags & PSFO_FLAG_ACCEPTS_UNICODE) == 0)) { + /* At some point, the type produced conflicts with the type accepted */ + return 0; + } + + if (f->fops->flags 
& PSFO_FLAG_OUTPUTS_OPPOSITE) { + type = (type == IS_STRING) ? IS_UNICODE : IS_STRING; + continue; + } + if ((f->fops->flags & PSFO_FLAG_OUTPUTS_SAME) || + (f->fops->flags & PSFO_FLAG_OUTPUTS_ANY)) { + continue; + } + if (f->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) { + type = IS_UNICODE; + continue; + } + type = IS_STRING; + } + + return type; +} + PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC) { php_stream_bucket_brigade brig_a = { NULL, NULL }, brig_b = { NULL, NULL }, *inp = &brig_a, *outp = &brig_b, *brig_temp; diff --git a/main/streams/php_stream_filter_api.h b/main/streams/php_stream_filter_api.h index 1c131a3eb6..a4c999a9f9 100644 --- a/main/streams/php_stream_filter_api.h +++ b/main/streams/php_stream_filter_api.h @@ -153,6 +153,7 @@ PHPAPI void _php_stream_filter_prepend(php_stream_filter_chain *chain, php_strea PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream_filter *filter TSRMLS_DC); PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC); PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TSRMLS_DC); +PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type TSRMLS_DC); PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC); PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC); PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC); @@ -166,6 +167,7 @@ END_EXTERN_C() #define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC) #define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC) #define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC) +#define php_stream_filter_product(chain, type) _php_stream_filter_product((chain), (type) TSRMLS_CC) #define 
php_stream_encoding_apply(stream, writechain, encoding, error_mode, subst) \ _php_stream_encoding_apply((stream), (writechain), (encoding), (error_mode), (subst) TSRMLS_CC) diff --git a/main/streams/streams.c b/main/streams/streams.c index 1a6b13d533..eafecc1a89 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -2305,13 +2305,17 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio /* Output encoding on text mode streams defaults to utf8 unless specified in context parameter */ if (stream && strchr(implicit_mode, 't') && UG(unicode)) { - if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+')) { + /* Only apply implicit unicode.to. filter if the wrapper didn't do it for us */ + if ((php_stream_filter_product(&stream->writefilters, IS_UNICODE) == IS_STRING) && + (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+'))) { char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8"; /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */ php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char)); } - if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) { + + /* Only apply implicit unicode.from. filter if the wrapper didn't do it for us */ + if ((stream->readbuf_type == IS_STRING) && (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) { char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8"; /* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */