From: Dmitry Stogov Date: Thu, 16 Apr 2009 10:16:27 +0000 (+0000) Subject: - Added "dechunk" filter which can decode HTTP responses with chunked transfer-encodi... X-Git-Tag: php-5.3.0RC2~161 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=679c6298e332c36f315c8ab8ac816336b602c94f;p=php - Added "dechunk" filter which can decode HTTP responses with chunked transfer-encoding. HTTP streams use this filter automatically in case "Transfer-Encoding: chunked" header is present in the response. It's possible to disable this behaviour using "http"=>array("auto_decode"=>0) in stream context - Fixed bug #47021 (SoapClient stumbles over WSDL delivered with "Transfer-Encoding: chunked") --- diff --git a/NEWS b/NEWS index 9efe1c6df6..c1c41e6014 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,11 @@ PHP NEWS - Upgraded bundled PCRE to version 7.9. (Nuno) - Added 'n' flag to fopen to allow passing O_NONBLOCK to the underlying open(2) system call. (Mikko) +- Added "dechunk" filter which can decode HTTP responses with chunked + transfer-encoding. HTTP streams use this filter automatically in case + "Transfer-Encoding: chunked" header is present in the response. It's possible to + disable this behaviour using "http"=>array("auto_decode"=>0) in stream + context. (Dmitry) - Fixed bug #47880 (crashes in call_user_func_array()). (Dmitry) - Fixed bug #47856 (stristr() converts needle to lower-case). (Ilia) @@ -28,6 +33,8 @@ PHP NEWS - Fixed bug #47516 (nowdoc can not be embed in heredoc but can be embed in double quote). (Dmitry) - Fixed bug #47038 (Memory leak in include). (Dmitry) +- Fixed bug #47021 (SoapClient stumbles over WSDL delivered with + "Transfer-Encoding: chunked"). (Dmitry) - Fixed bug #46108 (DateTime - Memory leak when unserializing). (Felipe) - Fixed bug #44861 (scrollable cursor don't work with pgsql). (Matteo) - Fixed bug #44409 (PDO::FETCH_SERIALIZE calls __construct()). 
(Matteo) diff --git a/ext/standard/filters.c b/ext/standard/filters.c index 98ac6c0528..c69e10f3e1 100644 --- a/ext/standard/filters.c +++ b/ext/standard/filters.c @@ -1897,6 +1897,221 @@ php_stream_filter_factory consumed_filter_factory = { /* }}} */ +/* {{{ chunked filter implementation */ +typedef enum _php_chunked_filter_state { + CHUNK_SIZE_START, /* no digit of the chunk size has been read yet */ + CHUNK_SIZE, /* reading hexadecimal chunk-size digits */ + CHUNK_SIZE_EXT_START, /* first byte after the size: ';', CR or LF expected */ + CHUNK_SIZE_EXT, /* skipping the rest of the size line */ + CHUNK_SIZE_CR, /* optional CR that ends the size line */ + CHUNK_SIZE_LF, /* LF that ends the size line */ + CHUNK_BODY, /* copying chunk payload */ + CHUNK_BODY_CR, /* optional CR after the payload */ + CHUNK_BODY_LF, /* LF after the payload */ + CHUNK_TRAILER, /* zero-size chunk seen; remaining input is discarded */ + CHUNK_ERROR /* malformed input; remaining input is passed through as is */ +} php_chunked_filter_state; + /* Decoder state kept in the filter between calls, so a chunk may be split across buffers. */ +typedef struct _php_chunked_filter_data { + php_chunked_filter_state state; /* where decoding resumes on the next call */ + size_t chunk_size; /* payload bytes still expected in the current chunk; unsigned, so an over-long hex size wraps instead of going negative and defeating the bounds check in CHUNK_BODY */ + int persistent; /* allocation flag, handed back to pefree() */ +} php_chunked_filter_data; + /* Decodes chunked transfer-encoded bytes in place. buf/len: the input; decoded payload is compacted to the front of buf. data: decoder state, updated on every call. Returns the number of decoded bytes now at the start of buf. Once the input is found to be malformed the state becomes CHUNK_ERROR and the remaining bytes are copied through unchanged. */ +static size_t php_dechunk(char *buf, size_t len, php_chunked_filter_data *data) +{ + char *p = buf; + char *end = p + len; + char *out = buf; /* write cursor; never ahead of the read cursor p */ + size_t out_len = 0; + + while (p < end) { + switch (data->state) { + case CHUNK_SIZE_START: + data->chunk_size = 0; /* fallthrough */ + case CHUNK_SIZE: + while (p < end) { + if (*p >= '0' && *p <= '9') { + data->chunk_size = (data->chunk_size * 16) + (*p - '0'); + } else if (*p >= 'A' && *p <= 'F') { + data->chunk_size = (data->chunk_size * 16) + (*p - 'A' + 10); + } else if (*p >= 'a' && *p <= 'f') { + data->chunk_size = (data->chunk_size * 16) + (*p - 'a' + 10); + } else if (data->state == CHUNK_SIZE_START) { /* a size line must start with a hex digit */ + data->state = CHUNK_ERROR; + break; + } else { + data->state = CHUNK_SIZE_EXT_START; + break; + } + data->state = CHUNK_SIZE; + p++; + } + if (data->state == CHUNK_ERROR) { + continue; + } else if (p == end) { + return out_len; + } /* fallthrough */ + case CHUNK_SIZE_EXT_START: + if (*p == ';'|| *p == '\r' || *p == '\n') { + data->state = CHUNK_SIZE_EXT; + } else { + data->state = CHUNK_ERROR; + continue; + } /* fallthrough */ + case CHUNK_SIZE_EXT: + /* skip extension */ + while (p < end && *p != '\r' && *p != '\n') { + p++; + } + if (p == end) { + return out_len; + } /* fallthrough */ + case CHUNK_SIZE_CR: + if (*p == '\r') { + p++; + if (p == end) { + data->state = CHUNK_SIZE_LF; + return out_len; + } + } /* fallthrough */
+ case CHUNK_SIZE_LF: + if (*p == '\n') { + p++; + if (data->chunk_size == 0) { + /* last chunk */ + data->state = CHUNK_TRAILER; + continue; + } else if (p == end) { + data->state = CHUNK_BODY; + return out_len; + } + } else { + data->state = CHUNK_ERROR; + continue; + } /* fallthrough */ + case CHUNK_BODY: + if ((size_t) (end - p) >= data->chunk_size) { /* the rest of the chunk is in this buffer */ + if (p != out) { + memmove(out, p, data->chunk_size); + } + out += data->chunk_size; + out_len += data->chunk_size; + p += data->chunk_size; + if (p == end) { + data->state = CHUNK_BODY_CR; + return out_len; + } + } else { /* the chunk continues in the next buffer */ + if (p != out) { + memmove(out, p, end - p); + } + data->chunk_size -= end - p; + data->state = CHUNK_BODY; /* this case is also entered by fallthrough with an earlier state still stored; the next call must resume in the body */ + out_len += end - p; + return out_len; + } /* fallthrough */ + case CHUNK_BODY_CR: + if (*p == '\r') { + p++; + if (p == end) { + data->state = CHUNK_BODY_LF; + return out_len; + } + } /* fallthrough */ + case CHUNK_BODY_LF: + if (*p == '\n') { + p++; + data->state = CHUNK_SIZE_START; + continue; + } else { + data->state = CHUNK_ERROR; + continue; + } + case CHUNK_TRAILER: + /* ignore trailer */ + p = end; + continue; + case CHUNK_ERROR: + if (p != out) { + memmove(out, p, end - p); + } + out_len += end - p; + return out_len; + } + } + return out_len; +} + /* Stream filter callback: decodes every bucket of buckets_in in place with php_dechunk() and appends it to buckets_out. The number of raw input bytes is stored in *bytes_consumed when that pointer is non-NULL. Always returns PSFS_PASS_ON. */ +static php_stream_filter_status_t php_chunked_filter( + php_stream *stream, + php_stream_filter *thisfilter, + php_stream_bucket_brigade *buckets_in, + php_stream_bucket_brigade *buckets_out, + size_t *bytes_consumed, + int flags + TSRMLS_DC) +{ + php_stream_bucket *bucket; + size_t consumed = 0; + php_chunked_filter_data *data = (php_chunked_filter_data *) thisfilter->abstract; + + while (buckets_in->head) { + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + consumed += bucket->buflen; /* counts the encoded length, before buflen is overwritten */ + bucket->buflen = php_dechunk(bucket->buf, bucket->buflen, data); + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + } + + if (bytes_consumed) { + *bytes_consumed = consumed; + } + + return PSFS_PASS_ON; +} + /* Filter destructor: releases the decoder state with the persistence flag stored in it. */ +static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC) +{ + if (thisfilter && 
thisfilter->abstract) { + php_chunked_filter_data *data = (php_chunked_filter_data *) thisfilter->abstract; + pefree(data, data->persistent); + } +} + +static php_stream_filter_ops chunked_filter_ops = { + php_chunked_filter, + php_chunked_dtor, + "dechunk" +}; + /* Factory callback: returns a new filter whose decoder starts in CHUNK_SIZE_START, or NULL when filtername does not compare equal to "dechunk" (case-insensitive) or the state cannot be allocated. filterparams is not used. */ +static php_stream_filter *chunked_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) +{ + php_stream_filter_ops *fops = NULL; + php_chunked_filter_data *data; + + if (strcasecmp(filtername, "dechunk")) { + return NULL; + } + + /* Create this filter */ + data = (php_chunked_filter_data *)pecalloc(1, sizeof(php_chunked_filter_data), persistent); + if (!data) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed allocating %zd bytes", sizeof(php_chunked_filter_data)); + return NULL; + } + data->state = CHUNK_SIZE_START; + data->chunk_size = 0; + data->persistent = persistent; + fops = &chunked_filter_ops; + + return php_stream_filter_alloc(fops, data, persistent); +} + +static php_stream_filter_factory chunked_filter_factory = { + chunked_filter_create +}; +/* }}} */ + static const struct { php_stream_filter_ops *ops; php_stream_filter_factory *factory; @@ -1907,6 +2122,7 @@ static const struct { { &strfilter_strip_tags_ops, &strfilter_strip_tags_factory }, { &strfilter_convert_ops, &strfilter_convert_factory }, { &consumed_filter_ops, &consumed_filter_factory }, + { &chunked_filter_ops, &chunked_filter_factory }, /* additional filters to go here */ { NULL, NULL } }; diff --git a/ext/standard/http_fopen_wrapper.c b/ext/standard/http_fopen_wrapper.c index 64b132b759..a3d55c84d0 100644 --- a/ext/standard/http_fopen_wrapper.c +++ b/ext/standard/http_fopen_wrapper.c @@ -111,6 +111,7 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path, char *user_headers = NULL; int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0); int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0); + php_stream_filter *transfer_encoding = NULL; tmp_line[0] = '\0'; @@ 
-597,6 +598,25 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path, } else if (!strncasecmp(http_header_line, "Content-Length: ", 16)) { file_size = atoi(http_header_line + 16); php_stream_notify_file_size(context, file_size, http_header_line, 0); + } else if (!strncasecmp(http_header_line, "Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) { + + /* create filter to decode response body */ + if (!(options & STREAM_ONLY_GET_HEADERS)) { + long decode = 1; + + if (context && php_stream_context_get_option(context, "http", "auto_decode", &tmpzval) == SUCCESS) { + SEPARATE_ZVAL(tmpzval); + convert_to_boolean(*tmpzval); + decode = Z_LVAL_PP(tmpzval); + } + if (decode) { + transfer_encoding = php_stream_filter_create("dechunk", NULL, php_stream_is_persistent(stream) TSRMLS_CC); + if (transfer_encoding) { + /* don't store transfer-encodeing header */ + continue; + } + } + } } if (http_header_line[0] == '\0') { @@ -740,6 +760,11 @@ out: * the stream */ stream->position = 0; + if (transfer_encoding) { + php_stream_filter_append(&stream->readfilters, transfer_encoding); + } + } else if (transfer_encoding) { + php_stream_filter_free(transfer_encoding TSRMLS_CC); } return stream; diff --git a/ext/standard/tests/filters/chunked_001.phpt b/ext/standard/tests/filters/chunked_001.phpt new file mode 100644 index 0000000000..70dba3caa4 --- /dev/null +++ b/ext/standard/tests/filters/chunked_001.phpt @@ -0,0 +1,33 @@ +--TEST-- +Chunked encoding +--SKIPIF-- + +--FILE-- + +--EXPECT-- +string(0) "" +string(4) "test" +string(4) "test" +string(4) "test" +string(4) "test" +string(32) "0123456789abcdef0123456789abcdef" +string(32) "0123456789abcdef0123456789abcdef"