]> granicus.if.org Git - php/commitdiff
- Added "dechunk" filter which can decode HTTP responses with chunked transfer-encodi...
authorDmitry Stogov <dmitry@php.net>
Thu, 16 Apr 2009 10:16:27 +0000 (10:16 +0000)
committerDmitry Stogov <dmitry@php.net>
Thu, 16 Apr 2009 10:16:27 +0000 (10:16 +0000)
- Fixed bug #47021 (SoapClient stumbles over WSDL delivered with "Transfer-Encoding: chunked")

NEWS
ext/standard/filters.c
ext/standard/http_fopen_wrapper.c
ext/standard/tests/filters/chunked_001.phpt [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 9efe1c6df6261c362343013c6e29e7b20ba0f559..c1c41e6014e957041b72005662d1b9b8dcff5d9d 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,11 @@ PHP                                                                        NEWS
 - Upgraded bundled PCRE to version 7.9. (Nuno)
 - Added 'n' flag to fopen to allow passing O_NONBLOCK to the underlying
   open(2) system call. (Mikko)
+- Added "dechunk" filter which can decode HTTP responses with chunked
+  transfer-encoding. HTTP streams use this filter automatically when a
+  "Transfer-Encoding: chunked" header is present in the response. It's
+  possible to disable this behaviour using "http"=>array("auto_decode"=>0)
+  in the stream context. (Dmitry)
 
 - Fixed bug #47880 (crashes in call_user_func_array()). (Dmitry)
 - Fixed bug #47856 (stristr() converts needle to lower-case). (Ilia)
@@ -28,6 +33,8 @@ PHP                                                                        NEWS
 - Fixed bug #47516 (nowdoc can not be embed in heredoc but can be embed in
   double quote). (Dmitry)
 - Fixed bug #47038 (Memory leak in include). (Dmitry)
+- Fixed bug #47021 (SoapClient stumbles over WSDL delivered with
+  "Transfer-Encoding: chunked"). (Dmitry)
 - Fixed bug #46108 (DateTime - Memory leak when unserializing). (Felipe)
 - Fixed bug #44861 (scrollable cursor don't work with pgsql). (Matteo)
 - Fixed bug #44409 (PDO::FETCH_SERIALIZE calls __construct()). (Matteo)
index 98ac6c0528e61d2b13f306a1af80651eb3d09a75..c69e10f3e1523338528cf85842fb94237a6d1bfd 100644 (file)
@@ -1897,6 +1897,220 @@ php_stream_filter_factory consumed_filter_factory = {
 
 /* }}} */
 
+/* {{{ chunked filter implementation */
+/* Parser states of the incremental chunked transfer-encoding decoder.
+ * The state is kept between calls so a chunk header, body or line terminator
+ * may be split across any number of input buffers. */
+typedef enum _php_chunked_filter_state {
+       CHUNK_SIZE_START,       /* expecting the first hex digit of a chunk size */
+       CHUNK_SIZE,             /* inside the hex chunk size */
+       CHUNK_SIZE_EXT_START,   /* first byte after the size: ';', CR or LF */
+       CHUNK_SIZE_EXT,         /* skipping a chunk extension up to CR/LF */
+       CHUNK_SIZE_CR,          /* optional CR that ends the size line */
+       CHUNK_SIZE_LF,          /* mandatory LF that ends the size line */
+       CHUNK_BODY,             /* copying chunk payload bytes */
+       CHUNK_BODY_CR,          /* optional CR after the payload */
+       CHUNK_BODY_LF,          /* mandatory LF after the payload */
+       CHUNK_TRAILER,          /* after the zero-sized chunk: input is discarded */
+       CHUNK_ERROR             /* malformed input: remaining bytes pass through */
+} php_chunked_filter_state;
+
+/* Decoder state carried by one "dechunk" filter instance. */
+typedef struct _php_chunked_filter_data {
+       php_chunked_filter_state state;
+       size_t chunk_size;      /* payload bytes still expected for the current chunk;
+                                * unsigned so that a hostile size can never go negative */
+       int persistent;         /* allocation kind, handed back to pefree() */
+} php_chunked_filter_data;
+
+/* Decode one buffer of chunked data in place.
+ *
+ * buf  - input bytes; the decoded payload is compacted to the start of buf
+ * len  - number of valid bytes in buf
+ * data - decoder state, updated so the next call continues where this one
+ *        stopped
+ *
+ * Returns the number of decoded bytes now stored at the start of buf.
+ *
+ * Line terminators may be CRLF or a bare LF. Once the zero-sized chunk has
+ * been seen the rest of the input (the trailer) is dropped. On malformed
+ * input, including a chunk size that does not fit in size_t, the decoder
+ * enters CHUNK_ERROR and copies all remaining bytes through unchanged.
+ */
+static int php_dechunk(char *buf, int len, php_chunked_filter_data *data)
+{
+       char *p = buf;
+       char *end = p + len;
+       char *out = buf;
+       int out_len = 0;
+
+       while (p < end) {
+               switch (data->state) {
+                       case CHUNK_SIZE_START:
+                               data->chunk_size = 0;
+                               /* fall through */
+                       case CHUNK_SIZE:
+                               while (p < end) {
+                                       int digit;
+
+                                       if (*p >= '0' && *p <= '9') {
+                                               digit = *p - '0';
+                                       } else if (*p >= 'A' && *p <= 'F') {
+                                               digit = *p - 'A' + 10;
+                                       } else if (*p >= 'a' && *p <= 'f') {
+                                               digit = *p - 'a' + 10;
+                                       } else if (data->state == CHUNK_SIZE_START) {
+                                               /* a chunk size needs at least one hex digit */
+                                               data->state = CHUNK_ERROR;
+                                               break;
+                                       } else {
+                                               data->state = CHUNK_SIZE_EXT_START;
+                                               break;
+                                       }
+                                       if (data->chunk_size > (((size_t) -1) >> 4)) {
+                                               /* one more digit would overflow the size */
+                                               data->state = CHUNK_ERROR;
+                                               break;
+                                       }
+                                       data->chunk_size = (data->chunk_size << 4) + (size_t) digit;
+                                       data->state = CHUNK_SIZE;
+                                       p++;
+                               }
+                               if (data->state == CHUNK_ERROR) {
+                                       continue;
+                               } else if (p == end) {
+                                       return out_len;
+                               }
+                               /* fall through */
+                       case CHUNK_SIZE_EXT_START:
+                               if (*p == ';'|| *p == '\r' || *p == '\n') {
+                                       data->state = CHUNK_SIZE_EXT;
+                               } else {
+                                       data->state = CHUNK_ERROR;
+                                       continue;
+                               }
+                               /* fall through */
+                       case CHUNK_SIZE_EXT:
+                               /* skip extension */
+                               while (p < end && *p != '\r' && *p != '\n') {
+                                       p++;
+                               }
+                               if (p == end) {
+                                       return out_len;
+                               }
+                               /* fall through */
+                       case CHUNK_SIZE_CR:
+                               if (*p == '\r') {
+                                       p++;
+                                       if (p == end) {
+                                               data->state = CHUNK_SIZE_LF;
+                                               return out_len;
+                                       }
+                               }
+                               /* fall through */
+                       case CHUNK_SIZE_LF:
+                               if (*p == '\n') {
+                                       p++;
+                                       if (data->chunk_size == 0) {
+                                               /* last chunk */
+                                               data->state = CHUNK_TRAILER;
+                                               continue;
+                                       } else if (p == end) {
+                                               data->state = CHUNK_BODY;
+                                               return out_len;
+                                       }
+                               } else {
+                                       data->state = CHUNK_ERROR;
+                                       continue;
+                               }
+                               /* fall through */
+                       case CHUNK_BODY:
+                               if ((size_t) (end - p) >= data->chunk_size) {
+                                       /* the rest of this chunk is in the buffer */
+                                       if (p != out) {
+                                               memmove(out, p, data->chunk_size);
+                                       }
+                                       out += data->chunk_size;
+                                       out_len += (int) data->chunk_size;
+                                       p += data->chunk_size;
+                                       if (p == end) {
+                                               data->state = CHUNK_BODY_CR;
+                                               return out_len;
+                                       }
+                               } else {
+                                       /* chunk continues in the next buffer */
+                                       if (p != out) {
+                                               memmove(out, p, end - p);
+                                       }
+                                       data->chunk_size -= (size_t) (end - p);
+                                       /* this case may have been reached by falling through
+                                        * from the size line, so record that we are mid-body */
+                                       data->state = CHUNK_BODY;
+                                       out_len += (int) (end - p);
+                                       return out_len;
+                               }
+                               /* fall through */
+                       case CHUNK_BODY_CR:
+                               if (*p == '\r') {
+                                       p++;
+                                       if (p == end) {
+                                               data->state = CHUNK_BODY_LF;
+                                               return out_len;
+                                       }
+                               }
+                               /* fall through */
+                       case CHUNK_BODY_LF:
+                               if (*p == '\n') {
+                                       p++;
+                                       data->state = CHUNK_SIZE_START;
+                                       continue;
+                               } else {
+                                       data->state = CHUNK_ERROR;
+                                       continue;
+                               }
+                       case CHUNK_TRAILER:
+                               /* ignore trailer */
+                               p = end;
+                               continue;
+                       case CHUNK_ERROR:
+                               /* pass the undecodable remainder through unchanged */
+                               if (p != out) {
+                                       memmove(out, p, end - p);
+                               }
+                               out_len += (int) (end - p);
+                               return out_len;
+               }
+       }
+       return out_len;
+}
+
+/* Stream filter callback of the "dechunk" filter.
+ *
+ * Takes every bucket queued on buckets_in, decodes its buffer in place with
+ * php_dechunk() using the state stored in thisfilter->abstract, shrinks the
+ * bucket to the decoded length and appends it to buckets_out. The number of
+ * raw input bytes seen is reported through bytes_consumed when the caller
+ * supplies it. Always returns PSFS_PASS_ON; stream and flags are not used.
+ */
+static php_stream_filter_status_t php_chunked_filter(
+       php_stream *stream,
+       php_stream_filter *thisfilter,
+       php_stream_bucket_brigade *buckets_in,
+       php_stream_bucket_brigade *buckets_out,
+       size_t *bytes_consumed,
+       int flags
+       TSRMLS_DC)
+{
+       php_chunked_filter_data *decoder = (php_chunked_filter_data *) thisfilter->abstract;
+       size_t raw_total = 0;
+       php_stream_bucket *pending;
+
+       /* NOTE(review): the loop relies on php_stream_bucket_make_writeable()
+        * taking the bucket off buckets_in - confirm against the streams API */
+       for (pending = buckets_in->head; pending != NULL; pending = buckets_in->head) {
+               php_stream_bucket *writable = php_stream_bucket_make_writeable(pending TSRMLS_CC);
+
+               /* count the encoded size before the buffer is shrunk */
+               raw_total += writable->buflen;
+               writable->buflen = php_dechunk(writable->buf, writable->buflen, decoder);
+               php_stream_bucket_append(buckets_out, writable TSRMLS_CC);
+       }
+
+       if (bytes_consumed != NULL) {
+               *bytes_consumed = raw_total;
+       }
+
+       return PSFS_PASS_ON;
+}
+
+/* Destructor of the "dechunk" filter: frees the decoder state stored in
+ * thisfilter->abstract with the allocation kind recorded in the state itself.
+ * Does nothing when the filter or its state is missing. */
+static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC)
+{
+       php_chunked_filter_data *decoder;
+
+       if (!thisfilter) {
+               return;
+       }
+       decoder = (php_chunked_filter_data *) thisfilter->abstract;
+       if (!decoder) {
+               return;
+       }
+       pefree(decoder, decoder->persistent);
+}
+
+/* Operations table of the "dechunk" filter: the decode callback, the
+ * destructor and the filter's name, in the positional order of
+ * php_stream_filter_ops. */
+static php_stream_filter_ops chunked_filter_ops = {
+       php_chunked_filter,
+       php_chunked_dtor,
+       "dechunk"
+};
+
+/* Factory callback that builds a "dechunk" filter instance.
+ *
+ * filtername   - requested filter name; anything other than "dechunk"
+ *                (compared case-insensitively) yields NULL
+ * filterparams - not used by this filter
+ * persistent   - allocation kind for the decoder state and the filter
+ *
+ * Returns the new filter, or NULL when the name does not match or the decoder
+ * state cannot be allocated (a warning is raised in the latter case).
+ */
+static php_stream_filter *chunked_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
+{
+       php_chunked_filter_data *decoder;
+
+       if (strcasecmp(filtername, "dechunk") != 0) {
+               return NULL;
+       }
+
+       /* Create this filter */
+       decoder = (php_chunked_filter_data *)pecalloc(1, sizeof(php_chunked_filter_data), persistent);
+       if (decoder == NULL) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed allocating %zd bytes", sizeof(php_chunked_filter_data));
+               return NULL;
+       }
+
+       /* start out expecting the size line of the first chunk */
+       decoder->persistent = persistent;
+       decoder->chunk_size = 0;
+       decoder->state = CHUNK_SIZE_START;
+
+       return php_stream_filter_alloc(&chunked_filter_ops, decoder, persistent);
+}
+
+/* Factory record for the "dechunk" filter; its only entry is the create
+ * callback. It is paired with chunked_filter_ops in the filter table below. */
+static php_stream_filter_factory chunked_filter_factory = {
+       chunked_filter_create
+};
+/* }}} */
+
 static const struct {
        php_stream_filter_ops *ops;
        php_stream_filter_factory *factory;
@@ -1907,6 +2121,7 @@ static const struct {
        { &strfilter_strip_tags_ops, &strfilter_strip_tags_factory },
        { &strfilter_convert_ops, &strfilter_convert_factory },
        { &consumed_filter_ops, &consumed_filter_factory },
+       { &chunked_filter_ops, &chunked_filter_factory },
        /* additional filters to go here */
        { NULL, NULL }
 };
index 64b132b759b92f54a36cc22c025f458165d67627..a3d55c84d08d21f729bfaeb1e6fa684db1a95128 100644 (file)
@@ -111,6 +111,7 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path,
        char *user_headers = NULL;
        int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0);
        int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0);
+       php_stream_filter *transfer_encoding = NULL;
 
        tmp_line[0] = '\0';
 
@@ -597,6 +598,25 @@ php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *path,
                        } else if (!strncasecmp(http_header_line, "Content-Length: ", 16)) {
                                file_size = atoi(http_header_line + 16);
                                php_stream_notify_file_size(context, file_size, http_header_line, 0);
+                       } else if (!strncasecmp(http_header_line, "Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) {
+
+                               /* create filter to decode response body */
+                               if (!(options & STREAM_ONLY_GET_HEADERS)) {
+                                       long decode = 1;
+
+                                       if (context && php_stream_context_get_option(context, "http", "auto_decode", &tmpzval) == SUCCESS) {
+                                               SEPARATE_ZVAL(tmpzval);
+                                               convert_to_boolean(*tmpzval);
+                                               decode = Z_LVAL_PP(tmpzval);
+                                       }
+                                       if (decode) {
+                                               transfer_encoding = php_stream_filter_create("dechunk", NULL, php_stream_is_persistent(stream) TSRMLS_CC);
+                                               if (transfer_encoding) {
+                                                       /* don't store the Transfer-Encoding header */
+                                                       continue;
+                                               }
+                                       }
+                               }
                        }
 
                        if (http_header_line[0] == '\0') {
@@ -740,6 +760,11 @@ out:
                 * the stream */
                stream->position = 0;
 
+               if (transfer_encoding) {
+                       php_stream_filter_append(&stream->readfilters, transfer_encoding);
+               }
+       } else if (transfer_encoding) {
+               php_stream_filter_free(transfer_encoding TSRMLS_CC);
        }
 
        return stream;
diff --git a/ext/standard/tests/filters/chunked_001.phpt b/ext/standard/tests/filters/chunked_001.phpt
new file mode 100644 (file)
index 0000000..70dba3c
--- /dev/null
@@ -0,0 +1,33 @@
+--TEST--
+Chunked encoding
+--SKIPIF--
+<?php
+// run-tests.php only skips a test when the SKIPIF output starts with "skip";
+// any other output lets the test run (and fail) without the filter.
+$filters = stream_get_filters();
+if(! in_array( "dechunk", $filters )) die( "skip chunked filter not available." );
+?>
+--FILE--
+<?php
+// Each data: URI holds one chunked-encoded body. The cases cover CRLF and
+// bare LF terminators, chunk extensions, a trailer after the last chunk, and
+// lower-/upper-case hex chunk sizes.
+$cases = array(
+       "data://text/plain,0\r\n",
+       "data://text/plain,2\r\nte\r\n2\r\nst\r\n0\r\n",
+       "data://text/plain,2\nte\n2\nst\n0\n",
+       "data://text/plain,2;a=1\nte\n2;a=2;b=3\r\nst\n0\n",
+       "data://text/plain,2\nte\n2\nst\n0\na=b\r\nc=d\n\r\n",
+       "data://text/plain,1f\n0123456789abcdef0123456789abcde\n1\nf\n0\n",
+       "data://text/plain,1E\n0123456789abcdef0123456789abcd\n2\nef\n0\n",
+);
+foreach ($cases as $uri) {
+       // read the whole body through the dechunk read filter and dump it
+       $stream = fopen($uri, "r");
+       stream_filter_append($stream, "dechunk", STREAM_FILTER_READ);
+       $decoded = stream_get_contents($stream);
+       var_dump($decoded);
+       fclose($stream);
+}
+?>
+--EXPECT--
+string(0) ""
+string(4) "test"
+string(4) "test"
+string(4) "test"
+string(4) "test"
+string(32) "0123456789abcdef0123456789abcdef"
+string(32) "0123456789abcdef0123456789abcdef"