From: Sara Golemon
Date: Thu, 30 Mar 2006 00:22:51 +0000 (+0000)
Subject: Make php_stream_copy_to_mem() Unicode-aware and
X-Git-Tag: RELEASE_1_3~197
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51b9a0f2693b7ff1e9dd725c9c6456e2a5d994e0;p=php

Make php_stream_copy_to_mem() Unicode-aware and update userspace function file_get_contents().

Note: fgc()'s second parameter (use_include_path) has been changed to be a bitmask "flags" parameter instead.

For the most commonly used values (TRUE, 1) this will continue functioning as expected since the value of FILE_USE_INCLUDE_PATH is (coincidentally) 1. The impact on other values should be noted in the migration6 guide.

This change makes it possible to allow fgc() to return binary file contents (default) or Unicode-transcoded contents (using FILE_TEXT flag).
---
diff --git a/ext/standard/file.c b/ext/standard/file.c index bd1750bf55..115171d06d 100644 --- a/ext/standard/file.c +++ b/ext/standard/file.c @@ -497,32 +497,32 @@ PHP_FUNCTION(get_meta_tags) /* }}} */ -/* {{{ proto string file_get_contents(string filename [, bool use_include_path [, resource context [, long offset [, long maxlen]]]]) +/* {{{ proto string file_get_contents(string filename [, long flags [, resource context [, long offset [, long maxlen]]]]) U Read the entire file into a string */ -/* UTODO: Accept unicode contents -- Maybe? Perhaps a binary fetch leaving the script to icu_ucnv_toUnicode() on its own is best? 
*/ PHP_FUNCTION(file_get_contents) { char *filename; int filename_len; char *contents; + long flags = 0; zend_bool use_include_path = 0; php_stream *stream; int len; long offset = -1; - long maxlen = PHP_STREAM_COPY_ALL; + long maxlen = PHP_STREAM_COPY_ALL, real_maxlen; zval *zcontext = NULL; php_stream_context *context = NULL; /* Parse arguments */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|br!ll", - &filename, &filename_len, &use_include_path, &zcontext, &offset, &maxlen) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lr!ll", + &filename, &filename_len, &flags, &zcontext, &offset, &maxlen) == FAILURE) { return; } context = php_stream_context_from_zval(zcontext, 0); - stream = php_stream_open_wrapper_ex(filename, "rb", - (use_include_path ? USE_PATH : 0) | REPORT_ERRORS, + stream = php_stream_open_wrapper_ex(filename, (flags & PHP_FILE_TEXT) ? "rt" : "rb", + ((flags & PHP_FILE_USE_INCLUDE_PATH) ? USE_PATH : 0) | REPORT_ERRORS, NULL, context); if (!stream) { RETURN_FALSE; @@ -533,9 +533,20 @@ PHP_FUNCTION(file_get_contents) RETURN_FALSE; } + if (maxlen <= 0 || stream->readbuf_type == IS_STRING) { + real_maxlen = maxlen; + } else { + /* Allows worst case scenario of each input char being turned into two UChars */ + real_maxlen = (maxlen * 2); + } + /* uses mmap if possible */ - if ((len = php_stream_copy_to_mem(stream, &contents, maxlen, 0)) > 0) { + len = php_stream_copy_to_mem_ex(stream, stream->readbuf_type, &contents, real_maxlen, maxlen, 0); + + if (stream->readbuf_type == IS_STRING && len > 0) { RETVAL_STRINGL(contents, len, 0); + } else if (stream->readbuf_type == IS_UNICODE && len > 0) { + RETVAL_UNICODEL(contents, len, 0); } else if (len == 0) { RETVAL_EMPTY_STRING(); } else { diff --git a/main/php_streams.h b/main/php_streams.h index 42cf5eb98d..47fafc8bbd 100755 --- a/main/php_streams.h +++ b/main/php_streams.h @@ -284,6 +284,7 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t count TSRML /* 
Convert using runtime_encoding if necessary -- return unicode */ PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int maxlen, int maxchars TSRMLS_DC); #define php_stream_read_unicode(stream, buf, maxlen) _php_stream_read_unicode((stream), (buf), (maxlen), -1 TSRMLS_CC) +#define php_stream_read_unicode_ex(stream, buf, maxlen, maxchars) _php_stream_read_unicode((stream), (buf), (maxlen), (maxchars) TSRMLS_CC) PHPAPI UChar *_php_stream_read_unicode_chars(php_stream *stream, int *pchars TSRMLS_DC); #define php_stream_read_unicode_chars(stream, pchars) _php_stream_read_unicode_chars((stream), (pchars) TSRMLS_CC) @@ -443,9 +444,12 @@ PHPAPI size_t _php_stream_copy_to_stream(php_stream *src, php_stream *dest, size /* read all data from stream and put into a buffer. Caller must free buffer when done. * The copy will use mmap if available. */ -PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen, +PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, void **buf, size_t maxlen, size_t maxchars, int persistent STREAMS_DC TSRMLS_DC); -#define php_stream_copy_to_mem(src, buf, maxlen, persistent) _php_stream_copy_to_mem((src), (buf), (maxlen), (persistent) STREAMS_CC TSRMLS_CC) +#define php_stream_copy_to_mem(src, buf, maxlen, persistent) \ + _php_stream_copy_to_mem_ex((src), IS_STRING, (buf), (maxlen), -1, (persistent) STREAMS_CC TSRMLS_CC) +#define php_stream_copy_to_mem_ex(src, rettype, buf, maxlen, maxchars, persistent) \ + _php_stream_copy_to_mem_ex((src), (rettype), (buf), (maxlen), (maxchars), (persistent) STREAMS_CC TSRMLS_CC) /* output all data from a stream */ PHPAPI size_t _php_stream_passthru(php_stream * src STREAMS_DC TSRMLS_DC); diff --git a/main/streams/streams.c b/main/streams/streams.c index f9dbb88136..e6a666bd1f 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -1568,19 +1568,24 @@ PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC) } -PHPAPI 
size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen, int persistent STREAMS_DC TSRMLS_DC) +PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, void **buf, size_t maxlen, size_t maxchars, int persistent STREAMS_DC TSRMLS_DC) { size_t ret = 0; - char *ptr; + zstr ptr; size_t len = 0, max_len; int step = CHUNK_SIZE; int min_room = CHUNK_SIZE / 4; php_stream_statbuf ssbuf; - if (buf) { + if (buf) { *buf = NULL; } + if (rettype != src->readbuf_type) { + /* UTODO: Introduce sloppy buffer conversion */ + return 0; + } + if (maxlen == 0) { return 0; } @@ -1590,6 +1595,7 @@ PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen } if (php_stream_mmap_possible(src)) { + /* guarantees src->readbuf_type == IS_STRING */ char *p; size_t mapped; @@ -1600,7 +1606,7 @@ PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen if (*buf) { memcpy(*buf, p, mapped); - (*buf)[mapped] = '\0'; + ((char*)(*buf))[mapped] = 0; } php_stream_mmap_unmap(src); @@ -1610,14 +1616,29 @@ PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen } if (maxlen > 0) { - ptr = *buf = pemalloc_rel_orig(maxlen + 1, persistent); - while ((len < maxlen) & !php_stream_eof(src)) { - ret = php_stream_read(src, ptr, maxlen - len); - len += ret; - ptr += ret; + if (rettype == IS_UNICODE) { + ptr.u = *buf = pemalloc_rel_orig(UBYTES(maxlen + 1), persistent); + while ((len < maxlen) & !php_stream_eof(src)) { + int ulen; + + ret = php_stream_read_unicode_ex(src, ptr.u, maxlen - len, maxchars); + ulen = u_countChar32(ptr.u, ret); + len += ret; + ptr.u += ret; + maxchars -= ret; + } + *(ptr.u) = 0; + return len; + } else { + ptr.s = *buf = pemalloc_rel_orig(maxlen + 1, persistent); + while ((len < maxlen) & !php_stream_eof(src)) { + ret = php_stream_read(src, ptr.s, maxlen - len); + len += ret; + ptr.s += ret; + } + *(ptr.s) = 0; + return len; } - *ptr = '\0'; - return len; } /* avoid many reallocs by 
allocating a good sized chunk to begin with, if @@ -1632,21 +1653,49 @@ PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen max_len = step; } - ptr = *buf = pemalloc_rel_orig(max_len, persistent); + if (rettype == IS_UNICODE) { + ptr.u = *buf = pemalloc_rel_orig(UBYTES(max_len + 1), persistent); - while((ret = php_stream_read(src, ptr, max_len - len))) { - len += ret; - if (len + min_room >= max_len) { - *buf = perealloc_rel_orig(*buf, max_len + step, persistent); - max_len += step; - ptr = *buf + len; - } else { - ptr += ret; + while((ret = php_stream_read_unicode_ex(src, ptr.u, max_len - len, maxchars))) { + int ulen = u_countChar32(ptr.u, ret); + + len += ret; + if (len + min_room >= max_len) { + *buf = perealloc_rel_orig(*buf, UBYTES(max_len + step), persistent); + max_len += step; + ptr.u = ((UChar*)(*buf)) + len; + } else { + ptr.u += ret; + } + maxchars -= ulen; + } + } else { + ptr.s = *buf = pemalloc_rel_orig(max_len + 1, persistent); + + while((ret = php_stream_read(src, ptr.s, max_len - len))) { + len += ret; + if (len + min_room >= max_len) { + *buf = perealloc_rel_orig(*buf, max_len + step, persistent); + max_len += step; + ptr.s = ((char*)(*buf)) + len; + } else { + ptr.s += ret; + } } } + if (len) { - *buf = perealloc_rel_orig(*buf, len + 1, persistent); - (*buf)[len] = '\0'; + if (rettype == IS_UNICODE) { + if ((max_len - len) > (2 * step)) { + *buf = perealloc_rel_orig(*buf, UBYTES(len + 1), persistent); + } + ((UChar*)(*buf))[len] = 0; + } else { + if ((max_len - len) > (2 * step)) { + *buf = perealloc_rel_orig(*buf, len + 1, persistent); + } + ((char*)(*buf))[len] = 0; + } } else { pefree(*buf, persistent); *buf = NULL;