]> granicus.if.org Git - php/commitdiff
More stream updates.
authorSara Golemon <pollita@php.net>
Tue, 14 Mar 2006 21:15:05 +0000 (21:15 +0000)
committerSara Golemon <pollita@php.net>
Tue, 14 Mar 2006 21:15:05 +0000 (21:15 +0000)
fgets() will work now, as will anything which calls one of the
_php_stream_get_line() family of functions.
The one exception here is when the legacy defines are used on a unicode
stream.  At the moment they'll simply return NULL; I'll update these
to do sloppy conversion in a bit.

'make (u)test' still doesn't work, but it now fails in a different way.

ext/standard/file.c
main/php_streams.h
main/streams/streams.c

index 53c36f45d94157f57bc9b05c26de93e11ccd2ec2..032ff77bf3dac63cdc462354d32198837410703c 100644 (file)
@@ -993,9 +993,8 @@ PHPAPI PHP_FUNCTION(fgets)
        zval *zstream;
        int argc = ZEND_NUM_ARGS();
        long length = -1;
-       UChar *buf = NULL;
-       int32_t num_chars = -1, num_bytes = -1;
-       int is_unicode;
+       zstr buf;
+       size_t retlen = 0;
 
        if (zend_parse_parameters(argc TSRMLS_CC, "r|l", &zstream, &length) == FAILURE) {
                RETURN_NULL();
@@ -1003,19 +1002,15 @@ PHPAPI PHP_FUNCTION(fgets)
 
        php_stream_from_zval(stream, &zstream);
 
-       if (length > 0) {
-               /* Don't try to short circuit this by just using num_chars in parse_parameters, long doesn't always mean 32-bit */
-               num_chars = length;
-       }
-
-       if ((buf = php_stream_u_get_line(stream, NULL, &num_bytes, &num_chars, &is_unicode)) == NULL) {
+       buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? IS_UNICODE : IS_STRING, NULL, 0, length, &retlen);
+       if (!buf.v) {
                RETURN_FALSE;
        }
 
-       if (is_unicode) {
-               RETURN_UNICODEL(buf, num_chars, 0);
+       if (php_stream_reads_unicode(stream)) {
+               RETURN_UNICODEL(buf.u, retlen, 0);
        } else {
-                       RETURN_STRINGL((char*)buf, num_bytes, 0);
+               RETURN_STRINGL(buf.s, retlen, 0);
        }
 }
 /* }}} */
index 9b6f7c4d072e04511210a32821b4134b95a16b52..1bb4533443f88e7babe70bd12e6fad586e6c0cad 100755 (executable)
@@ -320,10 +320,14 @@ PHPAPI int _php_stream_putc(php_stream *stream, int c TSRMLS_DC);
 PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC);
 #define php_stream_flush(stream)       _php_stream_flush((stream), 0 TSRMLS_CC)
 
-PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC);
-#define php_stream_gets(stream, buf, maxlen)   _php_stream_get_line((stream), (buf), (maxlen), NULL TSRMLS_CC)
+PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC);
+#define php_stream_get_line(stream, buf, maxlen, retlen)       _php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, (retlen) TSRMLS_CC)
+#define php_stream_get_line_ex(stream, buf_type, buf, maxlen, maxchars, retlen) \
+                                                                                                                       _php_stream_get_line((stream), (buf_type), ZSTR(buf), (maxlen), (maxchars), (retlen) TSRMLS_CC)
+#define php_stream_gets(stream, buf, maxlen)                           _php_stream_get_line((stream), IS_STRING, ZSTR(buf), (maxlen), 0, NULL TSRMLS_CC)
+#define php_stream_gets_ex(stream, buf_type, buf, maxlen, maxchars) \
+                                                                                                                       _php_stream_get_line((stream), (buf_type), ZSTR(buf), (maxlen), (maxchars), NULL TSRMLS_CC)
 
-#define php_stream_get_line(stream, buf, maxlen, retlen) _php_stream_get_line((stream), (buf), (maxlen), (retlen) TSRMLS_CC)
 PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC);
 
 PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC);
index 68bf8f11fa661aa747d18262989384de51fd84ac..8157ca5188379882df7469d1aafc6dc8c3dbaf1b 100755 (executable)
@@ -955,18 +955,25 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS
 
 /* If buf == NULL, the buffer will be allocated automatically and will be of an
  * appropriate length to hold the line, regardless of the line length, memory
- * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL
- * Like php_stream_read(), this will treat unicode streams as ugly binary data (use with caution) */
-PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
-               size_t *returned_len TSRMLS_DC)
+ * permitting -- returned string will be up to (maxlen-1) units of (maxchars) characters, last byte holding terminating NULL
+ * Like php_stream_read(), this will (UTODO) treat unicode streams as ugly binary data (use with caution) */
+PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, size_t maxlen, size_t maxchars, size_t *returned_len TSRMLS_DC)
 {
        size_t avail = 0;
        size_t current_buf_size = 0;
        size_t total_copied = 0;
        int grow_mode = 0;
-       char *bufstart = buf;
+       int is_unicode = php_stream_reads_unicode(stream);
+       int split_surrogate = 0;
+       zstr bufstart = buf;
 
-       if (buf == NULL) {
+       if ((buf_type == IS_STRING && is_unicode) ||
+               (buf_type == IS_UNICODE && !is_unicode)) {
+               /* UTODO: Allow sloppy conversion */
+               return NULL;
+       }
+
+       if (buf.v == NULL) {
                grow_mode = 1;
        } else if (maxlen == 0) {
                return NULL;
@@ -988,20 +995,39 @@ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
        for (;;) {
                avail = stream->writepos - stream->readpos;
 
-               if (avail > 0) {
-                       size_t cpysz = 0;
-                       char *readptr;
-                       char *eol;
+               if (!split_surrogate && avail > 0) {
+                       size_t cpysz = avail;
+                       zstr readptr;
                        int done = 0;
 
-                       readptr = stream->readbuf.s + stream->readpos;
-                       eol = php_stream_locate_eol(stream, (zstr)NULL, 0 TSRMLS_CC);
+                       if (is_unicode) {
+                               UChar *eol;
+                               readptr.u = stream->readbuf.u + stream->readpos;
 
-                       if (eol) {
-                               cpysz = eol - readptr + 1;
-                               done = 1;
+                               eol = php_stream_locate_eol(stream, ZSTR(NULL), 0 TSRMLS_CC);
+                               if (eol) {
+                                       cpysz = eol - readptr.u + 1;
+                                       done = 1;
+                               }
+
+                               if (U16_IS_SURROGATE(readptr.u[cpysz - 1]) &&
+                                       U16_IS_SURROGATE_LEAD(readptr.u[cpysz - 1])) {
+                                       /* Don't orphan */
+                                       cpysz--;
+                                       if (!cpysz) {
+                                               /* Force the loop to land on fill_read_buffer */
+                                               split_surrogate = 1; /* must specifically be 1 */
+                                               continue;
+                                       }
+                               }
                        } else {
-                               cpysz = avail;
+                               char *eol;
+                               readptr.s = stream->readbuf.s + stream->readpos;
+                               eol = php_stream_locate_eol(stream, ZSTR(NULL), 0 TSRMLS_CC);
+                               if (eol) {
+                                       cpysz = eol - readptr.s + 1;
+                                       done = 1;
+                               }
                        }
 
                        if (grow_mode) {
@@ -1012,9 +1038,9 @@ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
                                 * than 8K, we waste 1 byte per additional 8K or so.
                                 * That seems acceptable to me, to avoid making this code
                                 * hard to follow */
-                               bufstart = erealloc(bufstart, current_buf_size + cpysz + 1);
+                               bufstart.s = erealloc(bufstart.s, PS_ULEN(stream, current_buf_size + cpysz + 1));
+                               buf.s = bufstart.s + PS_ULEN(stream, total_copied);
                                current_buf_size += cpysz + 1;
-                               buf = bufstart + total_copied;
                        } else {
                                if (cpysz >= maxlen - 1) {
                                        cpysz = maxlen - 1;
@@ -1022,11 +1048,29 @@ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
                                }
                        }
 
-                       memcpy(buf, readptr, cpysz);
+                       if (is_unicode) {
+                               int ulen = u_countChar32(readptr.u, cpysz);
+
+                               if (ulen > maxchars) {
+                                       int32_t i = 0;
+
+                                       ulen = maxchars;
+                                       U16_FWD_N(readptr.u, i, cpysz, ulen);
+                                       cpysz = i;
+                               }
+                               maxchars -= ulen;
+                               memcpy(buf.u, readptr.u, UBYTES(cpysz));
+                               buf.u += cpysz;
+                       } else {
+                               if (cpysz > maxchars) {
+                                       cpysz = maxchars;
+                               }
+                               memcpy(buf.s, readptr.s, cpysz);
+                               buf.s += cpysz;
+                       }
 
                        stream->position += cpysz;
                        stream->readpos += cpysz;
-                       buf += cpysz;
                        maxlen -= cpysz;
                        total_copied += cpysz;
 
@@ -1050,32 +1094,31 @@ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
 
                        php_stream_fill_read_buffer(stream, toread TSRMLS_CC);
 
-                       if (stream->writepos - stream->readpos == 0) {
+                       if (stream->writepos - stream->readpos <= split_surrogate) {
                                break;
                        }
+                       split_surrogate = 0;
                }
        }
 
        if (total_copied == 0) {
                if (grow_mode) {
-                       assert(bufstart == NULL);
+                       assert(bufstart.v == NULL);
                }
                return NULL;
        }
 
-       buf[0] = '\0';
+       if (is_unicode) {
+               buf.u[0] = 0;
+       } else {
+               buf.s[0] = 0;
+       }
+
        if (returned_len) {
                *returned_len = total_copied;
        }
 
-       return bufstart;
-}
-
-PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC)
-{
-       /* TODO: Bring this back up to date */
-
-       return NULL;
+       return bufstart.s;
 }
 
 /* Same deal as php_stream_read() and php_stream_get_line()