]> granicus.if.org Git - php/commitdiff
- Add an escape parameter to fgetcsv to satisfy rfc4180 and bug #40501.
authorDavid Soria Parra <dsp@php.net>
Wed, 3 Oct 2007 10:31:04 +0000 (10:31 +0000)
committerDavid Soria Parra <dsp@php.net>
Wed, 3 Oct 2007 10:31:04 +0000 (10:31 +0000)
ext/spl/spl_directory.c
ext/spl/spl_directory.h
ext/standard/file.c
ext/standard/file.h
ext/standard/tests/file/bug40501.csv [new file with mode: 0644]
ext/standard/tests/file/bug40501.phpt [new file with mode: 0644]
ext/standard/tests/file/fgetcsv_error.phpt

index 42ea718685639e08b3512c239bfb8f99e149600d..e74dfd9062e2004cb33080ec48b6e9d8498a9710 100755 (executable)
@@ -1771,7 +1771,7 @@ static int spl_filesystem_file_call(spl_filesystem_object *intern, zend_function
        spl_filesystem_file_call(intern, func_ptr, pass_num_args, return_value, arg2 TSRMLS_CC); \
 }
 
-static int spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, zval *return_value TSRMLS_DC) /* {{{ */
+static int spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, char escape, zval *return_value TSRMLS_DC) /* {{{ */
 {
        int ret = SUCCESS;
        
@@ -1788,7 +1788,7 @@ static int spl_filesystem_file_read_csv(spl_filesystem_object *intern, char deli
                }
                ALLOC_INIT_ZVAL(intern->u.file.current_zval);
 
-               php_fgetcsv(intern->u.file.stream, delimiter, enclosure, buf_len, buf, intern->u.file.current_zval TSRMLS_CC);
+               php_fgetcsv(intern->u.file.stream, delimiter, enclosure, escape, buf_len, buf, intern->u.file.current_zval TSRMLS_CC);
                if (return_value) {
                        if (Z_TYPE_P(return_value) != IS_NULL) {
                                zval_dtor(return_value);
@@ -1814,7 +1814,7 @@ static int spl_filesystem_file_read_line_ex(zval * this_ptr, spl_filesystem_obje
                        return FAILURE;
                }
                if (intern->flags & SPL_FILE_OBJECT_READ_CSV) {
-                       return spl_filesystem_file_read_csv(intern, intern->u.file.delimiter, intern->u.file.enclosure, NULL TSRMLS_CC);
+                       return spl_filesystem_file_read_csv(intern, intern->u.file.delimiter, intern->u.file.enclosure, intern->u.file.escape, NULL TSRMLS_CC);
                } else {
                        zend_call_method_with_0_params(&this_ptr, Z_OBJCE_P(getThis()), &intern->u.file.func_getCurr, "getCurrentLine", &retval);
                }
@@ -2149,13 +2149,20 @@ SPL_METHOD(SplFileObject, func_name) \
 SPL_METHOD(SplFileObject, fgetcsv)
 {
        spl_filesystem_object *intern = (spl_filesystem_object*)zend_object_store_get_object(getThis() TSRMLS_CC);
-       char delimiter = intern->u.file.delimiter, enclosure = intern->u.file.enclosure;
-       char *delim, *enclo;
-       int d_len, e_len;
+       char delimiter = intern->u.file.delimiter, enclosure = intern->u.file.enclosure, escape = intern->u.file.escape;
+       char *delim, *enclo, *esc;
+       int d_len, e_len, esc_len;
        
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &delim, &d_len, &enclo, &e_len) == SUCCESS) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|sss", &delim, &d_len, &enclo, &e_len, &esc, &esc_len) == SUCCESS) {
                switch(ZEND_NUM_ARGS())
                {
+                   case 3:
+                               if (esc_len != 1) {
+                                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "escape must be a character");
+                                       RETURN_FALSE;
+                               }
+                               escape = esc[0];
+
                        case 2:
                                if (e_len != 1) {
                                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "enclosure must be a character");
@@ -2173,23 +2180,30 @@ SPL_METHOD(SplFileObject, fgetcsv)
                        case 0:
                                break;
                }
-               spl_filesystem_file_read_csv(intern, delimiter, enclosure, return_value TSRMLS_CC);
+               spl_filesystem_file_read_csv(intern, delimiter, enclosure, escape, return_value TSRMLS_CC);
        }
 }
 /* }}} */
 
-/* {{{ proto void SplFileObject::setCsvControl([string delimiter = ',' [, string enclosure = '"']])
+/* {{{ proto void SplFileObject::setCsvControl([string delimiter = ',' [, string enclosure = '"' [, string escape = '\\']]])
    Set the delimiter and enclosure character used in fgetcsv */
 SPL_METHOD(SplFileObject, setCsvControl)
 {
        spl_filesystem_object *intern = (spl_filesystem_object*)zend_object_store_get_object(getThis() TSRMLS_CC);
-       char delimiter = ',', enclosure = '"';
-       char *delim, *enclo;
-       int d_len, e_len;
+       char delimiter = ',', enclosure = '"', escape='\\';
+       char *delim, *enclo, *esc;
+       int d_len, e_len, esc_len;
        
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &delim, &d_len, &enclo, &e_len) == SUCCESS) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|sss", &delim, &d_len, &enclo, &e_len, &esc, &esc_len) == SUCCESS) {
                switch(ZEND_NUM_ARGS())
                {
+                    case 3:
+                               if (esc_len != 1) {
+                                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "escape must be a character");
+                                       RETURN_FALSE;
+                               }
+                               escape = esc[0];
+                /* no break */
                        case 2:
                                if (e_len != 1) {
                                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "enclosure must be a character");
@@ -2209,6 +2223,7 @@ SPL_METHOD(SplFileObject, setCsvControl)
                }
                intern->u.file.delimiter = delimiter;
                intern->u.file.enclosure = enclosure;
+               intern->u.file.escape    = escape;
        }
 }
 /* }}} */
index 633823e45b93c4f44a2da24e51ab9a341485cc76..94907e0ee6eeac92a979b962c8c4016a37ff9fdd 100755 (executable)
@@ -93,6 +93,7 @@ struct _spl_filesystem_object {
                        zend_function      *func_getCurr;
                        char               delimiter;
                        char               enclosure;
+                       char               escape;
                } file;
        } u;
        spl_filesystem_iterator    it;
index 3ea5ee8f8db9d7ff31c97c20a9194a65b1928122..14b0e4b8e8e928fd254864988c93da41341bdc56 100644 (file)
@@ -2260,12 +2260,12 @@ cleanup:
 /* }}} */
 
 PHPAPI void php_fgetcsv(php_stream *stream, /* {{{ */
-               char delimiter, char enclosure, 
-               size_t buf_len, char *buf,
-               zval *return_value TSRMLS_DC)
+                                               char delimiter, char enclosure, char escape,
+                                               size_t buf_len, char *buf,
+                                               zval *return_value TSRMLS_DC)
 {
-       char *delim = &delimiter, *enc = &enclosure, *buffer = buf;
-       int delim_len = 1, enc_len = 1, buffer_len = buf_len;
+       char *delim = &delimiter, *enc = &enclosure, *buffer = buf, *esc;
+       int delim_len = 1, enc_len = 1, esc_len = 1, buffer_len = buf_len;
        zend_uchar type = IS_STRING;
 
        if (stream) {
@@ -2285,22 +2285,30 @@ PHPAPI void php_fgetcsv(php_stream *stream, /* {{{ */
                        INIT_PZVAL(return_value);
                        return;
                }
+               if (FAILURE == zend_string_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), (UChar**)&esc, &esc_len, &escape, 1 TSRMLS_CC)) {
+                       efree(delim);
+                       efree(enc);
+                       INIT_PZVAL(return_value);
+                       return;
+               }
                if (FAILURE == zend_string_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), (UChar**)&buffer, &buffer_len, buf, buf_len TSRMLS_CC)) {
                        efree(delim);
                        efree(enc);
+                       efree(esc);
                        INIT_PZVAL(return_value);
                        return;
                }
 
-               php_u_fgetcsv(stream, (UChar*)delim, delim_len, (UChar*)enc, enc_len, &esc, 1,
+               php_u_fgetcsv(stream, (UChar*)delim, delim_len, (UChar*)enc, enc_len, (UChar*)esc, esc_len,
                                (UChar*)buffer, buffer_len, return_value TSRMLS_CC);
 
                /* Types converted, free storage */
                efree(delim);
                efree(enc);
+               efree(esc);
        } else {
                /* Binary stream with binary delimiter/enclosures/prefetch */
-               php_fgetcsv_ex(stream, delim, delim_len, enc, enc_len, "\\", 1, buffer, buffer_len, return_value TSRMLS_CC);
+               php_fgetcsv_ex(stream, delim, delim_len, enc, enc_len, esc, esc_len, buffer, buffer_len, return_value TSRMLS_CC);
        }
 }
 
@@ -2365,8 +2373,12 @@ ready_state:
                                }
 
                                /* Is it an escape character? */
-                               if (PHP_FGETCSV_BIN_CHECK(p, e, escape, escape_len)) {
-                                       /* Skip escape sequence and let next char be treated as literal */
+                               if ((PHP_FGETCSV_BIN_CHECK(p, e, escape, escape_len) && escape != enclosure)
+                                       || (PHP_FGETCSV_BIN_CHECK(p, e, escape, escape_len) 
+                                               && PHP_FGETCSV_BIN_CHECK(p+1, e, escape, escape_len) && escape == enclosure)) {
+                                       /* Skip escape sequence and let next char be treated as literal.
+                                          If the enclosure is the same character as the escape, it is
+                                          considered escaped only if it appears twice in a row */
                                        p += escape_len;
                                        /* FALL THROUGH */
                                }
@@ -2569,8 +2581,12 @@ ready_state:
                                }
 
                                /* Is it an escape character? */
-                               if (PHP_FGETCSV_UNI_CHECK(p, e, escape, escape_len)) {
-                                       /* Skip escape sequence and let next char be treated as literal */
+                               if ((PHP_FGETCSV_UNI_CHECK(p, e, escape, escape_len) && escape != enclosure)
+                                       || (PHP_FGETCSV_UNI_CHECK(p, e, escape, escape_len) 
+                                               && PHP_FGETCSV_UNI_CHECK(p+1, e, escape, escape_len) && escape == enclosure)) {
+                                       /* Skip escape sequence and let next char be treated as literal.
+                                          If the enclosure is the same character as the escape, it is
+                                          considered escaped only if it appears twice in a row */
                                        p += escape_len;
                                        /* FALL THROUGH */
                                }
index bee1decfe0adaecc60eba4f6da95512ed4266bcb..4dd0c66071be6dca5b90609eea17d4aaf44dbbcd 100644 (file)
@@ -78,7 +78,7 @@ PHPAPI int php_copy_file(char *src, char *dest TSRMLS_DC);
 PHPAPI int php_copy_file_ex(char *src, char *dest, int src_chk TSRMLS_DC);
 PHPAPI int php_mkdir_ex(char *dir, long mode, int options TSRMLS_DC);
 PHPAPI int php_mkdir(char *dir, long mode TSRMLS_DC);
-PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, size_t buf_len, char *buf, zval *return_value TSRMLS_DC);
+PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, char escape, size_t buf_len, char *buf, zval *return_value TSRMLS_DC);
 PHPAPI void php_fgetcsv_ex(php_stream *stream, char *delimiter, int delimiter_len, char *enclosure, int enclosure_len, char *escape, int escape_len,
                char *buffer, int buffer_len, zval *return_value TSRMLS_DC);
 PHPAPI void php_u_fgetcsv(php_stream *stream, UChar *delimiter, int delimiter_len, UChar *enclosure, int enclosure_len, UChar *escape, int escape_len,
diff --git a/ext/standard/tests/file/bug40501.csv b/ext/standard/tests/file/bug40501.csv
new file mode 100644 (file)
index 0000000..c786ed9
--- /dev/null
@@ -0,0 +1,2 @@
+"this element contains the delimiter, and ends with an odd number of
+backslashes (ex: 1)\",and it isn't the last element$
\ No newline at end of file
diff --git a/ext/standard/tests/file/bug40501.phpt b/ext/standard/tests/file/bug40501.phpt
new file mode 100644 (file)
index 0000000..110533f
--- /dev/null
@@ -0,0 +1,20 @@
+--TEST--
+Bug #40501 (fgetcsv() can't handle trailing odd number of backslashes)
+--FILE--
+<?php
+$file = dirname(__FILE__).'/bug40501.csv';
+
+$h = fopen($file, 'r');
+$data = fgetcsv($h, NULL, ',', '"', '"');
+fclose($h);
+
+var_dump($data);
+?>
+--EXPECT--
+array(2) {
+  [0]=>
+  string(88) "this element contains the delimiter, and ends with an odd number of
+backslashes (ex: 1)\"
+  [1]=>
+  string(30) "and it isn't the last element$"
+}
\ No newline at end of file
index d289431ff2314d54dce5ddb1c847c468a7485722..0ad42d62a2e9a478f27e138461bd4cd9ab02fbf8 100644 (file)
@@ -36,7 +36,7 @@ $invalid_args = array (
 for($loop_counter = 1; $loop_counter <= count($invalid_args); $loop_counter++) {
   echo "-- Iteration $loop_counter --\n";
   var_dump( fgetcsv($invalid_args[$loop_counter - 1]) ); // with default args
-  var_dump( fgetcsv($invalid_args[$loop_counter - 1], $len, $delim, $enclosure) ); // all args specified
+  var_dump( fgetcsv($invalid_args[$loop_counter - 1], $len, $delim, $enclosure, $escape) ); // all args specified
 }
 
 echo "Done\n";