#15927: Fix csv.reader parsing of escaped \r\n with quoting off.

author R David Murray <rdmurray@bitdance.com>

Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)

committer R David Murray <rdmurray@bitdance.com>

Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)
author R David Murray <rdmurray@bitdance.com>
Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)
committer R David Murray <rdmurray@bitdance.com>
Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py

index 3dc3836015546a0461528393da7e301790a3f32a..974d73d02fc41305ea283a21a202a2b63ebaa40e 100644 (file)
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
              for i, row in enumerate(csv.reader(fileobj)):
                  self.assertEqual(row, rows[i])
  
+    def test_roundtrip_escaped_unquoted_newlines(self):
+        with TemporaryFile("w+", newline='') as fileobj:
+            writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
+            rows = [['a\nb','b'],['c','x\r\nd']]
+            writer.writerows(rows)
+            fileobj.seek(0)
+            for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
+                self.assertEqual(row,rows[i])
+
  class TestDialectRegistry(unittest.TestCase):
      def test_registry_badargs(self):
          self.assertRaises(TypeError, csv.list_dialects, None)
diff --git a/Misc/ACKS b/Misc/ACKS

index 976fc56e0d4075ce5374519398a77abf826a1eff..600e0bf03c5129c650080391d288c1e21191d801 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -591,6 +591,7 @@ Orjan Johansen
  Fredrik Johansson
  Gregory K. Johnson
  Kent Johnson
+Michael Johnson
  Simon Johnston
  Matt Joiner
  Thomas Jollans
diff --git a/Misc/NEWS b/Misc/NEWS

index d3e98b3e8664b4014218472cc41a8f5835e7a774..792b62f8fb3475dbbb03a97f711aa4b5ce2bf834 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -289,6 +289,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #15927: CSV now correctly parses escaped newlines and carriage
+  returns when parsing with quoting turned off.
+
  - Issue #17467: add readline and readlines support to mock_open in
    unittest.mock.
  
diff --git a/Modules/_csv.c b/Modules/_csv.c

index 48a5cf809420129566b90d7ccad0e5a642745bf1..39f9d7d377e607903e19937958759a6c84c90d3b 100644 (file)
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
  typedef enum {
      START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
      IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
-    EAT_CRNL
+    EAT_CRNL,AFTER_ESCAPED_CRNL
  } ParserState;
  
  typedef enum {
@@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
          break;
  
      case ESCAPED_CHAR:
+        if (c == '\n' | c=='\r') {
+            if (parse_add_char(self, c) < 0)
+                return -1;
+            self->state = AFTER_ESCAPED_CRNL;
+            break;
+        }
          if (c == '\0')
              c = '\n';
          if (parse_add_char(self, c) < 0)
@@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
          self->state = IN_FIELD;
          break;
  
+    case AFTER_ESCAPED_CRNL:
+        if (c == '\0')
+            break;
+        /*fallthru*/
+
      case IN_FIELD:
          /* in unquoted field */
          if (c == '\n' || c == '\r' || c == '\0') {
author	R David Murray <rdmurray@bitdance.com>
	Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)
committer	R David Murray <rdmurray@bitdance.com>
	Wed, 20 Mar 2013 02:41:47 +0000 (22:41 -0400)
Lib/test/test_csv.py		patch \| blob \| history
Misc/ACKS		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Modules/_csv.c		patch \| blob \| history