granicus.if.org Git - python/commitdiff
#18155: Regex-escape delimiter, in case it is a regex special char.
author R David Murray <rdmurray@bitdance.com>
Sat, 29 Jun 2013 22:40:53 +0000 (18:40 -0400)
committer R David Murray <rdmurray@bitdance.com>
Sat, 29 Jun 2013 22:40:53 +0000 (18:40 -0400)
Patch by Vajrasky Kok, with slight modification to the tests by me.

Lib/csv.py
Lib/test/test_csv.py
Misc/ACKS
Misc/NEWS

index 8dfc77e31086f2df74f02304f957eaf0e44d33a1..da3bc44e7acc0fb6556a3ca3799dc638dbe8f5b0 100644 (file)
@@ -264,8 +264,9 @@ class Sniffer:
 
         # if we see an extra quote between delimiters, we've got a
         # double quoted format
-        dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
-                               {'delim':delim, 'quote':quotechar}, re.MULTILINE)
+        dq_regexp = re.compile(
+                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
 
 
 
index 96f8aa7ee19a6fa6e90291da4e19dc5b1603824f..5e285865a792d1e2b0ec87481ed928a9992c30e9 100644 (file)
@@ -796,7 +796,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
 'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
 """
-    header = '''\
+    header1 = '''\
 "venue","city","state","date","performers"
 '''
     sample3 = '''\
@@ -815,10 +815,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
     sample6 = "a|b|c\r\nd|e|f\r\n"
     sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
 
+# Issue 18155: Use a delimiter that is a regex special character:
+
+    header2 = '''\
+"venue"+"city"+"state"+"date"+"performers"
+'''
+    sample8 = """\
+Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
+Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
+Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
+Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
+"""
+    sample9 = """\
+'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
+'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
+'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
+'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
+"""
+
     def test_has_header(self):
         sniffer = csv.Sniffer()
         self.assertEqual(sniffer.has_header(self.sample1), False)
-        self.assertEqual(sniffer.has_header(self.header+self.sample1), True)
+        self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
+                         True)
+
+    def test_has_header_regex_special_delimiter(self):
+        sniffer = csv.Sniffer()
+        self.assertEqual(sniffer.has_header(self.sample8), False)
+        self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
+                         True)
 
     def test_sniff(self):
         sniffer = csv.Sniffer()
@@ -852,13 +877,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
         dialect = sniffer.sniff(self.sample7)
         self.assertEqual(dialect.delimiter, "|")
         self.assertEqual(dialect.quotechar, "'")
+        dialect = sniffer.sniff(self.sample8)
+        self.assertEqual(dialect.delimiter, '+')
+        dialect = sniffer.sniff(self.sample9)
+        self.assertEqual(dialect.delimiter, '+')
+        self.assertEqual(dialect.quotechar, "'")
 
     def test_doublequote(self):
         sniffer = csv.Sniffer()
-        dialect = sniffer.sniff(self.header)
+        dialect = sniffer.sniff(self.header1)
+        self.assertFalse(dialect.doublequote)
+        dialect = sniffer.sniff(self.header2)
         self.assertFalse(dialect.doublequote)
         dialect = sniffer.sniff(self.sample2)
         self.assertTrue(dialect.doublequote)
+        dialect = sniffer.sniff(self.sample8)
+        self.assertFalse(dialect.doublequote)
+        dialect = sniffer.sniff(self.sample9)
+        self.assertTrue(dialect.doublequote)
 
 if not hasattr(sys, "gettotalrefcount"):
     if support.verbose: print("*** skipping leakage tests ***")
index 4e0fcbe2565e4dceebab2c7a76edf898a259b56b..71402b71ac1b45b180e58a07d0ffdc08a3fde15a 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -656,6 +656,7 @@ Kubilay Kocak
 Greg Kochanski
 Damon Kohler
 Marko Kohtala
+Vajrasky Kok
 Guido Kollerie
 Jacek Konieczny
 Марк Коренберг
index e3070dfd412ba3b8166bc147049a7dabd0b61262..884ae94dde04ec7724ae73528fb7d926aea0780d 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #18155: The csv module now correctly handles csv files that use
+  a delimiter character that has a special meaning in regexes, instead of
+  raising an exception.
+
 - Issue #14360: encode_quopri can now be successfully used as an encoder
   when constructing a MIMEApplication object.
 
@@ -50,7 +54,7 @@ Library
 
 - Issue #18259: Declare sethostname in socketmodule.c for AIX
 
-- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data
+- Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data
   when \r\n appears at end of 65535 bytes without other newlines.
 
 - subprocess: Prevent a possible double close of parent pipe fds when the