]> granicus.if.org Git - python/commitdiff
Patch from Thomas Barr so that csv.Sniffer will set doublequote property.
authorSkip Montanaro <skip@pobox.com>
Mon, 28 Sep 2009 02:12:27 +0000 (02:12 +0000)
committerSkip Montanaro <skip@pobox.com>
Mon, 28 Sep 2009 02:12:27 +0000 (02:12 +0000)
Closes issue 6606.

Lib/csv.py
Lib/test/test_csv.py

index ff51a8648447c5dd198958944efc12ce5c09383d..3db5dac5db666380c03d77473fa05efcbbb075bf 100644 (file)
@@ -170,7 +170,7 @@ class Sniffer:
         Returns a dialect (or None) corresponding to the sample
         """
 
-        quotechar, delimiter, skipinitialspace = \
+        quotechar, doublequote, delimiter, skipinitialspace = \
                    self._guess_quote_and_delimiter(sample, delimiters)
         if not delimiter:
             delimiter, skipinitialspace = self._guess_delimiter(sample,
@@ -184,8 +184,8 @@ class Sniffer:
             lineterminator = '\r\n'
             quoting = QUOTE_MINIMAL
             # escapechar = ''
-            doublequote = False
 
+        dialect.doublequote = doublequote
         dialect.delimiter = delimiter
         # _csv.reader won't accept a quotechar of ''
         dialect.quotechar = quotechar or '"'
@@ -217,8 +217,8 @@ class Sniffer:
                 break
 
         if not matches:
-            return ('', None, 0) # (quotechar, delimiter, skipinitialspace)
-
+            # (quotechar, doublequote, delimiter, skipinitialspace)
+            return ('', False, None, 0)
         quotes = {}
         delims = {}
         spaces = 0
@@ -255,7 +255,19 @@ class Sniffer:
             delim = ''
             skipinitialspace = 0
 
-        return (quotechar, delim, skipinitialspace)
+        # if we see an extra quote between delimiters, we've got a
+        # double quoted format
+        dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+                               {'delim':delim, 'quote':quotechar}, re.MULTILINE)
+
+
+
+        if dq_regexp.search(data):
+            doublequote = True
+        else:
+            doublequote = False
+
+        return (quotechar, doublequote, delim, skipinitialspace)
 
 
     def _guess_delimiter(self, data, delimiters):
index aeb68bbfdc5c90d364a5f02e4a50ba04a42ac7c9..cad59ac3d2929c26a0bb9838955a550a7682bbae 100644 (file)
@@ -891,7 +891,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
 'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
 'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
-'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
+'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
 """
     header = '''\
 "venue","city","state","date","performers"
@@ -950,6 +950,13 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
         self.assertEqual(dialect.delimiter, "|")
         self.assertEqual(dialect.quotechar, "'")
 
+    def test_doublequote(self):
+        sniffer = csv.Sniffer()
+        dialect = sniffer.sniff(self.header)
+        self.assertFalse(dialect.doublequote)
+        dialect = sniffer.sniff(self.sample2)
+        self.assertTrue(dialect.doublequote)
+
 if not hasattr(sys, "gettotalrefcount"):
     if test_support.verbose: print "*** skipping leakage tests ***"
 else: