From: Skip Montanaro Date: Mon, 28 Sep 2009 02:12:27 +0000 (+0000) Subject: Patch from Thomas Barr so that csv.Sniffer will set doublequote property. X-Git-Tag: v2.7a1~453 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b4fd4d37a1b01620cd1bf66d61dcd481b13db411;p=python Patch from Thomas Barr so that csv.Sniffer will set doublequote property. Closes issue 6606. --- diff --git a/Lib/csv.py b/Lib/csv.py index ff51a86484..3db5dac5db 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -170,7 +170,7 @@ class Sniffer: Returns a dialect (or None) corresponding to the sample """ - quotechar, delimiter, skipinitialspace = \ + quotechar, doublequote, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, @@ -184,8 +184,8 @@ class Sniffer: lineterminator = '\r\n' quoting = QUOTE_MINIMAL # escapechar = '' - doublequote = False + dialect.doublequote = doublequote dialect.delimiter = delimiter # _csv.reader won't accept a quotechar of '' dialect.quotechar = quotechar or '"' @@ -217,8 +217,8 @@ class Sniffer: break if not matches: - return ('', None, 0) # (quotechar, delimiter, skipinitialspace) - + # (quotechar, doublequote, delimiter, skipinitialspace) + return ('', False, None, 0) quotes = {} delims = {} spaces = 0 @@ -255,7 +255,19 @@ class Sniffer: delim = '' skipinitialspace = 0 - return (quotechar, delim, skipinitialspace) + # if we see an extra quote between delimiters, we've got a + # double quoted format + dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ + {'delim':delim, 'quote':quotechar}, re.MULTILINE) + + + + if dq_regexp.search(data): + doublequote = True + else: + doublequote = False + + return (quotechar, doublequote, delim, skipinitialspace) def _guess_delimiter(self, data, delimiters): diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index aeb68bbfdc..cad59ac3d2 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -891,7 +891,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back 'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes' 'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence' 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' -'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back' +'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back' """ header = '''\ "venue","city","state","date","performers" @@ -950,6 +950,13 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.quotechar, "'") + def test_doublequote(self): + sniffer = csv.Sniffer() + dialect = sniffer.sniff(self.header) + self.assertFalse(dialect.doublequote) + dialect = sniffer.sniff(self.sample2) + self.assertTrue(dialect.doublequote) + if not hasattr(sys, "gettotalrefcount"): if test_support.verbose: print "*** skipping leakage tests ***" else: