quotechar, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
- if delimiter is None:
+ if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters)
+ if not delimiter:
+ raise Error, "Could not determine delimiter"
+
class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
data[0].count("%c " % d))
return (d, skipinitialspace)
- # finally, just return the first damn character in the list
- delim = delims.keys()[0]
+ # nothing else indicates a preference, pick the character with the
+ # largest value in delims (ties go to the higher-sorting character)
+ items = [(v,k) for (k,v) in delims.items()]
+ items.sort()
+ delim = items[-1][1]
+
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)
'''
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
+ sample6 = "a|b|c\r\nd|e|f\r\n"
+ sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
def test_has_header(self):
sniffer = csv.Sniffer()
self.assertEqual(dialect.delimiter, ";")
dialect = sniffer.sniff(self.sample5)
self.assertEqual(dialect.delimiter, "\t")
+ dialect = sniffer.sniff(self.sample6)
+ self.assertEqual(dialect.delimiter, "|")
+ dialect = sniffer.sniff(self.sample7)
+ self.assertEqual(dialect.delimiter, "|")
+ self.assertEqual(dialect.quotechar, "'")
if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***"