granicus.if.org Git - python/commitdiff
SF #965425: fix so hyphenated words surrounded by punctuation are
author: Greg Ward <gward@python.net>
Thu, 3 Jun 2004 01:59:41 +0000 (01:59 +0000)
committer: Greg Ward <gward@python.net>
Thu, 3 Jun 2004 01:59:41 +0000 (01:59 +0000)
wrapped correctly.

Lib/test/test_textwrap.py
Lib/textwrap.py

index 5ff4bcca296084a3e601ae422a976ef4b00d546f..8c7279d8f81035a0a1fb83d8fdd236d73dcc900f 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Test script for the textwrap module.
+# Test suite for the textwrap module.
 #
 # Original tests written by Greg Ward <gward@python.net>.
 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
@@ -271,6 +271,23 @@ What a mess!
         self.check_split("foo --option-opt bar",
                          ["foo", " ", "--option-", "opt", " ", "bar"])
 
+    def test_punct_hyphens(self):
+        # Oh bother, SF #965425 found another problem with hyphens --
+        # hyphenated words in single quotes weren't handled correctly.
+        # In fact, the bug is that *any* punctuation around a hyphenated
+        # word was handled incorrectly, except for a leading "--", which
+        # was special-cased for Optik and Docutils.  So test a variety
+        # of styles of punctuation around a hyphenated word.
+        # (Actually this is based on an Optik bug report, #813077).
+        self.check_split("the 'wibble-wobble' widget",
+                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
+        self.check_split('the "wibble-wobble" widget',
+                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
+        self.check_split("the (wibble-wobble) widget",
+                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
+        self.check_split("the ['wibble-wobble'] widget",
+                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
+
     def test_funky_parens (self):
         # Second part of SF bug #596434: long option strings inside
         # parentheses.
index d9df01928c61dbc77bacfc1dc960a5233d1b3b90..32ab10bfbacd71573ead7cd0c330a9ecd66dabfc 100644 (file)
@@ -79,11 +79,11 @@ class TextWrapper:
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     wordsep_re = re.compile(r'(\s+|'                  # any whitespace
-                            r'-*\w{2,}-(?=\w{2,})|'   # hyphenated words
+                            r'[^\s\w]*\w{2,}-(?=\w{2,})|' # hyphenated words
                             r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
-    # XXX will there be a locale-or-charset-aware version of
-    # string.lowercase in 2.3?
+    # XXX this is not locale- or charset-aware -- string.lowercase
+    # is US-ASCII only (and therefore English-only)
     sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                  r'[\.\!\?]'          # sentence-ending punct.
                                  r'[\"\']?'           # optional end-of-quote