Issue #18585: Add :func:`textwrap.shorten` to collapse and truncate a piece of text to a given length.

author Antoine Pitrou <solipsis@pitrou.net>

Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)

committer Antoine Pitrou <solipsis@pitrou.net>

Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)
author Antoine Pitrou <solipsis@pitrou.net>
Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)
committer Antoine Pitrou <solipsis@pitrou.net>
Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst

index c6252540707094c3736b954b0c8c30528d0e0561..486d5db84c9a7dfc29e877451114ed42bdce3511 100644 (file)
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -10,11 +10,11 @@
  
  --------------
  
-The :mod:`textwrap` module provides two convenience functions, :func:`wrap` and
-:func:`fill`, as well as :class:`TextWrapper`, the class that does all the work,
-and two utility functions, :func:`dedent` and :func:`indent`.  If you're just wrapping or filling one
-or two  text strings, the convenience functions should be good enough;
-otherwise,  you should use an instance of :class:`TextWrapper` for efficiency.
+The :mod:`textwrap` module provides some convenience functions,
+as well as :class:`TextWrapper`, the class that does all the work.
+If you're just wrapping or filling one or two text strings, the convenience
+functions should be good enough; otherwise, you should use an instance of
+:class:`TextWrapper` for efficiency.
  
  .. function:: wrap(text, width=70, **kwargs)
  
@@ -39,19 +39,24 @@ otherwise,  you should use an instance of :class:`TextWrapper` for efficiency.
     In particular, :func:`fill` accepts exactly the same keyword arguments as
     :func:`wrap`.
  
-Both :func:`wrap` and :func:`fill` work by creating a :class:`TextWrapper`
-instance and calling a single method on it.  That instance is not reused, so for
-applications that wrap/fill many text strings, it will be more efficient for you
-to create your own :class:`TextWrapper` object.
  
-Text is preferably wrapped on whitespaces and right after the hyphens in
-hyphenated words; only then will long words be broken if necessary, unless
-:attr:`TextWrapper.break_long_words` is set to false.
+.. function:: shorten(text, width, *, placeholder=" (...)")
+
+   Collapse and truncate the given text to fit in the given width.
+
+   The text first has its whitespace collapsed.  If it then fits in
+   the *width*, it is returned as is.  Otherwise, as many words
+   as possible are joined and then the *placeholder* is appended::
+
+      >>> textwrap.shorten("Hello  world!", width=12)
+      'Hello world!'
+      >>> textwrap.shorten("Hello  world!", width=11)
+      'Hello (...)'
+      >>> textwrap.shorten("Hello world", width=10, placeholder="...")
+      'Hello...'
+
+   .. versionadded:: 3.4
  
-Two additional utility function, :func:`dedent` and :func:`indent`, are
-provided to remove indentation from strings that have unwanted whitespace
-to the left of the text and to add an arbitrary prefix to selected lines
-in a block of text.
  
  .. function:: dedent(text)
  
@@ -102,6 +107,16 @@ in a block of text.
        + world
  
  
+:func:`wrap`, :func:`fill` and :func:`shorten` work by creating a
+:class:`TextWrapper` instance and calling a single method on it.  That
+instance is not reused, so for applications that process many text
+strings, it may be more efficient to create your own
+:class:`TextWrapper` object.
+
+Text is preferably wrapped on whitespaces and right after the hyphens in
+hyphenated words; only then will long words be broken if necessary, unless
+:attr:`TextWrapper.break_long_words` is set to false.
+
  .. class:: TextWrapper(**kwargs)
  
     The :class:`TextWrapper` constructor accepts a number of optional keyword
@@ -235,7 +250,7 @@ in a block of text.
        was to always allow breaking hyphenated words.
  
  
-   :class:`TextWrapper` also provides two public methods, analogous to the
+   :class:`TextWrapper` also provides some public methods, analogous to the
     module-level convenience functions:
  
     .. method:: wrap(text)
@@ -252,3 +267,14 @@ in a block of text.
        Wraps the single paragraph in *text*, and returns a single string
        containing the wrapped paragraph.
  
+
+   .. method:: shorten(text, *, placeholder=" (...)")
+
+      Collapse and truncate the given text to fit in :attr:`width`
+      characters.
+
+      The text first has its whitespace collapsed.  If it then fits in
+      :attr:`width`, it is returned as-is.  Otherwise, as many words
+      as possible are joined and then the *placeholder* is appended.
+
+      .. versionadded:: 3.4
diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py

index c86f5cfae8d49ed2b60a592dcd6249ebe60bdcdc..20b7655f3069f5611dfcd185a365540dfb6c2121 100644 (file)
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -9,9 +9,8 @@
  #
  
  import unittest
-from test import support
  
-from textwrap import TextWrapper, wrap, fill, dedent, indent
+from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
  
  
  class BaseTestCase(unittest.TestCase):
@@ -43,6 +42,10 @@ class BaseTestCase(unittest.TestCase):
                           "\nexpected %r\n"
                           "but got  %r" % (expect, result))
  
+    def check_shorten(self, text, width, expect, **kwargs):
+        result = shorten(text, width, **kwargs)
+        self.check(result, expect)
+
  
  class WrapTestCase(BaseTestCase):
  
@@ -777,12 +780,59 @@ class IndentTestCase(unittest.TestCase):
              self.assertEqual(indent(text, prefix, predicate), expect)
  
  
-def test_main():
-    support.run_unittest(WrapTestCase,
-                              LongWordTestCase,
-                              IndentTestCases,
-                              DedentTestCase,
-                              IndentTestCase)
+class ShortenTestCase(BaseTestCase):
+
+    def test_simple(self):
+        # Simple case: just words, spaces, and a bit of punctuation
+        text = "Hello there, how are you this fine day? I'm glad to hear it!"
+
+        self.check_shorten(text, 18, "Hello there, (...)")
+        self.check_shorten(text, len(text), text)
+        self.check_shorten(text, len(text) - 1,
+            "Hello there, how are you this fine day? "
+            "I'm glad to (...)")
+
+    def test_placeholder(self):
+        text = "Hello there, how are you this fine day? I'm glad to hear it!"
+
+        self.check_shorten(text, 17, "Hello there,$$", placeholder='$$')
+        self.check_shorten(text, 18, "Hello there, how$$", placeholder='$$')
+        self.check_shorten(text, 18, "Hello there, $$", placeholder=' $$')
+        self.check_shorten(text, len(text), text, placeholder='$$')
+        self.check_shorten(text, len(text) - 1,
+            "Hello there, how are you this fine day? "
+            "I'm glad to hear$$", placeholder='$$')
+
+    def test_empty_string(self):
+        self.check_shorten("", 6, "")
+
+    def test_whitespace(self):
+        # Whitespace collapsing
+        text = """
+            This is a  paragraph that  already has
+            line breaks and \t tabs too."""
+        self.check_shorten(text, 62,
+                             "This is a paragraph that already has line "
+                             "breaks and tabs too.")
+        self.check_shorten(text, 61,
+                             "This is a paragraph that already has line "
+                             "breaks and (...)")
+
+        self.check_shorten("hello      world!  ", 12, "hello world!")
+        self.check_shorten("hello      world!  ", 11, "hello (...)")
+        # The leading space is trimmed from the placeholder
+        # (it would be ugly otherwise).
+        self.check_shorten("hello      world!  ", 10, "(...)")
+
+    def test_width_too_small_for_placeholder(self):
+        wrapper = TextWrapper(width=8)
+        wrapper.shorten("x" * 20, placeholder="(......)")
+        with self.assertRaises(ValueError):
+            wrapper.shorten("x" * 20, placeholder="(.......)")
+
+    def test_first_word_too_long_but_placeholder_fits(self):
+        self.check_shorten("Helloo", 5, "(...)")
+
  
  if __name__ == '__main__':
-    test_main()
+    unittest.main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py

index 7024d4d245aed6e0c919c6150b37354eaef258f2..b19f124c2fb6293a51c7d71c20cc8c7024842ef7 100644 (file)
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -19,6 +19,8 @@ __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent']
  # since 0xa0 is not in range(128).
  _whitespace = '\t\n\x0b\x0c\r '
  
+_default_placeholder = ' (...)'
+
  class TextWrapper:
      """
      Object for wrapping/filling text.  The public interface consists of
@@ -277,6 +279,9 @@ class TextWrapper:
  
          return lines
  
+    def _split_chunks(self, text):
+        text = self._munge_whitespace(text)
+        return self._split(text)
  
      # -- Public interface ----------------------------------------------
  
@@ -289,8 +294,7 @@ class TextWrapper:
          and all other whitespace characters (including newline) are
          converted to space.
          """
-        text = self._munge_whitespace(text)
-        chunks = self._split(text)
+        chunks = self._split_chunks(text)
          if self.fix_sentence_endings:
              self._fix_sentence_endings(chunks)
          return self._wrap_chunks(chunks)
@@ -304,6 +308,36 @@ class TextWrapper:
          """
          return "\n".join(self.wrap(text))
  
+    def shorten(self, text, *, placeholder=_default_placeholder):
+        """shorten(text: str) -> str
+
+        Collapse and truncate the given text to fit in 'self.width' columns.
+        """
+        max_length = self.width
+        if max_length < len(placeholder.strip()):
+            raise ValueError("placeholder too large for max width")
+        sep = ' '
+        sep_len = len(sep)
+        parts = []
+        cur_len = 0
+        chunks = self._split_chunks(text)
+        for chunk in chunks:
+            if not chunk.strip():
+                continue
+            chunk_len = len(chunk) + sep_len if parts else len(chunk)
+            if cur_len + chunk_len > max_length:
+                break
+            parts.append(chunk)
+            cur_len += chunk_len
+        else:
+            # No truncation necessary
+            return sep.join(parts)
+        max_truncated_length = max_length - len(placeholder)
+        while parts and cur_len > max_truncated_length:
+            last = parts.pop()
+            cur_len -= len(last) + sep_len
+        return (sep.join(parts) + placeholder).strip()
+
  
  # -- Convenience interface ---------------------------------------------
  
@@ -332,6 +366,21 @@ def fill(text, width=70, **kwargs):
      w = TextWrapper(width=width, **kwargs)
      return w.fill(text)
  
+def shorten(text, width, *, placeholder=_default_placeholder, **kwargs):
+    """Collapse and truncate the given text to fit in the given width.
+
+    The text first has its whitespace collapsed.  If it then fits in
+    the *width*, it is returned as is.  Otherwise, as many words
+    as possible are joined and then the placeholder is appended::
+
+        >>> textwrap.shorten("Hello  world!", width=12)
+        'Hello world!'
+        >>> textwrap.shorten("Hello  world!", width=11)
+        'Hello (...)'
+    """
+    w = TextWrapper(width=width, **kwargs)
+    return w.shorten(text, placeholder=placeholder)
+
  
  # -- Loosely related functionality -------------------------------------
  
diff --git a/Misc/NEWS b/Misc/NEWS

index 28a0b580293e6e6d5cffc315bf679c50586c4b7b..29b0465355826ac5d565d5bd8af025a4f7bfdb0e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #18585: Add :func:`textwrap.shorten` to collapse and truncate a
+  piece of text to a given length.
+
  - Issue #18598: Tweak exception message for importlib.import_module() to
    include the module name when a key argument is missing.
author	Antoine Pitrou <solipsis@pitrou.net>
	Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)
committer	Antoine Pitrou <solipsis@pitrou.net>
	Mon, 12 Aug 2013 20:39:09 +0000 (22:39 +0200)
Doc/library/textwrap.rst		patch \| blob \| history
Lib/test/test_textwrap.py		patch \| blob \| history
Lib/textwrap.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history