From: Serhiy Storchaka Date: Fri, 10 Oct 2014 08:06:31 +0000 (+0300) Subject: Issue #1519638: Now unmatched groups are replaced with empty strings in re.sub() X-Git-Tag: v3.5.0a1~718 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7438e4b56fa6a34a021f11e1220331e841419b96;p=python Issue #1519638: Now unmatched groups are replaced with empty strings in re.sub() and re.subn(). --- diff --git a/Doc/library/re.rst b/Doc/library/re.rst index edb24864f2..48e3006ef9 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -701,6 +701,9 @@ form. .. versionchanged:: 3.1 Added the optional flags argument. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. + .. function:: subn(pattern, repl, string, count=0, flags=0) @@ -710,6 +713,9 @@ form. .. versionchanged:: 3.1 Added the optional flags argument. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. + .. function:: escape(string) @@ -885,6 +891,8 @@ Match objects support the following methods and attributes: (``\g<1>``, ``\g<name>``) are replaced by the contents of the corresponding group. + .. versionchanged:: 3.5 + Unmatched groups are replaced with an empty string. .. method:: match.group([group1, ...]) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index feca241cf0..319284a8cc 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -223,6 +223,9 @@ re * Number of capturing groups in regular expression is no longer limited by 100. (Contributed by Serhiy Storchaka in :issue:`22437`.) +* Now unmatched groups are replaced with empty strings in :func:`re.sub` + and :func:`re.subn`. (Contributed by Serhiy Storchaka in :issue:`1519638`.) 
+ shutil ------ diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b9a1852823..063d1b7fda 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -880,14 +880,12 @@ def parse_template(source, pattern): def expand_template(template, match): g = match.group - sep = match.string[:0] + empty = match.string[:0] groups, literals = template literals = literals[:] try: for index, group in groups: - literals[index] = s = g(group) - if s is None: - raise error("unmatched group") + literals[index] = g(group) or empty except IndexError: raise error("invalid group reference") - return sep.join(literals) + return empty.join(literals) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e5ad6cb6bc..0e4fa88f5d 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -225,9 +225,11 @@ class ReTests(unittest.TestCase): self.assertRaises(re.error, re.sub, '(?Px)', '\g', 'xx') self.assertRaises(re.error, re.sub, '(?Px)', '\g<>', 'xx') self.assertRaises(re.error, re.sub, '(?Px)', '\g<1a1>', 'xx') + self.assertRaises(re.error, re.sub, '(?Px)', r'\g<2>', 'xx') + self.assertRaises(re.error, re.sub, '(?Px)', r'\2', 'xx') self.assertRaises(IndexError, re.sub, '(?Px)', '\g', 'xx') - self.assertRaises(re.error, re.sub, '(?Px)|(?Py)', '\g', 'xx') - self.assertRaises(re.error, re.sub, '(?Px)|(?Py)', '\\2', 'xx') + self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(re.sub('(?Px)|(?Py)', r'\2', 'xx'), '') self.assertRaises(re.error, re.sub, '(?Px)', '\g<-1>', 'xx') # New valid/invalid identifiers in Python 3 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') @@ -439,6 +441,10 @@ class ReTests(unittest.TestCase): "first second") .expand(r"\2 \1 \g \g"), "second first second first") + self.assertEqual(re.match("(?Pfirst)|(?Psecond)", + "first") + .expand(r"\2 \g"), + " ") def test_repeat_minmax(self): self.assertIsNone(re.match("^(\w){1}$", "abc")) diff --git a/Misc/NEWS b/Misc/NEWS index 21ee7b2cbf..c3932d7eac 100644 --- a/Misc/NEWS +++ b/Misc/NEWS 
@@ -166,7 +166,10 @@ Core and Builtins Library ------- -- Issue $18615: sndhdr.what/whathdr now return a namedtuple. +- Issue #1519638: Now unmatched groups are replaced with empty strings in re.sub() + and re.subn(). + +- Issue #18615: sndhdr.what/whathdr now return a namedtuple. - Issue #22462: Fix pyexpat's creation of a dummy frame to make it appear in exception tracebacks.