granicus.if.org Git - python/commitdiff
Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 23 Sep 2014 20:22:41 +0000 (23:22 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 23 Sep 2014 20:22:41 +0000 (23:22 +0300)
regular expressions.

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index b56d437c3479d04fc1f0351071bad85c9d8d7be1..7fd145b6233265001c1f2ea423d09e8a20edfac8 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -295,7 +295,11 @@ def _class_escape(source, escape):
         elif c in OCTDIGITS:
             # octal escape (up to three digits)
             escape += source.getwhile(2, OCTDIGITS)
-            return LITERAL, int(escape[1:], 8) & 0xff
+            c = int(escape[1:], 8)
+            if c > 0o377:
+                raise error('octal escape value %r outside of '
+                            'range 0-0o377' % escape)
+            return LITERAL, c
         elif c in DIGITS:
             raise ValueError
         if len(escape) == 2:
@@ -337,7 +341,7 @@ def _escape(source, escape, state):
         elif c == "0":
             # octal escape
             escape += source.getwhile(2, OCTDIGITS)
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return LITERAL, int(escape[1:], 8)
         elif c in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
@@ -346,7 +350,11 @@ def _escape(source, escape, state):
                     source.next in OCTDIGITS):
                     # got three octal digits; this is an octal escape
                     escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    c = int(escape[1:], 8)
+                    if c > 0o377:
+                        raise error('octal escape value %r outside of '
+                                    'range 0-0o377' % escape)
+                    return LITERAL, c
             # not an octal escape, so this is a group reference
             group = int(escape[1:])
             if group < state.groups:
@@ -837,7 +845,11 @@ def parse_template(source, pattern):
                         s.next in OCTDIGITS):
                         this += sget()
                         isoctal = True
-                        lappend(chr(int(this[1:], 8) & 0xff))
+                        c = int(this[1:], 8)
+                        if c > 0o377:
+                            raise error('octal escape value %r outside of '
+                                        'range 0-0o377' % this)
+                        lappend(chr(c))
                 if not isoctal:
                     addgroup(int(this[1:]))
             else:
index 0584f199c2bce74f86538bd3085de39323063f1a..d85b767f12ca3dc257a65d3a0ef4d247a7640e28 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -154,8 +154,8 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
 
-        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
-        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+        self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
 
         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -700,7 +700,7 @@ class ReTests(unittest.TestCase):
         self.assertTrue(re.match(r"\08", "\0008"))
         self.assertTrue(re.match(r"\01", "\001"))
         self.assertTrue(re.match(r"\018", "\0018"))
-        self.assertTrue(re.match(r"\567", chr(0o167)))
+        self.assertRaises(re.error, re.match, r"\567", "")
         self.assertRaises(re.error, re.match, r"\911", "")
         self.assertRaises(re.error, re.match, r"\x1", "")
         self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -728,12 +728,13 @@ class ReTests(unittest.TestCase):
             self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
             self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
             self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
-        self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
+        self.assertRaises(re.error, re.match, r"[\567]", "")
         self.assertRaises(re.error, re.match, r"[\911]", "")
         self.assertRaises(re.error, re.match, r"[\x1z]", "")
         self.assertRaises(re.error, re.match, r"[\u123z]", "")
         self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
         self.assertRaises(re.error, re.match, r"[\U00110000]", "")
+        self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
 
     def test_sre_byte_literals(self):
         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
@@ -749,7 +750,7 @@ class ReTests(unittest.TestCase):
         self.assertTrue(re.match(br"\08", b"\0008"))
         self.assertTrue(re.match(br"\01", b"\001"))
         self.assertTrue(re.match(br"\018", b"\0018"))
-        self.assertTrue(re.match(br"\567", bytes([0o167])))
+        self.assertRaises(re.error, re.match, br"\567", b"")
         self.assertRaises(re.error, re.match, br"\911", b"")
         self.assertRaises(re.error, re.match, br"\x1", b"")
         self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -766,6 +767,7 @@ class ReTests(unittest.TestCase):
             self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
         self.assertTrue(re.match(br"[\u]", b'u'))
         self.assertTrue(re.match(br"[\U]", b'U'))
+        self.assertRaises(re.error, re.match, br"[\567]", b"")
         self.assertRaises(re.error, re.match, br"[\911]", b"")
         self.assertRaises(re.error, re.match, br"[\x1z]", b"")
 
index 3a9d086fc55ebeabf2e83a5a38bd6e305f208b8a..9c6f7dbd6c58026cf5161efb387f0974a00154a9 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -137,6 +137,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #22362: Forbidden ambiguous octal escapes out of range 0-0o377 in
+  regular expressions.
+
 - Issue #20912: Now directories added to ZIP file have correct Unix and MS-DOS
   directory attributes.