granicus.if.org Git - python/commitdiff
Issue #13169: The maximal repetition number in a regular expression has been
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 16 Feb 2013 14:47:47 +0000 (16:47 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 16 Feb 2013 14:47:47 +0000 (16:47 +0200)
increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
64-bit).

Lib/sre_compile.py
Lib/sre_constants.py
Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS
Modules/_sre.c
Modules/sre.h

index f52ea014597665f3fdd5750204614c4c1b0c02f4..46eac9c070a8f60ad5ec72dc22ca77b3e262c679 100644 (file)
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
index 417670bf8dbd79ba0195ee5d76f051df730cb474..71ccb23f6210eac46b3be7b0a3c338240cb22914 100644 (file)
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
index 19dd4fc4bce1cd1d9fd80246216a96d5aa7fc0a1..045a5ebfef50ea55384a92255b708f4f5be55927 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -505,10 +506,14 @@ def _parse(source, state):
                     continue
                 if lo:
                     min = int(lo)
+                    if min >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        raise OverflowError("the repetition number is too large")
+                    if max < min:
+                        raise error("bad repeat interval")
             else:
                 raise error("not supported")
             # figure out which item to repeat
index 6b047e48dbb7fa2432fe4d277a50b39e9b2a0f17..f7e76dcfbade00827a6c852266156a4001183e3a 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,4 +1,5 @@
-from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
+from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
+        cpython_only
 import io
 import re
 from re import Scanner
@@ -883,6 +884,37 @@ class ReTests(unittest.TestCase):
         self.assertEqual(n, size + 1)
 
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
index 4bc6584c043c994cce4175b5eb1a13dc9ad563a4..83405c424b7d99560882563b67e52d0913c81589 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -224,6 +224,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #13169: The maximal repetition number in a regular expression has been
+  increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
+  64-bit).
+
 - Issue #16743: Fix mmap overflow check on 32 bit Windows.
 
 - Issue #16800: tempfile.gettempdir() no longer left temporary files when
index 88bbf6a941e0485372b403b5139cad1f87ca3d6a..4421eae83acdc1438d50e24bb3e269920849f723 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -517,7 +517,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount;
 
     switch (pattern[0]) {
@@ -1132,7 +1132,7 @@ entrance:
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1218,7 +1218,7 @@ entrance:
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1296,7 +1296,7 @@ entrance:
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3072,7 +3072,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3091,7 +3091,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3979,6 +3979,12 @@ PyMODINIT_FUNC PyInit__sre(void)
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyUnicode_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
index d389b46dd136cac21dd0da6130d9bec9e405d897..68196985584f5afb6595997b118a2afefbd4525f 100644 (file)
--- a/Modules/sre.h
+++ b/Modules/sre.h
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+#else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+#endif
 
 typedef struct {
     PyObject_VAR_HEAD