granicus.if.org Git - python/commitdiff
Issue #16688: Fix backreferences making case-insensitive regex fail on non-ASCII...
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 29 Dec 2012 21:38:48 +0000 (23:38 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 29 Dec 2012 21:38:48 +0000 (23:38 +0200)
Patch by Matthew Barnett.

Lib/test/test_re.py
Misc/ACKS
Misc/NEWS
Modules/_sre.c

index 360ba7285c7364c1254e5f62ec12a6de352c9cb3..b945203633df8307906e1eb2adce4e0fbe878371 100644 (file)
@@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
         self.assertEqual(r, s)
         self.assertEqual(n, size + 1)
 
+    def test_bug_16688(self):
+        # Issue 16688: case-insensitive backreferences and bounded repeats
+        # must work when the subject contains non-ASCII characters.
+        self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
+        self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
index af9aa062503458cf61aa3784648ef749fbef67cc..c947a45590eb9e02b0a610922f64f264e2703dfe 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -70,6 +70,7 @@ Anton Barkovsky
 Nick Barnes
 Quentin Barnes
 David Barnett
+Matthew Barnett
 Richard Barran
 Cesar Eduardo Barros
 Des Barry
index 2a22bb56e02468c9e123880569516a4bae119212..c2bfa00148e8c7a2097fffac9b1d06458bc0f5ff 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -124,6 +124,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #16688: Fix backreferences making case-insensitive regexes fail on
+  non-ASCII strings. Patch by Matthew Barnett.
+
 - Issue #16485: Fix file descriptor not being closed if file header patching
   fails on closing of aifc file.
 
index de3539658d255f391e531adcf78d82277b2180e7..aa56529f90f6424e9af3124d4db2920d34fbcccc 100644 (file)
@@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
@@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
     Py_ssize_t i;
 
     /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
+    if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
         return 0;
 
     /* check known prefix */
@@ -801,7 +801,7 @@ entrance:
         /* <INFO> <1=skip> <2=flags> <3=min> ... */
         if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
             TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
+                   (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
             RETURN_FAILURE;
         }
         ctx->pattern += ctx->pattern[1] + 1;
@@ -1329,9 +1329,10 @@ entrance:
                         RETURN_FAILURE;
                     while (p < e) {
                         if (ctx->ptr >= end ||
-                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
+                            state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
+                            state->lower(SRE_CHARGET(state, p, 0)))
                             RETURN_FAILURE;
-                        p++;
+                        p += state->charsize;
                         ctx->ptr += state->charsize;
                     }
                 }