Issue #18684: Fixed reading out of the buffer in the re module.
author Serhiy Storchaka <storchaka@gmail.com>
Mon, 6 Jul 2015 10:58:33 +0000 (13:58 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Mon, 6 Jul 2015 10:58:33 +0000 (13:58 +0300)
Misc/NEWS
Modules/_sre.c
Modules/sre_lib.h

index 8713f3135fe03c36dfea55ecac99b3e26d4ea681..f064e0ae9b8713a136de220e56c5e1ba6d8d5ea0 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #18684: Fixed reading out of the buffer in the re module.
+
 - Issue #24259: tarfile now raises a ReadError if an archive is truncated
   inside a data segment.
 
index 300d883cf6155667d8c13826282825469da487ae..d6fcda18b694dbbecb13f15d2d942483e4635162 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -883,7 +883,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
         }
 
         if (state.start == state.ptr) {
-            if (last == state.end)
+            if (last == state.end || state.ptr == state.end)
                 break;
             /* skip one character */
             state.start = (void*) ((char*) state.ptr + state.charsize);
@@ -1081,6 +1081,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
 
 next:
         /* move on */
+        if (state.ptr == state.end)
+            break;
         if (state.ptr == state.start)
             state.start = (void*) ((char*) state.ptr + state.charsize);
         else
@@ -2567,6 +2569,9 @@ scanner_match(ScannerObject* self, PyObject *unused)
     PyObject* match;
     Py_ssize_t status;
 
+    if (state->start == NULL)
+        Py_RETURN_NONE;
+
     state_reset(state);
 
     state->ptr = state->start;
@@ -2578,10 +2583,14 @@ scanner_match(ScannerObject* self, PyObject *unused)
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
+    if (status == 0)
+        state->start = NULL;
+    else if (state->ptr != state->start)
+        state->start = state->ptr;
+    else if (state->ptr != state->end)
         state->start = (void*) ((char*) state->ptr + state->charsize);
     else
-        state->start = state->ptr;
+        state->start = NULL;
 
     return match;
 }
@@ -2594,6 +2603,9 @@ scanner_search(ScannerObject* self, PyObject *unused)
     PyObject* match;
     Py_ssize_t status;
 
+    if (state->start == NULL)
+        Py_RETURN_NONE;
+
     state_reset(state);
 
     state->ptr = state->start;
@@ -2605,10 +2617,14 @@ scanner_search(ScannerObject* self, PyObject *unused)
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
+    if (status == 0)
+        state->start = NULL;
+    else if (state->ptr != state->start)
+        state->start = state->ptr;
+    else if (state->ptr != state->end)
         state->start = (void*) ((char*) state->ptr + state->charsize);
     else
-        state->start = state->ptr;
+        state->start = NULL;
 
     return match;
 }
index 5c6c5a559e6fa73a49ce04b9798f8164b3277e03..1049de4226cbb5d860e10933d00852fe3f60e5b2 100644 (file)
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
                 SRE_IS_LINEBREAK((int) ptr[-1]));
 
     case SRE_AT_END:
-        return (((void*) (ptr+1) == state->end &&
+        return (((SRE_CHAR *)state->end - ptr == 1 &&
                  SRE_IS_LINEBREAK((int) ptr[0])) ||
                 ((void*) ptr == state->end));
 
@@ -1093,9 +1093,9 @@ entrance:
             /* <ASSERT> <skip> <back> <pattern> */
             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
                    ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr < state->beginning)
+            if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
                 RETURN_FAILURE;
+            state->ptr = ctx->ptr - ctx->pattern[1];
             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
             RETURN_ON_FAILURE(ret);
             ctx->pattern += ctx->pattern[0];
@@ -1106,8 +1106,8 @@ entrance:
             /* <ASSERT_NOT> <skip> <back> <pattern> */
             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
                    ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr >= state->beginning) {
+            if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
+                state->ptr = ctx->ptr - ctx->pattern[1];
                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
                 if (ret) {
                     RETURN_ON_ERROR(ret);
@@ -1199,12 +1199,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
     SRE_CODE* overlap = NULL;
     int flags = 0;
 
+    if (ptr > end)
+        return 0;
+
     if (pattern[0] == SRE_OP_INFO) {
         /* optimization info block */
         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
 
         flags = pattern[2];
 
+        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
+            TRACE(("reject (got %u chars, need %u)\n",
+                   (unsigned int)(end - ptr), pattern[3]));
+            return 0;
+        }
         if (pattern[3] > 1) {
             /* adjust end point (but make sure we leave at least one
                character in there, so literal search will work) */
@@ -1322,15 +1330,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
                 break;
             ptr++;
         }
-    } else
+    } else {
         /* general case */
-        while (ptr <= end) {
+        assert(ptr <= end);
+        while (1) {
             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
-            state->start = state->ptr = ptr++;
+            state->start = state->ptr = ptr;
             status = SRE(match)(state, pattern, 0);
-            if (status != 0)
+            if (status != 0 || ptr >= end)
                 break;
+            ptr++;
         }
+    }
 
     return status;
 }