]> granicus.if.org Git - icu/commitdiff
ICU-13631 Regex Address Sanitizer fix.
authorAndy Heninger <andy.heninger@gmail.com>
Thu, 8 Mar 2018 18:32:15 +0000 (18:32 +0000)
committerAndy Heninger <andy.heninger@gmail.com>
Thu, 8 Mar 2018 18:32:15 +0000 (18:32 +0000)
X-SVN-Rev: 41086

icu4c/source/i18n/rematch.cpp
icu4c/source/test/intltest/regextst.cpp
icu4c/source/test/intltest/regextst.h

index d01117f057b1016dc687901593a27f08082cf866..efa3909e5e9df5fa135d5fb3aa15990a977f1d58 100644 (file)
@@ -438,7 +438,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest,
                         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
                     }
                 }
-                        
+
             } else if (u_isdigit(nextChar)) {
                 // $n    Scan for a capture group number
                 int32_t numCaptureGroups = fPattern->fGroupMap->size();
@@ -459,7 +459,7 @@ RegexMatcher &RegexMatcher::appendReplacement(UText *dest,
                         break;
                     }
                     (void)UTEXT_NEXT32(replacement);
-                    groupNum=groupNum*10 + nextDigitVal; 
+                    groupNum=groupNum*10 + nextDigitVal;
                     ++numDigits;
                 }
             } else {
@@ -2187,7 +2187,7 @@ int32_t  RegexMatcher::split(UText *input,
                     break;
                 }
                 i++;
-                dest[i] = utext_extract_replace(fInputText, dest[i], 
+                dest[i] = utext_extract_replace(fInputText, dest[i],
                                                start64(groupNum, status), end64(groupNum, status), &status);
             }
 
@@ -5469,7 +5469,7 @@ GC_Done:
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
-                    if (lbStartIdx > 0) {
+                    if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
                         U16_SET_CP_START(inputBuf, 0, lbStartIdx);
                     }
                 } else {
@@ -5546,7 +5546,7 @@ GC_Done:
                 if (lbStartIdx < 0) {
                     // First time through loop.
                     lbStartIdx = fp->fInputIdx - minML;
-                    if (lbStartIdx > 0) {
+                    if (lbStartIdx > 0 && lbStartIdx < fInputLength) {
                         U16_SET_CP_START(inputBuf, 0, lbStartIdx);
                     }
                 } else {
@@ -5818,3 +5818,4 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher)
 U_NAMESPACE_END
 
 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
+
index 398bc68e2799fb957f3e362e3323310d8c76c0a7..b1d75537cfdc20bbfca56ef61ec7bb396282e5a1 100644 (file)
@@ -39,6 +39,7 @@
 #include "unicode/ustring.h"
 #include "unicode/utext.h"
 #include "unicode/utf16.h"
+#include "cstr.h"
 #include "regextst.h"
 #include "regexcmp.h"
 #include "uvector.h"
@@ -101,6 +102,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
     TESTCASE_AUTO(NamedCapture);
     TESTCASE_AUTO(NamedCaptureLimits);
     TESTCASE_AUTO(TestBug12884);
+    TESTCASE_AUTO(TestBug13631);
     TESTCASE_AUTO_END;
 }
 
@@ -5806,4 +5808,28 @@ void RegexTest::TestBug12884() {
     REGEX_ASSERT(status == U_REGEX_TIME_OUT);
 }
 
+// Bug 13631. A find() of a pattern with a zero-length look-behind assertion
+//            can cause a read past the end of the input text.
+//            The failure is seen when running this test with Clang's Address Sanitizer.
+
+void RegexTest::TestBug13631() {  // Regression test: run find() to exhaustion for each look-behind pattern below.
+    const UChar *pats[] = { u"(?<!^)",  // negative look-behind wrapping the ^ anchor
+                            u"(?<=^)",  // positive look-behind wrapping the ^ anchor
+                            nullptr     // sentinel: terminates the loop below
+                          };
+    for (const UChar **pat=pats; *pat; ++pat) {  // stops at the nullptr sentinel
+        UErrorCode status = U_ZERO_ERROR;  // fresh status for every pattern
+        UnicodeString upat(*pat);
+        RegexMatcher matcher(upat, 0, status);  // flags argument is 0
+        const UChar s =u'a';  // a single UChar object, not a terminated string
+        UText *ut = utext_openUChars(nullptr, &s, 1, &status);  // explicit length 1: any read past s is out of bounds
+        REGEX_CHECK_STATUS;  // macro defined elsewhere; presumably reports and returns on failure -- confirm
+        matcher.reset(ut);  // point the matcher at the one-character input
+        while (matcher.find()) {  // results are ignored; the loop only drives find() until it reports no match
+        }
+        utext_close(ut);  // closes the UText opened above
+    }
+}
+
+
 #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */
index 3f90de0558d39385bc6ec07615282fd9b239b7b1..7e98cd6226fb4491a9fe4bf94215191bc5c4c516 100644 (file)
@@ -57,6 +57,7 @@ public:
     virtual void TestBug11371();
     virtual void TestBug11480();
     virtual void TestBug12884();
+    virtual void TestBug13631();
     
     // The following functions are internal to the regexp tests.
     virtual void assertUText(const char *expected, UText *actual, const char *file, int line);