From: Andy Heninger Date: Fri, 10 Feb 2017 23:30:24 +0000 (+0000) Subject: ICU-12930 Fix assertion failure in regex compile. X-Git-Tag: release-59-rc~150 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8e5c017e03832d5bddc08e4d766c1306a1050338;p=icu ICU-12930 Fix assertion failure in regex compile. X-SVN-Rev: 39663 --- diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index cdc5800ba45..4e9ad6ae4d7 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -2637,6 +2637,16 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh } +// Increment with overflow check. +// val and delta will both be positive. + +static int32_t safeIncrement(int32_t val, int32_t delta) { + if (INT32_MAX - val > delta) { + return val + delta; + } else { + return INT32_MAX; + } +} //------------------------------------------------------------------------------ @@ -2737,7 +2747,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->add(URX_VAL(op)); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2750,7 +2760,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(*s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2787,7 +2797,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(*s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2802,7 +2812,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(sc); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2819,7 +2829,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2836,7 +2846,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2855,7 +2865,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2879,7 +2889,7 @@ void RegexCompile::matchStartType() { } numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2895,7 +2905,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->complement(); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2975,7 +2985,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialStringLen = stringLen; } - currentLen += stringLen; + currentLen = safeIncrement(currentLen, stringLen); atStart = FALSE; } break; @@ -3000,7 +3010,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; // Matching on an initial string not possible. } - currentLen += stringLen; + currentLen = safeIncrement(currentLen, stringLen); atStart = FALSE; } break; @@ -3258,7 +3268,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { case URX_DOTANY_ALL: // . matches one or two. case URX_DOTANY: case URX_DOTANY_UNIX: - currentLen++; + currentLen = safeIncrement(currentLen, 1); break; @@ -3310,7 +3320,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { { loc++; int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); - currentLen += URX_VAL(stringLenOp); + currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp)); } break; @@ -3323,7 +3333,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { // Assume a min length of one for now. A min length of zero causes // optimization failures for a pattern like "string"+ // currentLen += URX_VAL(stringLenOp); - currentLen += 1; + currentLen = safeIncrement(currentLen, 1); } break; @@ -3433,18 +3443,6 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { return currentLen; } -// Increment with overflow check. -// val and delta will both be positive. - -static int32_t safeIncrement(int32_t val, int32_t delta) { - if (INT32_MAX - val > delta) { - return val + delta; - } else { - return INT32_MAX; - } -} - - //------------------------------------------------------------------------------ // // maxMatchLength Calculate the length of the longest string that could diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt index 276fad8d841..6873f4835c9 100644 --- a/icu4c/source/test/testdata/regextst.txt +++ b/icu4c/source/test/testdata/regextst.txt @@ -1346,6 +1346,16 @@ "(?spam**" "(?spam**" +# Bug #12930 +# +# Minimum Match Length computation, int32_t overflow on an empty set in the pattern. +# The empty set, with no match possible, has a min match length of INT32_MAX. +# Was incremented subsequently. Caused assertion failure on pattern compile. + +"[^\u0000-\U0010ffff]bc?" "bc no match" +"[^\u0000-\U0010ffff]?bc?" "<0>bc has a match" + + # Random debugging, Temporary #