From f2dfa7422e36390a1c5e40526a39a9224df29e9a Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Thu, 28 Aug 2014 01:19:29 +0000 Subject: [PATCH] ICU-10815 Fix for uregex_findNext() not setting U_REGEX_STOPPED_BY_CALLER X-SVN-Rev: 36260 --- icu4c/source/i18n/rematch.cpp | 145 +++++++------- icu4c/source/i18n/unicode/regex.h | 21 +- icu4c/source/i18n/uregex.cpp | 244 ++++++++++++------------ icu4c/source/test/cintltst/reapits.c | 191 ++++++++++++------- icu4c/source/test/intltest/regextst.cpp | 23 ++- 5 files changed, 348 insertions(+), 276 deletions(-) diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp index ace985fa685..667cb84f69c 100644 --- a/icu4c/source/i18n/rematch.cpp +++ b/icu4c/source/i18n/rematch.cpp @@ -33,26 +33,6 @@ // #include // Needed for heapcheck testing - -// Find progress callback -// ---------------------- -// Macro to inline test & call to ReportFindProgress(). Eliminates unnecessary function call. -// -#define REGEXFINDPROGRESS_INTERRUPT(pos, status) \ - (fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE) - - -// Smart Backtracking -// ------------------ -// When a failure would go back to a LOOP_C instruction, -// strings, characters, and setrefs scan backwards for a valid start -// character themselves, pop the stack, and save state, emulating the -// LOOP_C's effect but assured that the next character of input is a -// possible matching character. -// -// Good idea in theory; unfortunately it only helps out a few specific -// cases and slows the engine down a little in the rest. 
- U_NAMESPACE_BEGIN // Default limit for the size of the back track stack, to avoid system @@ -584,15 +564,33 @@ int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const { // //-------------------------------------------------------------------------------- UBool RegexMatcher::find() { + if (U_FAILURE(fDeferredStatus)) { + return FALSE; + } + UErrorCode status = U_ZERO_ERROR; + UBool result = find(status); + return result; +} + +//-------------------------------------------------------------------------------- +// +// find() +// +//-------------------------------------------------------------------------------- +UBool RegexMatcher::find(UErrorCode &status) { // Start at the position of the last match end. (Will be zero if the // matcher has been reset.) // + if (U_FAILURE(status)) { + return FALSE; + } if (U_FAILURE(fDeferredStatus)) { + status = fDeferredStatus; return FALSE; } if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { - return findUsingChunk(); + return findUsingChunk(status); } int64_t startPos = fMatchEnd; @@ -653,8 +651,8 @@ UBool RegexMatcher::find() { // No optimization was found. // Try a match at each input position. for (;;) { - MatchAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -670,7 +668,7 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. 
- if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } U_ASSERT(FALSE); @@ -682,8 +680,8 @@ UBool RegexMatcher::find() { fMatch = FALSE; return FALSE; } - MatchAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } return fMatch; @@ -703,8 +701,8 @@ UBool RegexMatcher::find() { // and handle end of text in the following block. if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) || (c>=256 && fPattern->fInitialChars->contains(c)))) { - MatchAt(pos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(pos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -717,7 +715,7 @@ UBool RegexMatcher::find() { fHitEnd = TRUE; return FALSE; } - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -735,8 +733,8 @@ UBool RegexMatcher::find() { c = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); if (c == theChar) { - MatchAt(pos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(pos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -749,7 +747,7 @@ UBool RegexMatcher::find() { fHitEnd = TRUE; return FALSE; } - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -759,8 +757,8 @@ UBool RegexMatcher::find() { { UChar32 c; if (startPos == fAnchorStart) { - MatchAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -778,8 +776,8 @@ UBool RegexMatcher::find() { if (fPattern->fFlags & UREGEX_UNIX_LINES) { for (;;) { if (c == 0x0a) { - MatchAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(startPos, 
FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -797,7 +795,7 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } else { @@ -808,8 +806,8 @@ UBool RegexMatcher::find() { (void)UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); } - MatchAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -827,7 +825,7 @@ UBool RegexMatcher::find() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testStartLimit the last time through. - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -864,7 +862,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) { return FALSE; } fMatchEnd = nativeStart; - return find(); + return find(status); } @@ -874,7 +872,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) { // entire string is available in the UText's chunk buffer. // //-------------------------------------------------------------------------------- -UBool RegexMatcher::findUsingChunk() { +UBool RegexMatcher::findUsingChunk(UErrorCode &status) { // Start at the position of the last match end. (Will be zero if the // matcher has been reset. // @@ -931,8 +929,8 @@ UBool RegexMatcher::findUsingChunk() { // No optimization was found. // Try a match at each input position. 
for (;;) { - MatchChunkAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -946,7 +944,7 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } U_ASSERT(FALSE); @@ -958,8 +956,8 @@ UBool RegexMatcher::findUsingChunk() { fMatch = FALSE; return FALSE; } - MatchChunkAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } return fMatch; @@ -974,8 +972,8 @@ UBool RegexMatcher::findUsingChunk() { U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; if ((c<256 && fPattern->fInitialChars8->contains(c)) || (c>=256 && fPattern->fInitialChars->contains(c))) { - MatchChunkAt(pos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(pos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -987,7 +985,7 @@ UBool RegexMatcher::findUsingChunk() { fHitEnd = TRUE; return FALSE; } - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -1003,8 +1001,8 @@ UBool RegexMatcher::findUsingChunk() { int32_t pos = startPos; U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; if (c == theChar) { - MatchChunkAt(pos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(pos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -1016,7 +1014,7 @@ UBool RegexMatcher::findUsingChunk() { fHitEnd = TRUE; return FALSE; } - if (REGEXFINDPROGRESS_INTERRUPT(startPos, 
fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -1026,8 +1024,8 @@ UBool RegexMatcher::findUsingChunk() { { UChar32 c; if (startPos == fAnchorStart) { - MatchChunkAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -1040,8 +1038,8 @@ UBool RegexMatcher::findUsingChunk() { for (;;) { c = inputBuf[startPos-1]; if (c == 0x0a) { - MatchChunkAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -1057,7 +1055,7 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. - if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } else { @@ -1068,8 +1066,8 @@ UBool RegexMatcher::findUsingChunk() { if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { startPos++; } - MatchChunkAt(startPos, FALSE, fDeferredStatus); - if (U_FAILURE(fDeferredStatus)) { + MatchChunkAt(startPos, FALSE, status); + if (U_FAILURE(status)) { return FALSE; } if (fMatch) { @@ -1085,7 +1083,7 @@ UBool RegexMatcher::findUsingChunk() { // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop // runs with startPos == testLen the last time through. 
- if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) + if (findProgressInterrupt(startPos, status)) return FALSE; } } @@ -1172,8 +1170,8 @@ UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { // Return deep (mutable) clone -// Technology Preview (as an API), but note that the UnicodeString API is implemented -// using this function. +// Technology Preview (as an API), but note that the UnicodeString API is implemented +// using this function. UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const { if (U_FAILURE(status)) { return dest; @@ -2625,25 +2623,20 @@ void RegexMatcher::IncrementTime(UErrorCode &status) { //-------------------------------------------------------------------------------- // -// ReportFindProgress This function is called once for each advance in the target +// findProgressInterrupt This function is called once for each advance in the target // string from the find() function, and calls the user progress callback // function if there is one installed. // -// NOTE: -// -// If the match operation needs to be aborted because the user -// callback asked for it, just set an error status. -// The engine will pick that up and stop in its outer loop. +// Return: TRUE if the find operation is to be terminated. +// FALSE if the find operation is to continue running. 
// //-------------------------------------------------------------------------------- -UBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) { - if (fFindProgressCallbackFn != NULL) { - if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) { - status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/; - return FALSE; - } +UBool RegexMatcher::findProgressInterrupt(int64_t pos, UErrorCode &status) { + if (fFindProgressCallbackFn && !(*fFindProgressCallbackFn)(fFindProgressCallbackContext, pos)) { + status = U_REGEX_STOPPED_BY_CALLER; + return TRUE; } - return TRUE; + return FALSE; } //-------------------------------------------------------------------------------- diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index 7b85d7040f2..950088e192b 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -801,6 +801,21 @@ public: virtual UBool find(); + /** + * Find the next pattern match in the input string. + * The find begins searching the input at the location following the end of + * the previous match, or at the start of the string if there is no previous match. + * If a match is found, start(), end() and group() + * will provide more information regarding the match. + *

Note that if the input string is changed by the application, + * use find(startPos, status) instead of find(), because the saved starting + * position may not be valid with the altered input string.

+ * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @stable @internal + */ + virtual UBool find(UErrorCode &status); + /** * Resets this RegexMatcher and then attempts to find the next substring of the * input string that matches the pattern, starting at the specified index. @@ -1744,11 +1759,13 @@ private: REStackFrame *resetStack(); inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); void IncrementTime(UErrorCode &status); - UBool ReportFindProgress(int64_t matchIndex, UErrorCode &status); + + // Call user find callback function, if set. Return TRUE if operation should be interrupted. + inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status); int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; - UBool findUsingChunk(); + UBool findUsingChunk(UErrorCode &status); void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); UBool isChunkWordBoundary(int32_t pos); diff --git a/icu4c/source/i18n/uregex.cpp b/icu4c/source/i18n/uregex.cpp index c05b7d09d4d..01951234b9c 100644 --- a/icu4c/source/i18n/uregex.cpp +++ b/icu4c/source/i18n/uregex.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2004-2013, International Business Machines +* Copyright (C) 2004-2014, International Business Machines * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: uregex.cpp @@ -144,7 +144,7 @@ uregex_open( const UChar *pattern, re->fPatStringLen = patternLength; u_memcpy(patBuf, pattern, actualPatLen); patBuf[actualPatLen] = 0; - + UText patText = UTEXT_INITIALIZER; utext_openUChars(&patText, patBuf, patternLength, status); @@ -157,7 +157,7 @@ uregex_open( const UChar *pattern, re->fPat = RegexPattern::compile(&patText, flags, *status); } utext_close(&patText); - + if (U_FAILURE(*status)) { goto ErrorExit; } @@ -186,7 +186,7 @@ uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status) { - + if (U_FAILURE(*status)) { return NULL; } @@ -194,19 +194,19 @@ uregex_openUText(UText *pattern, *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - + int64_t patternNativeLength = utext_nativeLength(pattern); - + if (patternNativeLength == 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - + RegularExpression *re = new RegularExpression; - + UErrorCode lengthStatus = U_ZERO_ERROR; int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus); - + u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1)); if (re == NULL || refC == NULL || patBuf == NULL) { @@ -218,7 +218,7 @@ uregex_openUText(UText *pattern, } re->fPatRefCount = refC; *re->fPatRefCount = 1; - + // // Make a copy of the pattern string, so we can return it later if asked. 
// For compiling the pattern, we will use a read-only UText wrapper @@ -227,10 +227,10 @@ uregex_openUText(UText *pattern, re->fPatString = patBuf; re->fPatStringLen = pattern16Length; utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status); - + UText patText = UTEXT_INITIALIZER; utext_openUChars(&patText, patBuf, pattern16Length, status); - + // // Compile the pattern // @@ -240,11 +240,11 @@ uregex_openUText(UText *pattern, re->fPat = RegexPattern::compile(&patText, flags, *status); } utext_close(&patText); - + if (U_FAILURE(*status)) { goto ErrorExit; } - + // // Create the matcher object // @@ -252,11 +252,11 @@ uregex_openUText(UText *pattern, if (U_SUCCESS(*status)) { return (URegularExpression*)re; } - + ErrorExit: delete re; return NULL; - + } //---------------------------------------------------------------------------------------- @@ -280,7 +280,7 @@ uregex_close(URegularExpression *re2) { // uregex_clone // //---------------------------------------------------------------------------------------- -U_CAPI URegularExpression * U_EXPORT2 +U_CAPI URegularExpression * U_EXPORT2 uregex_clone(const URegularExpression *source2, UErrorCode *status) { RegularExpression *source = (RegularExpression*)source2; if (validateRE(source, FALSE, status) == FALSE) { @@ -300,7 +300,7 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status) { } clone->fPat = source->fPat; - clone->fPatRefCount = source->fPatRefCount; + clone->fPatRefCount = source->fPatRefCount; clone->fPatString = source->fPatString; clone->fPatStringLen = source->fPatStringLen; umtx_atomic_inc(source->fPatRefCount); @@ -317,12 +317,12 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status) { // uregex_pattern // //------------------------------------------------------------------------------ -U_CAPI const UChar * U_EXPORT2 +U_CAPI const UChar * U_EXPORT2 uregex_pattern(const URegularExpression *regexp2, int32_t *patLength, UErrorCode *status) { 
RegularExpression *regexp = (RegularExpression*)regexp2; - + if (validateRE(regexp, FALSE, status) == FALSE) { return NULL; } @@ -351,7 +351,7 @@ uregex_patternUText(const URegularExpression *regexp2, // uregex_flags // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_flags(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; if (validateRE(regexp, FALSE, status) == FALSE) { @@ -367,7 +367,7 @@ uregex_flags(const URegularExpression *regexp2, UErrorCode *status) { // uregex_setText // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setText(URegularExpression *regexp2, const UChar *text, int32_t textLength, @@ -380,15 +380,15 @@ uregex_setText(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return; } - + if (regexp->fOwnsText && regexp->fText != NULL) { uprv_free((void *)regexp->fText); } - + regexp->fText = text; regexp->fTextLength = textLength; regexp->fOwnsText = FALSE; - + UText input = UTEXT_INITIALIZER; utext_openUChars(&input, text, textLength, status); regexp->fMatcher->reset(&input); @@ -401,7 +401,7 @@ uregex_setText(URegularExpression *regexp2, // uregex_setUText // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setUText(URegularExpression *regexp2, UText *text, UErrorCode *status) { @@ -413,11 +413,11 @@ uregex_setUText(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return; } - + if (regexp->fOwnsText && regexp->fText != NULL) { uprv_free((void *)regexp->fText); } - + regexp->fText = NULL; // only fill it in on request regexp->fTextLength = -1; regexp->fOwnsText = TRUE; @@ -431,7 +431,7 @@ uregex_setUText(URegularExpression *regexp2, // uregex_getText // 
//------------------------------------------------------------------------------ -U_CAPI const UChar * U_EXPORT2 +U_CAPI const UChar * U_EXPORT2 uregex_getText(URegularExpression *regexp2, int32_t *textLength, UErrorCode *status) { @@ -439,7 +439,7 @@ uregex_getText(URegularExpression *regexp2, if (validateRE(regexp, FALSE, status) == FALSE) { return NULL; } - + if (regexp->fText == NULL) { // need to fill in the text UText *inputText = regexp->fMatcher->inputText(); @@ -452,13 +452,13 @@ uregex_getText(URegularExpression *regexp2, UErrorCode lengthStatus = U_ZERO_ERROR; regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1)); - + utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status); regexp->fText = inputChars; regexp->fOwnsText = TRUE; // should already be set but just in case } } - + if (textLength != NULL) { *textLength = regexp->fTextLength; } @@ -471,7 +471,7 @@ uregex_getText(URegularExpression *regexp2, // uregex_getUText // //------------------------------------------------------------------------------ -U_CAPI UText * U_EXPORT2 +U_CAPI UText * U_EXPORT2 uregex_getUText(URegularExpression *regexp2, UText *dest, UErrorCode *status) { @@ -488,7 +488,7 @@ uregex_getUText(URegularExpression *regexp2, // uregex_refreshUText // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_refreshUText(URegularExpression *regexp2, UText *text, UErrorCode *status) { @@ -505,14 +505,14 @@ uregex_refreshUText(URegularExpression *regexp2, // uregex_matches // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_matches(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status) { return uregex_matches64( regexp2, 
(int64_t)startIndex, status); } -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_matches64(URegularExpression *regexp2, int64_t startIndex, UErrorCode *status) { @@ -535,14 +535,14 @@ uregex_matches64(URegularExpression *regexp2, // uregex_lookingAt // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_lookingAt(URegularExpression *regexp2, int32_t startIndex, UErrorCode *status) { return uregex_lookingAt64( regexp2, (int64_t)startIndex, status); } -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_lookingAt64(URegularExpression *regexp2, int64_t startIndex, UErrorCode *status) { @@ -566,16 +566,16 @@ uregex_lookingAt64(URegularExpression *regexp2, // uregex_find // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_find(URegularExpression *regexp2, - int32_t startIndex, + int32_t startIndex, UErrorCode *status) { return uregex_find64( regexp2, (int64_t)startIndex, status); } -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_find64(URegularExpression *regexp2, - int64_t startIndex, + int64_t startIndex, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; UBool result = FALSE; @@ -584,7 +584,7 @@ uregex_find64(URegularExpression *regexp2, } if (startIndex == -1) { regexp->fMatcher->resetPreserveRegion(); - result = regexp->fMatcher->find(); + result = regexp->fMatcher->find(*status); } else { result = regexp->fMatcher->find(startIndex, *status); } @@ -597,14 +597,14 @@ uregex_find64(URegularExpression *regexp2, // uregex_findNext // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_findNext(URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; if (validateRE(regexp, TRUE, status) == FALSE) { return FALSE; } - UBool 
result = regexp->fMatcher->find(); + UBool result = regexp->fMatcher->find(*status); return result; } @@ -613,7 +613,7 @@ uregex_findNext(URegularExpression *regexp2, // uregex_groupCount // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_groupCount(URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -630,7 +630,7 @@ uregex_groupCount(URegularExpression *regexp2, // uregex_group // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_group(URegularExpression *regexp2, int32_t groupNum, UChar *dest, @@ -644,11 +644,11 @@ uregex_group(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - + if (destCapacity == 0 || regexp->fText != NULL) { // If preflighting or if we already have the text as UChars, // this is a little cheaper than going through uregex_groupUTextDeep() - + // // Pick up the range of characters from the matcher // @@ -660,7 +660,7 @@ uregex_group(URegularExpression *regexp2, // // Trim length based on buffer capacity - // + // int32_t fullLength = endIx - startIx; int32_t copyLength = fullLength; if (copyLength < destCapacity) { @@ -671,7 +671,7 @@ uregex_group(URegularExpression *regexp2, copyLength = destCapacity; *status = U_BUFFER_OVERFLOW_ERROR; } - + // // Copy capture group to user's buffer // @@ -696,7 +696,7 @@ uregex_group(URegularExpression *regexp2, // uregex_groupUText // //------------------------------------------------------------------------------ -U_CAPI UText * U_EXPORT2 +U_CAPI UText * U_EXPORT2 uregex_groupUText(URegularExpression *regexp2, int32_t groupNum, UText *dest, @@ -716,7 +716,7 @@ uregex_groupUText(URegularExpression *regexp2, // uregex_groupUTextDeep // //------------------------------------------------------------------------------ -U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2 uregex_groupUTextDeep(URegularExpression *regexp2, int32_t groupNum, UText *dest, @@ -738,7 +738,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2, UErrorCode emptyTextStatus = U_ZERO_ERROR; return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); } - + if (dest) { utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status); } else { @@ -747,7 +747,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2, dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); utext_close(&groupText); } - + return dest; } else { return regexp->fMatcher->group(groupNum, dest, *status); @@ -759,14 +759,14 @@ uregex_groupUTextDeep(URegularExpression *regexp2, // uregex_start // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_start(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { return (int32_t)uregex_start64( regexp2, groupNum, status); } -U_CAPI int64_t U_EXPORT2 +U_CAPI int64_t U_EXPORT2 uregex_start64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { @@ -783,14 +783,14 @@ uregex_start64(URegularExpression *regexp2, // uregex_end // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_end(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { return (int32_t)uregex_end64( regexp2, groupNum, status); } -U_CAPI int64_t U_EXPORT2 +U_CAPI int64_t U_EXPORT2 uregex_end64(URegularExpression *regexp2, int32_t groupNum, UErrorCode *status) { @@ -807,14 +807,14 @@ uregex_end64(URegularExpression *regexp2, // uregex_reset // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_reset(URegularExpression *regexp2, int32_t index, UErrorCode *status) { uregex_reset64( regexp2, (int64_t)index, status); } 
-U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_reset64(URegularExpression *regexp2, int64_t index, UErrorCode *status) { @@ -831,7 +831,7 @@ uregex_reset64(URegularExpression *regexp2, // uregex_setRegion // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setRegion(URegularExpression *regexp2, int32_t regionStart, int32_t regionLimit, @@ -839,7 +839,7 @@ uregex_setRegion(URegularExpression *regexp2, uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status); } -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setRegion64(URegularExpression *regexp2, int64_t regionStart, int64_t regionLimit, @@ -857,7 +857,7 @@ uregex_setRegion64(URegularExpression *regexp2, // uregex_setRegionAndStart // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setRegionAndStart(URegularExpression *regexp2, int64_t regionStart, int64_t regionLimit, @@ -875,13 +875,13 @@ uregex_setRegionAndStart(URegularExpression *regexp2, // uregex_regionStart // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_regionStart(const URegularExpression *regexp2, UErrorCode *status) { return (int32_t)uregex_regionStart64(regexp2, status); } -U_CAPI int64_t U_EXPORT2 +U_CAPI int64_t U_EXPORT2 uregex_regionStart64(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -897,13 +897,13 @@ uregex_regionStart64(const URegularExpression *regexp2, // uregex_regionEnd // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_regionEnd(const URegularExpression *regexp2, UErrorCode *status) { return (int32_t)uregex_regionEnd64(regexp2, status); } -U_CAPI int64_t U_EXPORT2 +U_CAPI int64_t 
U_EXPORT2 uregex_regionEnd64(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -919,7 +919,7 @@ uregex_regionEnd64(const URegularExpression *regexp2, // uregex_hasTransparentBounds // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_hasTransparentBounds(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -935,7 +935,7 @@ uregex_hasTransparentBounds(const URegularExpression *regexp2, // uregex_useTransparentBounds // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_useTransparentBounds(URegularExpression *regexp2, UBool b, UErrorCode *status) { @@ -952,7 +952,7 @@ uregex_useTransparentBounds(URegularExpression *regexp2, // uregex_hasAnchoringBounds // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_hasAnchoringBounds(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -968,7 +968,7 @@ uregex_hasAnchoringBounds(const URegularExpression *regexp2, // uregex_useAnchoringBounds // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_useAnchoringBounds(URegularExpression *regexp2, UBool b, UErrorCode *status) { @@ -985,7 +985,7 @@ uregex_useAnchoringBounds(URegularExpression *regexp2, // uregex_hitEnd // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_hitEnd(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -1001,7 +1001,7 @@ uregex_hitEnd(const URegularExpression *regexp2, // 
uregex_requireEnd // //------------------------------------------------------------------------------ -U_CAPI UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 uregex_requireEnd(const URegularExpression *regexp2, UErrorCode *status) { RegularExpression *regexp = (RegularExpression*)regexp2; @@ -1017,7 +1017,7 @@ uregex_requireEnd(const URegularExpression *regexp2, // uregex_setTimeLimit // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setTimeLimit(URegularExpression *regexp2, int32_t limit, UErrorCode *status) { @@ -1034,7 +1034,7 @@ uregex_setTimeLimit(URegularExpression *regexp2, // uregex_getTimeLimit // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_getTimeLimit(const URegularExpression *regexp2, UErrorCode *status) { int32_t retVal = 0; @@ -1052,7 +1052,7 @@ uregex_getTimeLimit(const URegularExpression *regexp2, // uregex_setStackLimit // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_setStackLimit(URegularExpression *regexp2, int32_t limit, UErrorCode *status) { @@ -1069,7 +1069,7 @@ uregex_setStackLimit(URegularExpression *regexp2, // uregex_getStackLimit // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_getStackLimit(const URegularExpression *regexp2, UErrorCode *status) { int32_t retVal = 0; @@ -1103,7 +1103,7 @@ uregex_setMatchCallback(URegularExpression *regexp2, // uregex_getMatchCallback // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_getMatchCallback(const URegularExpression *regexp2, URegexMatchCallback **callback, const void **context, @@ -1137,7 +1137,7 @@ uregex_setFindProgressCallback(URegularExpression *regexp2, 
// uregex_getMatchCallback // //------------------------------------------------------------------------------ -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_getFindProgressCallback(const URegularExpression *regexp2, URegexFindProgressCallback **callback, const void **context, @@ -1154,7 +1154,7 @@ uregex_getFindProgressCallback(const URegularExpression *regexp2, // uregex_replaceAll // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_replaceAll(URegularExpression *regexp2, const UChar *replacementText, int32_t replacementLength, @@ -1187,7 +1187,7 @@ uregex_replaceAll(URegularExpression *regexp2, &destBuf, &destCapacity, status); } len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); - + if (U_FAILURE(findStatus)) { // If anything went wrong with the findNext(), make that error trump // whatever may have happened with the append() operations. @@ -1204,7 +1204,7 @@ uregex_replaceAll(URegularExpression *regexp2, // uregex_replaceAllUText // //------------------------------------------------------------------------------ -U_CAPI UText * U_EXPORT2 +U_CAPI UText * U_EXPORT2 uregex_replaceAllUText(URegularExpression *regexp2, UText *replacementText, UText *dest, @@ -1217,18 +1217,18 @@ uregex_replaceAllUText(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - + dest = regexp->fMatcher->replaceAll(replacementText, dest, *status); return dest; } - + //------------------------------------------------------------------------------ // // uregex_replaceFirst // //------------------------------------------------------------------------------ -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_replaceFirst(URegularExpression *regexp2, const UChar *replacementText, int32_t replacementLength, @@ -1251,7 +1251,7 @@ uregex_replaceFirst(URegularExpression *regexp2, uregex_reset(regexp2, 0, status); findSucceeded = uregex_find(regexp2, 0, 
status); if (findSucceeded) { - len = uregex_appendReplacement(regexp2, replacementText, replacementLength, + len = uregex_appendReplacement(regexp2, replacementText, replacementLength, &destBuf, &destCapacity, status); } len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); @@ -1265,7 +1265,7 @@ uregex_replaceFirst(URegularExpression *regexp2, // uregex_replaceFirstUText // //------------------------------------------------------------------------------ -U_CAPI UText * U_EXPORT2 +U_CAPI UText * U_EXPORT2 uregex_replaceFirstUText(URegularExpression *regexp2, UText *replacementText, UText *dest, @@ -1278,7 +1278,7 @@ uregex_replaceFirstUText(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - + dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status); return dest; } @@ -1308,7 +1308,7 @@ class RegexCImpl { UChar **destBuf, int32_t *destCapacity, UErrorCode *status); - + inline static int32_t split(RegularExpression *regexp, UChar *destBuf, int32_t destCapacity, @@ -1364,7 +1364,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, return 0; } if (replacementText == NULL || replacementLength < -1 || - destCapacity == NULL || destBuf == NULL || + destCapacity == NULL || destBuf == NULL || (*destBuf == NULL && *destCapacity > 0) || *destCapacity < 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; @@ -1381,7 +1381,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, int32_t capacity = *destCapacity; int32_t destIdx = 0; int32_t i; - + // If it wasn't supplied by the caller, get the length of the replacement text. // TODO: slightly smarter logic in the copy loop could watch for the NUL on // the fly and avoid this step. 
@@ -1405,7 +1405,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, } for (i=lastMatchEnd; ifText[i], &destIdx, dest, capacity); - } + } } else { UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, @@ -1420,7 +1420,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, UChar c = replacementText[replIdx]; replIdx++; if (c != DOLLARSIGN && c != BACKSLASH) { - // Common case, no substitution, no escaping, + // Common case, no substitution, no escaping, // just copy the char to the dest buf. appendToBuf(c, &destIdx, dest, capacity); continue; @@ -1439,9 +1439,9 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, if (c==0x55/*U*/ || c==0x75/*u*/) { // We have a \udddd or \Udddddddd escape sequence. - UChar32 escapedChar = + UChar32 escapedChar = u_unescapeAt(uregex_ucstr_unescape_charAt, - &replIdx, // Index is updated by unescapeAt + &replIdx, // Index is updated by unescapeAt replacementLength, // Length of replacement text (void *)replacementText); @@ -1527,7 +1527,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, } else { *status = U_BUFFER_OVERFLOW_ERROR; } - + // // Return an updated dest buffer and capacity to the caller. 
// @@ -1554,14 +1554,14 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, // // appendReplacement the actual API function, // -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_appendReplacement(URegularExpression *regexp2, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status) { - + RegularExpression *regexp = (RegularExpression*)regexp2; return RegexCImpl::appendReplacement( regexp, replacementText, replacementLength,destBuf, destCapacity, status); @@ -1570,7 +1570,7 @@ uregex_appendReplacement(URegularExpression *regexp2, // // uregex_appendReplacementUText...can just use the normal C++ method // -U_CAPI void U_EXPORT2 +U_CAPI void U_EXPORT2 uregex_appendReplacementUText(URegularExpression *regexp2, UText *replText, UText *dest, @@ -1603,8 +1603,8 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, if (validateRE(regexp, TRUE, status) == FALSE) { return 0; } - - if (destCapacity == NULL || destBuf == NULL || + + if (destCapacity == NULL || destBuf == NULL || (*destBuf == NULL && *destCapacity > 0) || *destCapacity < 0) { @@ -1617,7 +1617,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, int32_t destIdx = 0; int32_t destCap = *destCapacity; UChar *dest = *destBuf; - + if (regexp->fText != NULL) { int32_t srcIdx; int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd); @@ -1629,7 +1629,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, UErrorCode status = U_ZERO_ERROR; srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); } - + for (;;) { U_ASSERT(destIdx >= 0); @@ -1655,11 +1655,11 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, } srcIdx++; destIdx++; - } + } } else { int64_t srcIdx; if (m->fMatch) { - // The most recent call to find() succeeded. + // The most recent call to find() succeeded. srcIdx = m->fMatchEnd; } else { // The last call to find() on this matcher failed(). 
@@ -1710,7 +1710,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, // // appendTail the actual API function // -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_appendTail(URegularExpression *regexp2, UChar **destBuf, int32_t *destCapacity, @@ -1723,7 +1723,7 @@ uregex_appendTail(URegularExpression *regexp2, // // uregex_appendTailUText...can just use the normal C++ method // -U_CAPI UText * U_EXPORT2 +U_CAPI UText * U_EXPORT2 uregex_appendTailUText(URegularExpression *regexp2, UText *dest, UErrorCode *status) { @@ -1815,19 +1815,19 @@ int32_t RegexCImpl::split(RegularExpression *regexp, i = destFieldsCapacity-1; destIdx = (int32_t)(destFields[i] - destFields[0]); } - + destFields[i] = &destBuf[destIdx]; destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); } break; } - + if (regexp->fMatcher->find()) { // We found another delimiter. Move everything from where we started looking // up until the start of the delimiter into the next output string. destFields[i] = &destBuf[destIdx]; - + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart, &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus); if (tStatus == U_BUFFER_OVERFLOW_ERROR) { @@ -1836,7 +1836,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp, *status = tStatus; } nextOutputStringStart = regexp->fMatcher->fMatchEnd; - + // If the delimiter pattern has capturing parentheses, the captured // text goes out into the next n destination strings. int32_t groupNum; @@ -1846,14 +1846,14 @@ int32_t RegexCImpl::split(RegularExpression *regexp, break; } i++; - + // Set up to extract the capture group contents into the dest buffer. 
destFields[i] = &destBuf[destIdx]; tStatus = U_ZERO_ERROR; - int32_t t = uregex_group((URegularExpression*)regexp, - groupNum, - destFields[i], - REMAINING_CAPACITY(destIdx, destCapacity), + int32_t t = uregex_group((URegularExpression*)regexp, + groupNum, + destFields[i], + REMAINING_CAPACITY(destIdx, destCapacity), &tStatus); destIdx += t + 1; // Record the space used in the output string buffer. // +1 for the NUL that terminates the string. @@ -1865,7 +1865,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp, } if (nextOutputStringStart == inputLen) { - // The delimiter was at the end of the string. + // The delimiter was at the end of the string. // Output an empty string, and then we are done. if (destIdx < destCapacity) { destBuf[destIdx] = 0; @@ -1910,7 +1910,7 @@ int32_t RegexCImpl::split(RegularExpression *regexp, // // uregex_split The actual API function // -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_split(URegularExpression *regexp2, UChar *destBuf, int32_t destCapacity, @@ -1929,15 +1929,15 @@ uregex_split(URegularExpression *regexp2, *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - + return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status); } - + // // uregex_splitUText...can just use the normal C++ method // -U_CAPI int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uregex_splitUText(URegularExpression *regexp2, UText *destFields[], int32_t destFieldsCapacity, diff --git a/icu4c/source/test/cintltst/reapits.c b/icu4c/source/test/cintltst/reapits.c index ee18abfb966..33897f30008 100644 --- a/icu4c/source/test/cintltst/reapits.c +++ b/icu4c/source/test/cintltst/reapits.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2004-2013, International Business Machines Corporation and + * Copyright (c) 2004-2014, International Business Machines Corporation and * others. All Rights Reserved. 
********************************************************************/ /******************************************************************************** @@ -29,11 +29,13 @@ #include "unicode/utext.h" #include "cintltst.h" +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ -log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} +log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ -log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} +log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}} /* * TEST_SETUP and TEST_TEARDOWN @@ -158,6 +160,7 @@ static void TestBug4315(void); static void TestUTextAPI(void); static void TestRefreshInput(void); static void TestBug8421(void); +static void TestBug10815(void); void addURegexTest(TestNode** root); @@ -168,6 +171,7 @@ void addURegexTest(TestNode** root) addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); addTest(root, &TestBug8421, "regex/TestBug8421"); + addTest(root, &TestBug10815, "regex/TestBug10815"); } /* @@ -204,7 +208,7 @@ static void TestRegexCAPI(void) { memset(&minus1, -1, sizeof(minus1)); /* Mimimalist open/close */ - u_uastrncpy(pat, "abc*", sizeof(pat)/2); + u_uastrncpy(pat, "abc*", LENGTHOF(pat)); re = uregex_open(pat, -1, 0, 0, &status); if (U_FAILURE(status)) { log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); @@ -256,7 +260,7 @@ static void TestRegexCAPI(void) { /* The TEST_ASSERT_SUCCESS above should change too... 
*/ if(U_SUCCESS(status)) { - u_uastrncpy(pat, "abc*", sizeof(pat)/2); + u_uastrncpy(pat, "abc*", LENGTHOF(pat)); TEST_ASSERT(u_strcmp(pat, p) == 0); TEST_ASSERT(len==(int32_t)strlen("abc*")); } @@ -296,8 +300,8 @@ static void TestRegexCAPI(void) { TEST_ASSERT_SUCCESS(status); TEST_ASSERT(clone3 != NULL); - u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); - u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); + u_uastrncpy(testString1, "abcccd", LENGTHOF(pat)); + u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat)); status = U_ZERO_ERROR; uregex_setText(clone1, testString1, -1, &status); @@ -328,7 +332,7 @@ static void TestRegexCAPI(void) { { const UChar *resultPat; int32_t resultLen; - u_uastrncpy(pat, "hello", sizeof(pat)/2); + u_uastrncpy(pat, "hello", LENGTHOF(pat)); status = U_ZERO_ERROR; re = uregex_open(pat, -1, 0, NULL, &status); resultPat = uregex_pattern(re, &resultLen, &status); @@ -394,10 +398,10 @@ static void TestRegexCAPI(void) { UChar text2[50]; UBool result; - u_uastrncpy(text1, "abcccd", sizeof(text1)/2); - u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); + u_uastrncpy(text1, "abcccd", LENGTHOF(text1)); + u_uastrncpy(text2, "abcccxd", LENGTHOF(text2)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); TEST_ASSERT_SUCCESS(status); @@ -449,10 +453,10 @@ static void TestRegexCAPI(void) { const UChar *result; int32_t textLength; - u_uastrncpy(text1, "abcccd", sizeof(text1)/2); - u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); + u_uastrncpy(text1, "abcccd", LENGTHOF(text1)); + u_uastrncpy(text2, "abcccxd", LENGTHOF(text2)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); uregex_setText(re, text1, -1, &status); @@ -486,9 +490,9 @@ static void TestRegexCAPI(void) { int len; UChar nullString[] = {0,0,0}; - u_uastrncpy(text1, "abcccde", 
sizeof(text1)/2); + u_uastrncpy(text1, "abcccde", LENGTHOF(text1)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); uregex_setText(re, text1, -1, &status); @@ -538,7 +542,7 @@ static void TestRegexCAPI(void) { { UChar text1[50]; UBool result; - u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); + u_uastrncpy(text1, "012rx5rx890rxrx...", LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("rx", 0, NULL, &status); @@ -621,7 +625,7 @@ static void TestRegexCAPI(void) { UChar buf[80]; UBool result; int32_t resultSz; - u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); + u_uastrncpy(text1, "noise abc interior def, and this is off the end", LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("abc(.*?)def", 0, NULL, &status); @@ -634,21 +638,21 @@ static void TestRegexCAPI(void) { /* Capture Group 0, the full match. Should succeed. */ status = U_ZERO_ERROR; - resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); + resultSz = uregex_group(re, 0, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abc interior def", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); /* Capture group #1. Should succeed. */ status = U_ZERO_ERROR; - resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); + resultSz = uregex_group(re, 1, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING(" interior ", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); /* Capture group out of range. Error. 
*/ status = U_ZERO_ERROR; - uregex_group(re, 2, buf, sizeof(buf)/2, &status); + uregex_group(re, 2, buf, LENGTHOF(buf), &status); TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); /* NULL buffer, pure pre-flight */ @@ -692,7 +696,7 @@ static void TestRegexCAPI(void) { TEST_ASSERT(uregex_regionStart(re, &status) == 3); TEST_ASSERT(uregex_regionEnd(re, &status) == 6); TEST_ASSERT(uregex_findNext(re, &status)); - TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) + TEST_ASSERT(uregex_group(re, 0, resultString, LENGTHOF(resultString), &status) == 3) TEST_ASSERT_STRING("345", resultString, TRUE); TEST_TEARDOWN; @@ -816,9 +820,9 @@ static void TestRegexCAPI(void) { UChar replText[80]; UChar buf[80]; int32_t resultSz; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - u_uastrncpy(replText, "<$1>", sizeof(replText)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); + u_uastrncpy(replText, "<$1>", LENGTHOF(replText)); status = U_ZERO_ERROR; re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -826,7 +830,7 @@ static void TestRegexCAPI(void) { /* Normal case, with match */ uregex_setText(re, text1, -1, &status); - resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("Replace x1x x...x.", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); @@ -834,7 +838,7 @@ static void TestRegexCAPI(void) { /* No match. Text should copy to output with no changes. 
*/ status = U_ZERO_ERROR; uregex_setText(re, text2, -1, &status); - resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); @@ -896,10 +900,10 @@ static void TestRegexCAPI(void) { int32_t expectedResultSize2; int32_t i; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - u_uastrncpy(replText, "<$1>", sizeof(replText)/2); - u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); + u_uastrncpy(replText, "<$1>", LENGTHOF(replText)); + u_uastrncpy(replText2, "<<$1>>", LENGTHOF(replText2)); expectedResultSize = strlen(expectedResult); expectedResultSize2 = strlen(expectedResult2); @@ -909,7 +913,7 @@ static void TestRegexCAPI(void) { /* Normal case, with match */ uregex_setText(re, text1, -1, &status); - resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING(expectedResult, buf, TRUE); TEST_ASSERT(resultSize == expectedResultSize); @@ -917,7 +921,7 @@ static void TestRegexCAPI(void) { /* No match. Text should copy to output with no changes. 
*/ status = U_ZERO_ERROR; uregex_setText(re, text2, -1, &status); - resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT(resultSize == u_strlen(text2)); @@ -1001,15 +1005,15 @@ static void TestRegexCAPI(void) { re = uregex_openC(".*", 0, 0, &status); TEST_ASSERT_SUCCESS(status); - u_uastrncpy(text, "whatever", sizeof(text)/2); - u_uastrncpy(repl, "some other", sizeof(repl)/2); + u_uastrncpy(text, "whatever", LENGTHOF(text)); + u_uastrncpy(repl, "some other", LENGTHOF(repl)); uregex_setText(re, text, -1, &status); /* match covers whole target string */ uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; + bufCap = LENGTHOF(buf); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("some other", buf, TRUE); @@ -1018,8 +1022,8 @@ static void TestRegexCAPI(void) { uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; - u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); + bufCap = LENGTHOF(buf); + u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl)); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); @@ -1054,8 +1058,8 @@ static void TestRegexCAPI(void) { int32_t spaceNeeded; int32_t sz; - u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(textToSplit, "first : second: third", LENGTHOF(textToSplit)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); status = U_ZERO_ERROR; re = uregex_openC(":", 0, NULL, &status); @@ -1070,7 +1074,7 @@ static void TestRegexCAPI(void) { if (U_SUCCESS(status)) { 
memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1102,7 +1106,7 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1160,7 +1164,7 @@ static void TestRegexCAPI(void) { int32_t spaceNeeded; int32_t sz; - u_uastrncpy(textToSplit, "first second third", sizeof(textToSplit)/2); + u_uastrncpy(textToSplit, "first second third", LENGTHOF(textToSplit)); status = U_ZERO_ERROR; re = uregex_openC("<(.*?)>", 0, NULL, &status); @@ -1172,7 +1176,7 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1193,7 +1197,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ @@ -1211,7 +1215,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 3, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1230,7 +1234,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 5, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1257,7 +1261,7 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); + uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 9, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ @@ -1464,8 +1468,8 @@ static void TestUTextAPI(void) { TEST_ASSERT_SUCCESS(status); TEST_ASSERT(clone3 != NULL); - u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); - u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); + u_uastrncpy(testString1, "abcccd", LENGTHOF(pat)); + u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat)); status = U_ZERO_ERROR; uregex_setText(clone1, testString1, -1, &status); @@ -1499,7 +1503,7 @@ static void TestUTextAPI(void) { UText *resultText; const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ - u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ + u_uastrncpy(pat, "hello", LENGTHOF(pat)); /* for comparison */ status = U_ZERO_ERROR; utext_openUTF8(&patternText, str_hello, -1, &status); @@ -1602,7 +1606,7 @@ static void TestUTextAPI(void) { status = U_ZERO_ERROR; utext_openUTF8(&text1, str_abcccd, -1, &status); - u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); + u_uastrncpy(text2Chars, str_abcccxd, LENGTHOF(text2Chars)); utext_openUChars(&text2, text2Chars, -1, &status); utext_openUTF8(&patternText, str_abcd, -1, &status); @@ -1698,7 +1702,7 @@ static void TestUTextAPI(void) { { UChar text1[50]; UBool result; - u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); + u_uastrncpy(text1, "012rx5rx890rxrx...", LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("rx", 0, NULL, &status); @@ -1762,7 +1766,7 @@ static void TestUTextAPI(void) { const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ - u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); + u_uastrncpy(text1, "noise abc interior def, and this is off the end", LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("abc(.*?)def", 0, NULL, &status); @@ -1840,8 +1844,8 @@ static void TestUTextAPI(void) { const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> 
*/ const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ status = U_ZERO_ERROR; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); utext_openUTF8(&replText, str_1x, -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -1886,8 +1890,8 @@ static void TestUTextAPI(void) { const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <1> <...>. */ const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. 
*/ status = U_ZERO_ERROR; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); utext_openUTF8(&replText, str_1, -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -1926,15 +1930,15 @@ static void TestUTextAPI(void) { re = uregex_openC(".*", 0, 0, &status); TEST_ASSERT_SUCCESS(status); - u_uastrncpy(text, "whatever", sizeof(text)/2); - u_uastrncpy(repl, "some other", sizeof(repl)/2); + u_uastrncpy(text, "whatever", LENGTHOF(text)); + u_uastrncpy(repl, "some other", LENGTHOF(repl)); uregex_setText(re, text, -1, &status); /* match covers whole target string */ uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; + bufCap = LENGTHOF(buf); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("some other", buf, TRUE); @@ -1943,8 +1947,8 @@ static void TestUTextAPI(void) { uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; - u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); + bufCap = LENGTHOF(buf); + u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl)); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); @@ -1967,8 +1971,8 @@ static void TestUTextAPI(void) { int32_t numFields; int32_t i; - u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(textToSplit, "first : second: third", LENGTHOF(textToSplit)); + u_uastrncpy(text2, "No match here.", LENGTHOF(text2)); status = U_ZERO_ERROR; re = uregex_openC(":", 0, NULL, &status); @@ -2043,7 +2047,7 @@ static void TestUTextAPI(void) { int32_t 
numFields; int32_t i; - u_uastrncpy(textToSplit, "first second third", sizeof(textToSplit)/2); + u_uastrncpy(textToSplit, "first second third", LENGTHOF(textToSplit)); status = U_ZERO_ERROR; re = uregex_openC("<(.*?)>", 0, NULL, &status); @@ -2266,5 +2270,60 @@ static void TestBug8421(void) { uregex_close(re); } +static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { + return FALSE; +} + +static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { + return FALSE; +} + +static void TestBug10815() { + /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER + * when the callback function specified by uregex_setMatchCallback() returns FALSE + */ + URegularExpression *re; + UErrorCode status = U_ZERO_ERROR; + UChar text[100]; + + + // findNext() with a find progress callback function. + + re = uregex_openC(".z", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + u_uastrncpy(text, "Hello, World.", LENGTHOF(text)); + uregex_setText(re, text, -1, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_setFindProgressCallback(re, FindCallback, NULL, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_findNext(re, &status); + TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); + + uregex_close(re); + + // findNext() with a match progress callback function. + + status = U_ZERO_ERROR; + re = uregex_openC("((xxx)*)*y", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + // Pattern + this text gives an exponential time match. Without the callback to stop the match, + // it will appear to be stuck in a (near) infinite loop. 
+ u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", LENGTHOF(text)); + uregex_setText(re, text, -1, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_setMatchCallback(re, MatchCallback, NULL, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_findNext(re, &status); + TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); + + uregex_close(re); +} + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index bc7e3afed27..00c4763e874 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -2035,7 +2035,7 @@ void RegexTest::API_Match_UTF8() { utext_openUnicodeString(&destText, &dest, &status); UText *result; //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ - // Test shallow-clone API + // Test shallow-clone API int64_t group_len; result = matcher->group((UText *)NULL, group_len, status); REGEX_CHECK_STATUS; @@ -4826,6 +4826,9 @@ struct progressCallBackContext { void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; }; +// call-back function for find(). +// Return TRUE to continue the find(). +// Return FALSE to stop the find(). U_CDECL_BEGIN static UBool U_CALLCONV testProgressCallBackFn(const void *context, int64_t matchIndex) { @@ -4861,7 +4864,7 @@ void RegexTest::FindProgressCallbacks() { const void *returnedContext; URegexFindProgressCallback *returnedFn; UErrorCode status = U_ZERO_ERROR; - RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 
+ RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status); REGEX_CHECK_STATUS; matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status); REGEX_CHECK_STATUS; @@ -4870,10 +4873,10 @@ void RegexTest::FindProgressCallbacks() { REGEX_ASSERT(returnedFn == testProgressCallBackFn); REGEX_ASSERT(returnedContext == &cbInfo); - // A short-running match should NOT invoke the callback. + // A find that matches on the initial position does NOT invoke the callback. status = U_ZERO_ERROR; cbInfo.reset(100); - UnicodeString s = "abxxx"; + UnicodeString s = "aaxxx"; matcher.reset(s); #if 0 matcher.setTrace(TRUE); @@ -4882,7 +4885,8 @@ void RegexTest::FindProgressCallbacks() { REGEX_CHECK_STATUS; REGEX_ASSERT(cbInfo.numCalls == 0); - // A medium running match that causes matcher.find() to invoke our callback for each index. + // A medium running find() that causes matcher.find() to invoke our callback for each index, + // but not so many times that we interrupt the operation. status = U_ZERO_ERROR; s = "aaaaaaaaaaaaaaaaaaab"; cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string @@ -4897,22 +4901,21 @@ void RegexTest::FindProgressCallbacks() { cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string matcher.reset(s1); REGEX_ASSERT(matcher.find(0, status)==FALSE); - REGEX_CHECK_STATUS; + REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); -#if 0 // Now a match that will succeed, but after an interruption status = U_ZERO_ERROR; UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string matcher.reset(s2); REGEX_ASSERT(matcher.find(0, status)==FALSE); - REGEX_CHECK_STATUS; + REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); // Now retry the match from where left off cbInfo.maxCalls = 100; // No callback limit + status = U_ZERO_ERROR; 
REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); REGEX_CHECK_STATUS; -#endif } @@ -5317,7 +5320,7 @@ void RegexTest::TestBug11049() { TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__); TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__); - // Test again with a pattern starting with a single character, + // Test again with a pattern starting with a single character, // which takes a different code path than starting with an OR expression, // but with similar logic. TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__); -- 2.40.0