ICU-10815 Fix for uregex_findNext() not setting U_REGEX_STOPPED_BY_CALLER

author Andy Heninger <andy.heninger@gmail.com>

Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp

index ace985fa685557a6a5b513d63a61b08d8d0a7c84..667cb84f69ccd532e33d9fce6bd95bab1755e165 100644 (file)
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -33,26 +33,6 @@
  
  // #include <malloc.h>        // Needed for heapcheck testing
  
-
-// Find progress callback
-// ----------------------
-// Macro to inline test & call to ReportFindProgress().  Eliminates unnecessary function call.
-//
-#define REGEXFINDPROGRESS_INTERRUPT(pos, status)     \
-    (fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE)
-
-
-// Smart Backtracking
-// ------------------
-// When a failure would go back to a LOOP_C instruction,
-// strings, characters, and setrefs scan backwards for a valid start
-// character themselves, pop the stack, and save state, emulating the
-// LOOP_C's effect but assured that the next character of input is a
-// possible matching character.
-//
-// Good idea in theory; unfortunately it only helps out a few specific
-// cases and slows the engine down a little in the rest.
-
  U_NAMESPACE_BEGIN
  
  // Default limit for the size of the back track stack, to avoid system
@@ -584,15 +564,33 @@ int32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
  //
  //--------------------------------------------------------------------------------
  UBool RegexMatcher::find() {
+    if (U_FAILURE(fDeferredStatus)) {
+        return FALSE;
+    }
+    UErrorCode status = U_ZERO_ERROR;
+    UBool result = find(status);
+    return result;
+}
+
+//--------------------------------------------------------------------------------
+//
+//   find()
+//
+//--------------------------------------------------------------------------------
+UBool RegexMatcher::find(UErrorCode &status) {
      // Start at the position of the last match end.  (Will be zero if the
      //   matcher has been reset.)
      //
+    if (U_FAILURE(status)) {
+        return FALSE;
+    }
      if (U_FAILURE(fDeferredStatus)) {
+        status = fDeferredStatus;
          return FALSE;
      }
  
      if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
-        return findUsingChunk();
+        return findUsingChunk(status);
      }
  
      int64_t startPos = fMatchEnd;
@@ -653,8 +651,8 @@ UBool RegexMatcher::find() {
          // No optimization was found.
          //  Try a match at each input position.
          for (;;) {
-            MatchAt(startPos, FALSE, fDeferredStatus);
-            if (U_FAILURE(fDeferredStatus)) {
+            MatchAt(startPos, FALSE, status);
+            if (U_FAILURE(status)) {
                  return FALSE;
              }
              if (fMatch) {
@@ -670,7 +668,7 @@ UBool RegexMatcher::find() {
              // Note that it's perfectly OK for a pattern to have a zero-length
              //   match at the end of a string, so we must make sure that the loop
              //   runs with startPos == testStartLimit the last time through.
-            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+            if  (findProgressInterrupt(startPos, status))
                  return FALSE;
          }
          U_ASSERT(FALSE);
@@ -682,8 +680,8 @@ UBool RegexMatcher::find() {
              fMatch = FALSE;
              return FALSE;
          }
-        MatchAt(startPos, FALSE, fDeferredStatus);
-        if (U_FAILURE(fDeferredStatus)) {
+        MatchAt(startPos, FALSE, status);
+        if (U_FAILURE(status)) {
              return FALSE;
          }
          return fMatch;
@@ -703,8 +701,8 @@ UBool RegexMatcher::find() {
                  // and handle end of text in the following block.
                  if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) ||
                                (c>=256 && fPattern->fInitialChars->contains(c)))) {
-                    MatchAt(pos, FALSE, fDeferredStatus);
-                    if (U_FAILURE(fDeferredStatus)) {
+                    MatchAt(pos, FALSE, status);
+                    if (U_FAILURE(status)) {
                          return FALSE;
                      }
                      if (fMatch) {
@@ -717,7 +715,7 @@ UBool RegexMatcher::find() {
                      fHitEnd = TRUE;
                      return FALSE;
                  }
-                   if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                if  (findProgressInterrupt(startPos, status))
                      return FALSE;
              }
          }
@@ -735,8 +733,8 @@ UBool RegexMatcher::find() {
                  c = UTEXT_NEXT32(fInputText);
                  startPos = UTEXT_GETNATIVEINDEX(fInputText);
                  if (c == theChar) {
-                    MatchAt(pos, FALSE, fDeferredStatus);
-                    if (U_FAILURE(fDeferredStatus)) {
+                    MatchAt(pos, FALSE, status);
+                    if (U_FAILURE(status)) {
                          return FALSE;
                      }
                      if (fMatch) {
@@ -749,7 +747,7 @@ UBool RegexMatcher::find() {
                      fHitEnd = TRUE;
                      return FALSE;
                  }
-                   if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                if  (findProgressInterrupt(startPos, status))
                      return FALSE;
             }
          }
@@ -759,8 +757,8 @@ UBool RegexMatcher::find() {
          {
              UChar32  c;
              if (startPos == fAnchorStart) {
-                MatchAt(startPos, FALSE, fDeferredStatus);
-                if (U_FAILURE(fDeferredStatus)) {
+                MatchAt(startPos, FALSE, status);
+                if (U_FAILURE(status)) {
                      return FALSE;
                  }
                  if (fMatch) {
@@ -778,8 +776,8 @@ UBool RegexMatcher::find() {
              if (fPattern->fFlags & UREGEX_UNIX_LINES) {
                  for (;;) {
                      if (c == 0x0a) {
-                            MatchAt(startPos, FALSE, fDeferredStatus);
-                            if (U_FAILURE(fDeferredStatus)) {
+                            MatchAt(startPos, FALSE, status);
+                            if (U_FAILURE(status)) {
                                  return FALSE;
                              }
                              if (fMatch) {
@@ -797,7 +795,7 @@ UBool RegexMatcher::find() {
                      // Note that it's perfectly OK for a pattern to have a zero-length
                      //   match at the end of a string, so we must make sure that the loop
                      //   runs with startPos == testStartLimit the last time through.
-                           if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                    if  (findProgressInterrupt(startPos, status))
                          return FALSE;
                  }
              } else {
@@ -808,8 +806,8 @@ UBool RegexMatcher::find() {
                                  (void)UTEXT_NEXT32(fInputText);
                                  startPos = UTEXT_GETNATIVEINDEX(fInputText);
                              }
-                            MatchAt(startPos, FALSE, fDeferredStatus);
-                            if (U_FAILURE(fDeferredStatus)) {
+                            MatchAt(startPos, FALSE, status);
+                            if (U_FAILURE(status)) {
                                  return FALSE;
                              }
                              if (fMatch) {
@@ -827,7 +825,7 @@ UBool RegexMatcher::find() {
                      // Note that it's perfectly OK for a pattern to have a zero-length
                      //   match at the end of a string, so we must make sure that the loop
                      //   runs with startPos == testStartLimit the last time through.
-                           if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                    if  (findProgressInterrupt(startPos, status))
                          return FALSE;
                  }
              }
@@ -864,7 +862,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) {
          return FALSE;
      }
      fMatchEnd = nativeStart;
-    return find();
+    return find(status);
  }
  
  
@@ -874,7 +872,7 @@ UBool RegexMatcher::find(int64_t start, UErrorCode &status) {
  //                       entire string is available in the UText's chunk buffer.
  //
  //--------------------------------------------------------------------------------
-UBool RegexMatcher::findUsingChunk() {
+UBool RegexMatcher::findUsingChunk(UErrorCode &status) {
      // Start at the position of the last match end.  (Will be zero if the
      //   matcher has been reset.
      //
@@ -931,8 +929,8 @@ UBool RegexMatcher::findUsingChunk() {
          // No optimization was found.
          //  Try a match at each input position.
          for (;;) {
-            MatchChunkAt(startPos, FALSE, fDeferredStatus);
-            if (U_FAILURE(fDeferredStatus)) {
+            MatchChunkAt(startPos, FALSE, status);
+            if (U_FAILURE(status)) {
                  return FALSE;
              }
              if (fMatch) {
@@ -946,7 +944,7 @@ UBool RegexMatcher::findUsingChunk() {
              // Note that it's perfectly OK for a pattern to have a zero-length
              //   match at the end of a string, so we must make sure that the loop
              //   runs with startPos == testLen the last time through.
-            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+            if  (findProgressInterrupt(startPos, status))
                  return FALSE;
          }
          U_ASSERT(FALSE);
@@ -958,8 +956,8 @@ UBool RegexMatcher::findUsingChunk() {
              fMatch = FALSE;
              return FALSE;
          }
-        MatchChunkAt(startPos, FALSE, fDeferredStatus);
-        if (U_FAILURE(fDeferredStatus)) {
+        MatchChunkAt(startPos, FALSE, status);
+        if (U_FAILURE(status)) {
              return FALSE;
          }
          return fMatch;
@@ -974,8 +972,8 @@ UBool RegexMatcher::findUsingChunk() {
              U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
              if ((c<256 && fPattern->fInitialChars8->contains(c)) ||
                  (c>=256 && fPattern->fInitialChars->contains(c))) {
-                MatchChunkAt(pos, FALSE, fDeferredStatus);
-                if (U_FAILURE(fDeferredStatus)) {
+                MatchChunkAt(pos, FALSE, status);
+                if (U_FAILURE(status)) {
                      return FALSE;
                  }
                  if (fMatch) {
@@ -987,7 +985,7 @@ UBool RegexMatcher::findUsingChunk() {
                  fHitEnd = TRUE;
                  return FALSE;
              }
-            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+            if  (findProgressInterrupt(startPos, status))
                  return FALSE;
          }
      }
@@ -1003,8 +1001,8 @@ UBool RegexMatcher::findUsingChunk() {
              int32_t pos = startPos;
              U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
              if (c == theChar) {
-                MatchChunkAt(pos, FALSE, fDeferredStatus);
-                if (U_FAILURE(fDeferredStatus)) {
+                MatchChunkAt(pos, FALSE, status);
+                if (U_FAILURE(status)) {
                      return FALSE;
                  }
                  if (fMatch) {
@@ -1016,7 +1014,7 @@ UBool RegexMatcher::findUsingChunk() {
                  fHitEnd = TRUE;
                  return FALSE;
              }
-            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+            if  (findProgressInterrupt(startPos, status))
                  return FALSE;
          }
      }
@@ -1026,8 +1024,8 @@ UBool RegexMatcher::findUsingChunk() {
      {
          UChar32  c;
          if (startPos == fAnchorStart) {
-            MatchChunkAt(startPos, FALSE, fDeferredStatus);
-            if (U_FAILURE(fDeferredStatus)) {
+            MatchChunkAt(startPos, FALSE, status);
+            if (U_FAILURE(status)) {
                  return FALSE;
              }
              if (fMatch) {
@@ -1040,8 +1038,8 @@ UBool RegexMatcher::findUsingChunk() {
              for (;;) {
                  c = inputBuf[startPos-1];
                  if (c == 0x0a) {
-                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
-                    if (U_FAILURE(fDeferredStatus)) {
+                    MatchChunkAt(startPos, FALSE, status);
+                    if (U_FAILURE(status)) {
                          return FALSE;
                      }
                      if (fMatch) {
@@ -1057,7 +1055,7 @@ UBool RegexMatcher::findUsingChunk() {
                  // Note that it's perfectly OK for a pattern to have a zero-length
                  //   match at the end of a string, so we must make sure that the loop
                  //   runs with startPos == testLen the last time through.
-                   if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                if  (findProgressInterrupt(startPos, status))
                      return FALSE;
              }
          } else {
@@ -1068,8 +1066,8 @@ UBool RegexMatcher::findUsingChunk() {
                      if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
                          startPos++;
                      }
-                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
-                    if (U_FAILURE(fDeferredStatus)) {
+                    MatchChunkAt(startPos, FALSE, status);
+                    if (U_FAILURE(status)) {
                          return FALSE;
                      }
                      if (fMatch) {
@@ -1085,7 +1083,7 @@ UBool RegexMatcher::findUsingChunk() {
                  // Note that it's perfectly OK for a pattern to have a zero-length
                  //   match at the end of a string, so we must make sure that the loop
                  //   runs with startPos == testLen the last time through.
-                   if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
+                if  (findProgressInterrupt(startPos, status))
                      return FALSE;
              }
          }
@@ -1172,8 +1170,8 @@ UnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
  
  
  //  Return deep (mutable) clone
-//             Technology Preview (as an API), but note that the UnicodeString API is implemented
-//             using this function.
+//      Technology Preview (as an API), but note that the UnicodeString API is implemented
+//      using this function.
  UText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
      if (U_FAILURE(status)) {
          return dest;
@@ -2625,25 +2623,20 @@ void RegexMatcher::IncrementTime(UErrorCode &status) {
  
  //--------------------------------------------------------------------------------
  //
-//   ReportFindProgress     This function is called once for each advance in the target
+//   findProgressInterrupt  This function is called once for each advance in the target
  //                          string from the find() function, and calls the user progress callback
  //                          function if there is one installed.
  //
-//                          NOTE:
-//
-//                          If the match operation needs to be aborted because the user
-//                          callback asked for it, just set an error status.
-//                          The engine will pick that up and stop in its outer loop.
+//         Return:  TRUE if the find operation is to be terminated.
+//                  FALSE if the find operation is to continue running.
  //
  //--------------------------------------------------------------------------------
-UBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) {
-    if (fFindProgressCallbackFn != NULL) {
-        if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) {
-            status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/;
-            return FALSE;
-        }
+UBool RegexMatcher::findProgressInterrupt(int64_t pos, UErrorCode &status) {
+    if (fFindProgressCallbackFn && !(*fFindProgressCallbackFn)(fFindProgressCallbackContext, pos)) {
+        status = U_REGEX_STOPPED_BY_CALLER;
+        return TRUE;
      }
-    return TRUE;
+    return FALSE;
  }
  
  //--------------------------------------------------------------------------------
diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h

index 7b85d7040f2a92711f1092381546b2d777046274..950088e192b040ca6b71f063352921f7d7b3b073 100644 (file)
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@@ -801,6 +801,21 @@ public:
      virtual UBool find();
  
  
+   /**
+    *  Find the next pattern match in the input string.
+    *  The find begins searching the input at the location following the end of
+    *  the previous match, or at the start of the string if there is no previous match.
+    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
+    *  will provide more information regarding the match.
+    *  <p>Note that if the input string is changed by the application,
+    *     use find(startPos, status) instead of find(status), because the saved
+    *     starting position may not be valid with the altered input string.</p>
+    *  @param   status  A reference to a UErrorCode to receive any errors.
+    *  @return  TRUE if a match is found.
+    *  @stable @internal
+    */
+    virtual UBool find(UErrorCode &status);
+
     /**
      *   Resets this RegexMatcher and then attempts to find the next substring of the
      *   input string that matches the pattern, starting at the specified index.
@@ -1744,11 +1759,13 @@ private:
      REStackFrame        *resetStack();
      inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
      void                 IncrementTime(UErrorCode &status);
-    UBool                ReportFindProgress(int64_t matchIndex, UErrorCode &status);
+
+    // Call user find callback function, if set. Return TRUE if operation should be interrupted.
+    inline UBool         findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
      
      int64_t              appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
      
-    UBool                findUsingChunk();
+    UBool                findUsingChunk(UErrorCode &status);
      void                 MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
      UBool                isChunkWordBoundary(int32_t pos);
  
diff --git a/icu4c/source/i18n/uregex.cpp b/icu4c/source/i18n/uregex.cpp

index c05b7d09d4d2fe27089b049f7f5c2f2f5a358962..01951234b9c0fead09307ee16038722ff28517bf 100644 (file)
--- a/icu4c/source/i18n/uregex.cpp
+++ b/icu4c/source/i18n/uregex.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-*   Copyright (C) 2004-2013, International Business Machines
+*   Copyright (C) 2004-2014, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   file name:  uregex.cpp
@@ -144,7 +144,7 @@ uregex_open( const  UChar          *pattern,
      re->fPatStringLen = patternLength;
      u_memcpy(patBuf, pattern, actualPatLen);
      patBuf[actualPatLen] = 0;
-    
+
      UText patText = UTEXT_INITIALIZER;
      utext_openUChars(&patText, patBuf, patternLength, status);
  
@@ -157,7 +157,7 @@ uregex_open( const  UChar          *pattern,
          re->fPat = RegexPattern::compile(&patText, flags, *status);
      }
      utext_close(&patText);
-    
+
      if (U_FAILURE(*status)) {
          goto ErrorExit;
      }
@@ -186,7 +186,7 @@ uregex_openUText(UText          *pattern,
                   uint32_t        flags,
                   UParseError    *pe,
                   UErrorCode     *status) {
-    
+
      if (U_FAILURE(*status)) {
          return NULL;
      }
@@ -194,19 +194,19 @@ uregex_openUText(UText          *pattern,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return NULL;
      }
-    
+
      int64_t patternNativeLength = utext_nativeLength(pattern);
-    
+
      if (patternNativeLength == 0) {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return NULL;
      }
-    
+
      RegularExpression *re     = new RegularExpression;
-    
+
      UErrorCode lengthStatus = U_ZERO_ERROR;
      int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
-    
+
      u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
      UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
      if (re == NULL || refC == NULL || patBuf == NULL) {
@@ -218,7 +218,7 @@ uregex_openUText(UText          *pattern,
      }
      re->fPatRefCount = refC;
      *re->fPatRefCount = 1;
-    
+
      //
      // Make a copy of the pattern string, so we can return it later if asked.
      //    For compiling the pattern, we will use a read-only UText wrapper
@@ -227,10 +227,10 @@ uregex_openUText(UText          *pattern,
      re->fPatString    = patBuf;
      re->fPatStringLen = pattern16Length;
      utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
-    
+
      UText patText = UTEXT_INITIALIZER;
      utext_openUChars(&patText, patBuf, pattern16Length, status);
-    
+
      //
      // Compile the pattern
      //
@@ -240,11 +240,11 @@ uregex_openUText(UText          *pattern,
          re->fPat = RegexPattern::compile(&patText, flags, *status);
      }
      utext_close(&patText);
-    
+
      if (U_FAILURE(*status)) {
          goto ErrorExit;
      }
-    
+
      //
      // Create the matcher object
      //
@@ -252,11 +252,11 @@ uregex_openUText(UText          *pattern,
      if (U_SUCCESS(*status)) {
          return (URegularExpression*)re;
      }
-    
+
  ErrorExit:
      delete re;
      return NULL;
-    
+
  }
  
  //----------------------------------------------------------------------------------------
@@ -280,7 +280,7 @@ uregex_close(URegularExpression  *re2) {
  //    uregex_clone
  //
  //----------------------------------------------------------------------------------------
-U_CAPI URegularExpression * U_EXPORT2 
+U_CAPI URegularExpression * U_EXPORT2
  uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
      RegularExpression *source = (RegularExpression*)source2;
      if (validateRE(source, FALSE, status) == FALSE) {
@@ -300,7 +300,7 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
      }
  
      clone->fPat          = source->fPat;
-    clone->fPatRefCount  = source->fPatRefCount; 
+    clone->fPatRefCount  = source->fPatRefCount;
      clone->fPatString    = source->fPatString;
      clone->fPatStringLen = source->fPatStringLen;
      umtx_atomic_inc(source->fPatRefCount);
@@ -317,12 +317,12 @@ uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
  //    uregex_pattern
  //
  //------------------------------------------------------------------------------
-U_CAPI const UChar * U_EXPORT2 
+U_CAPI const UChar * U_EXPORT2
  uregex_pattern(const  URegularExpression *regexp2,
                        int32_t            *patLength,
                        UErrorCode         *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
-    
+
      if (validateRE(regexp, FALSE, status) == FALSE) {
          return NULL;
      }
@@ -351,7 +351,7 @@ uregex_patternUText(const URegularExpression *regexp2,
  //    uregex_flags
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
      if (validateRE(regexp, FALSE, status) == FALSE) {
@@ -367,7 +367,7 @@ uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
  //    uregex_setText
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setText(URegularExpression *regexp2,
                 const UChar        *text,
                 int32_t             textLength,
@@ -380,15 +380,15 @@ uregex_setText(URegularExpression *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return;
      }
-    
+
      if (regexp->fOwnsText && regexp->fText != NULL) {
          uprv_free((void *)regexp->fText);
      }
-    
+
      regexp->fText       = text;
      regexp->fTextLength = textLength;
      regexp->fOwnsText   = FALSE;
-    
+
      UText input = UTEXT_INITIALIZER;
      utext_openUChars(&input, text, textLength, status);
      regexp->fMatcher->reset(&input);
@@ -401,7 +401,7 @@ uregex_setText(URegularExpression *regexp2,
  //    uregex_setUText
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setUText(URegularExpression *regexp2,
                  UText              *text,
                  UErrorCode         *status) {
@@ -413,11 +413,11 @@ uregex_setUText(URegularExpression *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return;
      }
-    
+
      if (regexp->fOwnsText && regexp->fText != NULL) {
          uprv_free((void *)regexp->fText);
      }
-    
+
      regexp->fText       = NULL; // only fill it in on request
      regexp->fTextLength = -1;
      regexp->fOwnsText   = TRUE;
@@ -431,7 +431,7 @@ uregex_setUText(URegularExpression *regexp2,
  //    uregex_getText
  //
  //------------------------------------------------------------------------------
-U_CAPI const UChar * U_EXPORT2 
+U_CAPI const UChar * U_EXPORT2
  uregex_getText(URegularExpression *regexp2,
                 int32_t            *textLength,
                 UErrorCode         *status)  {
@@ -439,7 +439,7 @@ uregex_getText(URegularExpression *regexp2,
      if (validateRE(regexp, FALSE, status) == FALSE) {
          return NULL;
      }
-    
+
      if (regexp->fText == NULL) {
          // need to fill in the text
          UText *inputText = regexp->fMatcher->inputText();
@@ -452,13 +452,13 @@ uregex_getText(URegularExpression *regexp2,
              UErrorCode lengthStatus = U_ZERO_ERROR;
              regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
              UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
-            
+
              utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
              regexp->fText = inputChars;
              regexp->fOwnsText = TRUE; // should already be set but just in case
          }
      }
-    
+
      if (textLength != NULL) {
          *textLength = regexp->fTextLength;
      }
@@ -471,7 +471,7 @@ uregex_getText(URegularExpression *regexp2,
  //    uregex_getUText
  //
  //------------------------------------------------------------------------------
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_getUText(URegularExpression *regexp2,
                  UText              *dest,
                  UErrorCode         *status)  {
@@ -488,7 +488,7 @@ uregex_getUText(URegularExpression *regexp2,
  //    uregex_refreshUText
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_refreshUText(URegularExpression *regexp2,
                      UText              *text,
                      UErrorCode         *status) {
@@ -505,14 +505,14 @@ uregex_refreshUText(URegularExpression *regexp2,
  //    uregex_matches
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_matches(URegularExpression *regexp2,
                 int32_t            startIndex,
                 UErrorCode        *status)  {
      return uregex_matches64( regexp2, (int64_t)startIndex, status);
  }
  
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_matches64(URegularExpression *regexp2,
                   int64_t            startIndex,
                   UErrorCode        *status)  {
@@ -535,14 +535,14 @@ uregex_matches64(URegularExpression *regexp2,
  //    uregex_lookingAt
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_lookingAt(URegularExpression *regexp2,
                   int32_t             startIndex,
                   UErrorCode         *status)  {
      return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
  }
  
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_lookingAt64(URegularExpression *regexp2,
                     int64_t             startIndex,
                     UErrorCode         *status)  {
@@ -566,16 +566,16 @@ uregex_lookingAt64(URegularExpression *regexp2,
  //    uregex_find
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_find(URegularExpression *regexp2,
-            int32_t             startIndex, 
+            int32_t             startIndex,
              UErrorCode         *status)  {
      return uregex_find64( regexp2, (int64_t)startIndex, status);
  }
  
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_find64(URegularExpression *regexp2,
-              int64_t             startIndex, 
+              int64_t             startIndex,
                UErrorCode         *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
      UBool result = FALSE;
@@ -584,7 +584,7 @@ uregex_find64(URegularExpression *regexp2,
      }
      if (startIndex == -1) {
          regexp->fMatcher->resetPreserveRegion();
-        result = regexp->fMatcher->find();
+        result = regexp->fMatcher->find(*status);
      } else {
          result = regexp->fMatcher->find(startIndex, *status);
      }
@@ -597,14 +597,14 @@ uregex_find64(URegularExpression *regexp2,
  //    uregex_findNext
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_findNext(URegularExpression *regexp2,
                  UErrorCode         *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
      if (validateRE(regexp, TRUE, status) == FALSE) {
          return FALSE;
      }
-    UBool result = regexp->fMatcher->find();
+    UBool result = regexp->fMatcher->find(*status);
      return result;
  }
  
@@ -613,7 +613,7 @@ uregex_findNext(URegularExpression *regexp2,
  //    uregex_groupCount
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_groupCount(URegularExpression *regexp2,
                    UErrorCode         *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -630,7 +630,7 @@ uregex_groupCount(URegularExpression *regexp2,
  //    uregex_group
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_group(URegularExpression *regexp2,
               int32_t             groupNum,
               UChar              *dest,
@@ -644,11 +644,11 @@ uregex_group(URegularExpression *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
-    
+
      if (destCapacity == 0 || regexp->fText != NULL) {
          // If preflighting or if we already have the text as UChars,
          // this is a little cheaper than going through uregex_groupUTextDeep()
-        
+
          //
          // Pick up the range of characters from the matcher
          //
@@ -660,7 +660,7 @@ uregex_group(URegularExpression *regexp2,
  
          //
          // Trim length based on buffer capacity
-        // 
+        //
          int32_t fullLength = endIx - startIx;
          int32_t copyLength = fullLength;
          if (copyLength < destCapacity) {
@@ -671,7 +671,7 @@ uregex_group(URegularExpression *regexp2,
              copyLength = destCapacity;
              *status = U_BUFFER_OVERFLOW_ERROR;
          }
-        
+
          //
          // Copy capture group to user's buffer
          //
@@ -696,7 +696,7 @@ uregex_group(URegularExpression *regexp2,
  //    uregex_groupUText
  //
  //------------------------------------------------------------------------------
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_groupUText(URegularExpression *regexp2,
                    int32_t             groupNum,
                    UText              *dest,
@@ -716,7 +716,7 @@ uregex_groupUText(URegularExpression *regexp2,
  //    uregex_groupUTextDeep
  //
  //------------------------------------------------------------------------------
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_groupUTextDeep(URegularExpression *regexp2,
                    int32_t             groupNum,
                    UText              *dest,
@@ -738,7 +738,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
              UErrorCode emptyTextStatus = U_ZERO_ERROR;
              return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
          }
-        
+
          if (dest) {
              utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
          } else {
@@ -747,7 +747,7 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
              dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
              utext_close(&groupText);
          }
-        
+
          return dest;
      } else {
          return regexp->fMatcher->group(groupNum, dest, *status);
@@ -759,14 +759,14 @@ uregex_groupUTextDeep(URegularExpression *regexp2,
  //    uregex_start
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_start(URegularExpression *regexp2,
               int32_t             groupNum,
               UErrorCode          *status)  {
      return (int32_t)uregex_start64( regexp2, groupNum, status);
  }
  
-U_CAPI int64_t U_EXPORT2 
+U_CAPI int64_t U_EXPORT2
  uregex_start64(URegularExpression *regexp2,
                 int32_t             groupNum,
                 UErrorCode          *status)  {
@@ -783,14 +783,14 @@ uregex_start64(URegularExpression *regexp2,
  //    uregex_end
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_end(URegularExpression   *regexp2,
             int32_t               groupNum,
             UErrorCode           *status)  {
      return (int32_t)uregex_end64( regexp2, groupNum, status);
  }
  
-U_CAPI int64_t U_EXPORT2 
+U_CAPI int64_t U_EXPORT2
  uregex_end64(URegularExpression   *regexp2,
               int32_t               groupNum,
               UErrorCode           *status)  {
@@ -807,14 +807,14 @@ uregex_end64(URegularExpression   *regexp2,
  //    uregex_reset
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_reset(URegularExpression    *regexp2,
               int32_t               index,
               UErrorCode            *status)  {
      uregex_reset64( regexp2, (int64_t)index, status);
  }
  
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_reset64(URegularExpression    *regexp2,
                 int64_t               index,
                 UErrorCode            *status)  {
@@ -831,7 +831,7 @@ uregex_reset64(URegularExpression    *regexp2,
  //    uregex_setRegion
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setRegion(URegularExpression   *regexp2,
                   int32_t               regionStart,
                   int32_t               regionLimit,
@@ -839,7 +839,7 @@ uregex_setRegion(URegularExpression   *regexp2,
      uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
  }
  
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setRegion64(URegularExpression   *regexp2,
                     int64_t               regionStart,
                     int64_t               regionLimit,
@@ -857,7 +857,7 @@ uregex_setRegion64(URegularExpression   *regexp2,
  //    uregex_setRegionAndStart
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setRegionAndStart(URegularExpression   *regexp2,
                   int64_t               regionStart,
                   int64_t               regionLimit,
@@ -875,13 +875,13 @@ uregex_setRegionAndStart(URegularExpression   *regexp2,
  //    uregex_regionStart
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_regionStart(const  URegularExpression   *regexp2,
                            UErrorCode           *status)  {
      return (int32_t)uregex_regionStart64(regexp2, status);
  }
  
-U_CAPI int64_t U_EXPORT2 
+U_CAPI int64_t U_EXPORT2
  uregex_regionStart64(const  URegularExpression   *regexp2,
                              UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -897,13 +897,13 @@ uregex_regionStart64(const  URegularExpression   *regexp2,
  //    uregex_regionEnd
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_regionEnd(const  URegularExpression   *regexp2,
                          UErrorCode           *status)  {
      return (int32_t)uregex_regionEnd64(regexp2, status);
  }
  
-U_CAPI int64_t U_EXPORT2 
+U_CAPI int64_t U_EXPORT2
  uregex_regionEnd64(const  URegularExpression   *regexp2,
                            UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -919,7 +919,7 @@ uregex_regionEnd64(const  URegularExpression   *regexp2,
  //    uregex_hasTransparentBounds
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
                                     UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -935,7 +935,7 @@ uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
  //    uregex_useTransparentBounds
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_useTransparentBounds(URegularExpression    *regexp2,
                              UBool                  b,
                              UErrorCode            *status)  {
@@ -952,7 +952,7 @@ uregex_useTransparentBounds(URegularExpression    *regexp2,
  //    uregex_hasAnchoringBounds
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
                                   UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -968,7 +968,7 @@ uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
  //    uregex_useAnchoringBounds
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_useAnchoringBounds(URegularExpression    *regexp2,
                            UBool                  b,
                            UErrorCode            *status)  {
@@ -985,7 +985,7 @@ uregex_useAnchoringBounds(URegularExpression    *regexp2,
  //    uregex_hitEnd
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_hitEnd(const  URegularExpression   *regexp2,
                       UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -1001,7 +1001,7 @@ uregex_hitEnd(const  URegularExpression   *regexp2,
  //    uregex_requireEnd
  //
  //------------------------------------------------------------------------------
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uregex_requireEnd(const  URegularExpression   *regexp2,
                           UErrorCode           *status)  {
      RegularExpression *regexp = (RegularExpression*)regexp2;
@@ -1017,7 +1017,7 @@ uregex_requireEnd(const  URegularExpression   *regexp2,
  //    uregex_setTimeLimit
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setTimeLimit(URegularExpression   *regexp2,
                      int32_t               limit,
                      UErrorCode           *status) {
@@ -1034,7 +1034,7 @@ uregex_setTimeLimit(URegularExpression   *regexp2,
  //    uregex_getTimeLimit
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_getTimeLimit(const  URegularExpression   *regexp2,
                             UErrorCode           *status) {
      int32_t retVal = 0;
@@ -1052,7 +1052,7 @@ uregex_getTimeLimit(const  URegularExpression   *regexp2,
  //    uregex_setStackLimit
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_setStackLimit(URegularExpression   *regexp2,
                       int32_t               limit,
                       UErrorCode           *status) {
@@ -1069,7 +1069,7 @@ uregex_setStackLimit(URegularExpression   *regexp2,
  //    uregex_getStackLimit
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_getStackLimit(const  URegularExpression   *regexp2,
                              UErrorCode           *status) {
      int32_t retVal = 0;
@@ -1103,7 +1103,7 @@ uregex_setMatchCallback(URegularExpression      *regexp2,
  //    uregex_getMatchCallback
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_getMatchCallback(const URegularExpression    *regexp2,
                          URegexMatchCallback        **callback,
                          const void                 **context,
@@ -1137,7 +1137,7 @@ uregex_setFindProgressCallback(URegularExpression              *regexp2,
  //    uregex_getMatchCallback
  //
  //------------------------------------------------------------------------------
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_getFindProgressCallback(const URegularExpression          *regexp2,
                                  URegexFindProgressCallback        **callback,
                                  const void                        **context,
@@ -1154,7 +1154,7 @@ uregex_getFindProgressCallback(const URegularExpression          *regexp2,
  //    uregex_replaceAll
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_replaceAll(URegularExpression    *regexp2,
                    const UChar           *replacementText,
                    int32_t                replacementLength,
@@ -1187,7 +1187,7 @@ uregex_replaceAll(URegularExpression    *regexp2,
                                          &destBuf, &destCapacity, status);
      }
      len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
-    
+
      if (U_FAILURE(findStatus)) {
          // If anything went wrong with the findNext(), make that error trump
          //   whatever may have happened with the append() operations.
@@ -1204,7 +1204,7 @@ uregex_replaceAll(URegularExpression    *regexp2,
  //    uregex_replaceAllUText
  //
  //------------------------------------------------------------------------------
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_replaceAllUText(URegularExpression    *regexp2,
                         UText                 *replacementText,
                         UText                 *dest,
@@ -1217,18 +1217,18 @@ uregex_replaceAllUText(URegularExpression    *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
-    
+
      dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
      return dest;
  }
-    
+
  
  //------------------------------------------------------------------------------
  //
  //    uregex_replaceFirst
  //
  //------------------------------------------------------------------------------
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_replaceFirst(URegularExpression  *regexp2,
                      const UChar         *replacementText,
                      int32_t              replacementLength,
@@ -1251,7 +1251,7 @@ uregex_replaceFirst(URegularExpression  *regexp2,
      uregex_reset(regexp2, 0, status);
      findSucceeded = uregex_find(regexp2, 0, status);
      if (findSucceeded) {
-        len = uregex_appendReplacement(regexp2, replacementText, replacementLength, 
+        len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
                                         &destBuf, &destCapacity, status);
      }
      len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
@@ -1265,7 +1265,7 @@ uregex_replaceFirst(URegularExpression  *regexp2,
  //    uregex_replaceFirstUText
  //
  //------------------------------------------------------------------------------
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_replaceFirstUText(URegularExpression  *regexp2,
                           UText                 *replacementText,
                           UText                 *dest,
@@ -1278,7 +1278,7 @@ uregex_replaceFirstUText(URegularExpression  *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
-    
+
      dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
      return dest;
  }
@@ -1308,7 +1308,7 @@ class RegexCImpl {
          UChar                **destBuf,
          int32_t               *destCapacity,
          UErrorCode            *status);
-                  
+
      inline static int32_t split(RegularExpression    *regexp,
          UChar                 *destBuf,
          int32_t                destCapacity,
@@ -1364,7 +1364,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
          return 0;
      }
      if (replacementText == NULL || replacementLength < -1 ||
-        destCapacity == NULL || destBuf == NULL || 
+        destCapacity == NULL || destBuf == NULL ||
          (*destBuf == NULL && *destCapacity > 0) ||
          *destCapacity < 0) {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1381,7 +1381,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
      int32_t   capacity         = *destCapacity;
      int32_t   destIdx          =  0;
      int32_t   i;
-    
+
      // If it wasn't supplied by the caller,  get the length of the replacement text.
      //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
      //          the fly and avoid this step.
@@ -1405,7 +1405,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
          }
          for (i=lastMatchEnd; i<matchStart; i++) {
              appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
-        }        
+        }
      } else {
          UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
          destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
@@ -1420,7 +1420,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
          UChar  c = replacementText[replIdx];
          replIdx++;
          if (c != DOLLARSIGN && c != BACKSLASH) {
-            // Common case, no substitution, no escaping, 
+            // Common case, no substitution, no escaping,
              //  just copy the char to the dest buf.
              appendToBuf(c, &destIdx, dest, capacity);
              continue;
@@ -1439,9 +1439,9 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
  
              if (c==0x55/*U*/ || c==0x75/*u*/) {
                  // We have a \udddd or \Udddddddd escape sequence.
-                UChar32 escapedChar = 
+                UChar32 escapedChar =
                      u_unescapeAt(uregex_ucstr_unescape_charAt,
-                       &replIdx,                   // Index is updated by unescapeAt 
+                       &replIdx,                   // Index is updated by unescapeAt
                         replacementLength,          // Length of replacement text
                         (void *)replacementText);
  
@@ -1527,7 +1527,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
      } else {
          *status = U_BUFFER_OVERFLOW_ERROR;
      }
-    
+
      //
      // Return an updated dest buffer and capacity to the caller.
      //
@@ -1554,14 +1554,14 @@ int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
  //
  //   appendReplacement   the actual API function,
  //
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_appendReplacement(URegularExpression    *regexp2,
                           const UChar           *replacementText,
                           int32_t                replacementLength,
                           UChar                **destBuf,
                           int32_t               *destCapacity,
                           UErrorCode            *status) {
-    
+
      RegularExpression *regexp = (RegularExpression*)regexp2;
      return RegexCImpl::appendReplacement(
          regexp, replacementText, replacementLength,destBuf, destCapacity, status);
@@ -1570,7 +1570,7 @@ uregex_appendReplacement(URegularExpression    *regexp2,
  //
  //   uregex_appendReplacementUText...can just use the normal C++ method
  //
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uregex_appendReplacementUText(URegularExpression    *regexp2,
                                UText                 *replText,
                                UText                 *dest,
@@ -1603,8 +1603,8 @@ int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
      if (validateRE(regexp, TRUE, status) == FALSE) {
          return 0;
      }
-    
-    if (destCapacity == NULL || destBuf == NULL || 
+
+    if (destCapacity == NULL || destBuf == NULL ||
          (*destBuf == NULL && *destCapacity > 0) ||
          *destCapacity < 0)
      {
@@ -1617,7 +1617,7 @@ int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
      int32_t  destIdx     = 0;
      int32_t  destCap     = *destCapacity;
      UChar    *dest       = *destBuf;
-    
+
      if (regexp->fText != NULL) {
          int32_t srcIdx;
          int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
@@ -1629,7 +1629,7 @@ int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
              UErrorCode status = U_ZERO_ERROR;
              srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
          }
-            
+
          for (;;) {
              U_ASSERT(destIdx >= 0);
  
@@ -1655,11 +1655,11 @@ int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
              }
              srcIdx++;
              destIdx++;
-        }            
+        }
      } else {
          int64_t  srcIdx;
          if (m->fMatch) {
-            // The most recent call to find() succeeded.  
+            // The most recent call to find() succeeded.
              srcIdx = m->fMatchEnd;
          } else {
              // The last call to find() on this matcher failed().
@@ -1710,7 +1710,7 @@ int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
  //
  //   appendTail   the actual API function
  //
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_appendTail(URegularExpression    *regexp2,
                    UChar                **destBuf,
                    int32_t               *destCapacity,
@@ -1723,7 +1723,7 @@ uregex_appendTail(URegularExpression    *regexp2,
  //
  //   uregex_appendTailUText...can just use the normal C++ method
  //
-U_CAPI UText * U_EXPORT2 
+U_CAPI UText * U_EXPORT2
  uregex_appendTailUText(URegularExpression    *regexp2,
                         UText                 *dest,
                         UErrorCode            *status)  {
@@ -1815,19 +1815,19 @@ int32_t RegexCImpl::split(RegularExpression     *regexp,
                      i = destFieldsCapacity-1;
                      destIdx = (int32_t)(destFields[i] - destFields[0]);
                  }
-                
+
                  destFields[i] = &destBuf[destIdx];
                  destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
                                               &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
              }
              break;
          }
-        
+
          if (regexp->fMatcher->find()) {
              // We found another delimiter.  Move everything from where we started looking
              //  up until the start of the delimiter into the next output string.
              destFields[i] = &destBuf[destIdx];
-            
+
              destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
                                           &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
              if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
@@ -1836,7 +1836,7 @@ int32_t RegexCImpl::split(RegularExpression     *regexp,
                  *status = tStatus;
              }
              nextOutputStringStart = regexp->fMatcher->fMatchEnd;
-            
+
              // If the delimiter pattern has capturing parentheses, the captured
              //  text goes out into the next n destination strings.
              int32_t groupNum;
@@ -1846,14 +1846,14 @@ int32_t RegexCImpl::split(RegularExpression     *regexp,
                      break;
                  }
                  i++;
-                
+
                  // Set up to extract the capture group contents into the dest buffer.
                  destFields[i] = &destBuf[destIdx];
                  tStatus = U_ZERO_ERROR;
-                int32_t t = uregex_group((URegularExpression*)regexp, 
-                                         groupNum, 
-                                         destFields[i], 
-                                         REMAINING_CAPACITY(destIdx, destCapacity), 
+                int32_t t = uregex_group((URegularExpression*)regexp,
+                                         groupNum,
+                                         destFields[i],
+                                         REMAINING_CAPACITY(destIdx, destCapacity),
                                           &tStatus);
                  destIdx += t + 1;    // Record the space used in the output string buffer.
                                       //  +1 for the NUL that terminates the string.
@@ -1865,7 +1865,7 @@ int32_t RegexCImpl::split(RegularExpression     *regexp,
              }
  
              if (nextOutputStringStart == inputLen) {
-                // The delimiter was at the end of the string. 
+                // The delimiter was at the end of the string.
                  // Output an empty string, and then we are done.
                  if (destIdx < destCapacity) {
                      destBuf[destIdx] = 0;
@@ -1910,7 +1910,7 @@ int32_t RegexCImpl::split(RegularExpression     *regexp,
  //
  //   uregex_split   The actual API function
  //
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_split(URegularExpression      *regexp2,
               UChar                   *destBuf,
               int32_t                  destCapacity,
@@ -1929,15 +1929,15 @@ uregex_split(URegularExpression      *regexp2,
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
-    
+
      return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
  }
-    
+
  
  //
  //   uregex_splitUText...can just use the normal C++ method
  //
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uregex_splitUText(URegularExpression    *regexp2,
                    UText                 *destFields[],
                    int32_t                destFieldsCapacity,
diff --git a/icu4c/source/test/cintltst/reapits.c b/icu4c/source/test/cintltst/reapits.c

index ee18abfb9664c2e8a4e821ae300c29f1dfef2dd2..33897f3000878189dd1ab181cc459fc98d692f11 100644 (file)
--- a/icu4c/source/test/cintltst/reapits.c
+++ b/icu4c/source/test/cintltst/reapits.c
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT: 
- * Copyright (c) 2004-2013, International Business Machines Corporation and
+ * Copyright (c) 2004-2014, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /********************************************************************************
@@ -29,11 +29,13 @@
  #include "unicode/utext.h"
  #include "cintltst.h"
  
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
  #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
-log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
+log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
  
  #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
-log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}}
+log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
  
  /*
   *   TEST_SETUP and TEST_TEARDOWN
@@ -158,6 +160,7 @@ static void TestBug4315(void);
  static void TestUTextAPI(void);
  static void TestRefreshInput(void);
  static void TestBug8421(void);
+static void TestBug10815(void);
  
  void addURegexTest(TestNode** root);
  
@@ -168,6 +171,7 @@ void addURegexTest(TestNode** root)
      addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
      addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
      addTest(root, &TestBug8421,   "regex/TestBug8421");
+    addTest(root, &TestBug10815,   "regex/TestBug10815");
  }
  
  /*
@@ -204,7 +208,7 @@ static void TestRegexCAPI(void) {
      memset(&minus1, -1, sizeof(minus1));
  
      /* Mimimalist open/close */
-    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
+    u_uastrncpy(pat, "abc*", LENGTHOF(pat));
      re = uregex_open(pat, -1, 0, 0, &status);
      if (U_FAILURE(status)) {
           log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
@@ -256,7 +260,7 @@ static void TestRegexCAPI(void) {
  
          /* The TEST_ASSERT_SUCCESS above should change too... */
          if(U_SUCCESS(status)) {
-            u_uastrncpy(pat, "abc*", sizeof(pat)/2);
+            u_uastrncpy(pat, "abc*", LENGTHOF(pat));
              TEST_ASSERT(u_strcmp(pat, p) == 0);
              TEST_ASSERT(len==(int32_t)strlen("abc*"));
          }
@@ -296,8 +300,8 @@ static void TestRegexCAPI(void) {
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(clone3 != NULL);
  
-        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
-        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
+        u_uastrncpy(testString1, "abcccd", LENGTHOF(pat));
+        u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat));
  
          status = U_ZERO_ERROR;
          uregex_setText(clone1, testString1, -1, &status);
@@ -328,7 +332,7 @@ static void TestRegexCAPI(void) {
      {
          const UChar  *resultPat;
          int32_t       resultLen;
-        u_uastrncpy(pat, "hello", sizeof(pat)/2);
+        u_uastrncpy(pat, "hello", LENGTHOF(pat));
          status = U_ZERO_ERROR;
          re = uregex_open(pat, -1, 0, NULL, &status);
          resultPat = uregex_pattern(re, &resultLen, &status);
@@ -394,10 +398,10 @@ static void TestRegexCAPI(void) {
          UChar  text2[50];
          UBool  result;
  
-        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
-        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
+        u_uastrncpy(text1, "abcccd",  LENGTHOF(text1));
+        u_uastrncpy(text2, "abcccxd", LENGTHOF(text2));
          status = U_ZERO_ERROR;
-        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
+        u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
          re = uregex_open(pat, -1, 0, NULL, &status);
          TEST_ASSERT_SUCCESS(status);
  
@@ -449,10 +453,10 @@ static void TestRegexCAPI(void) {
          const UChar   *result;
          int32_t  textLength;
  
-        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
-        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
+        u_uastrncpy(text1, "abcccd",  LENGTHOF(text1));
+        u_uastrncpy(text2, "abcccxd", LENGTHOF(text2));
          status = U_ZERO_ERROR;
-        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
+        u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
          re = uregex_open(pat, -1, 0, NULL, &status);
  
          uregex_setText(re, text1, -1, &status);
@@ -486,9 +490,9 @@ static void TestRegexCAPI(void) {
          int     len;
          UChar   nullString[] = {0,0,0};
  
-        u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
+        u_uastrncpy(text1, "abcccde",  LENGTHOF(text1));
          status = U_ZERO_ERROR;
-        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
+        u_uastrncpy(pat, "abc*d", LENGTHOF(pat));
          re = uregex_open(pat, -1, 0, NULL, &status);
  
          uregex_setText(re, text1, -1, &status);
@@ -538,7 +542,7 @@ static void TestRegexCAPI(void) {
      {
          UChar    text1[50];
          UBool    result;
-        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
+        u_uastrncpy(text1, "012rx5rx890rxrx...",  LENGTHOF(text1));
          status = U_ZERO_ERROR;
          re = uregex_openC("rx", 0, NULL, &status);
  
@@ -621,7 +625,7 @@ static void TestRegexCAPI(void) {
          UChar    buf[80];
          UBool    result;
          int32_t  resultSz;
-        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
+        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  LENGTHOF(text1));
  
          status = U_ZERO_ERROR;
          re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@@ -634,21 +638,21 @@ static void TestRegexCAPI(void) {
  
          /*  Capture Group 0, the full match.  Should succeed.  */
          status = U_ZERO_ERROR;
-        resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
+        resultSz = uregex_group(re, 0, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("abc interior def", buf, TRUE);
          TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
  
          /*  Capture group #1.  Should succeed. */
          status = U_ZERO_ERROR;
-        resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
+        resultSz = uregex_group(re, 1, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING(" interior ", buf, TRUE);
          TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
  
          /*  Capture group out of range.  Error. */
          status = U_ZERO_ERROR;
-        uregex_group(re, 2, buf, sizeof(buf)/2, &status);
+        uregex_group(re, 2, buf, LENGTHOF(buf), &status);
          TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
  
          /* NULL buffer, pure pre-flight */
@@ -692,7 +696,7 @@ static void TestRegexCAPI(void) {
          TEST_ASSERT(uregex_regionStart(re, &status) == 3);
          TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
          TEST_ASSERT(uregex_findNext(re, &status));
-        TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
+        TEST_ASSERT(uregex_group(re, 0, resultString, LENGTHOF(resultString), &status) == 3)
          TEST_ASSERT_STRING("345", resultString, TRUE);
          TEST_TEARDOWN;
          
@@ -816,9 +820,9 @@ static void TestRegexCAPI(void) {
          UChar    replText[80];
          UChar    buf[80];
          int32_t  resultSz;
-        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
-        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
+        u_uastrncpy(text1, "Replace xaax x1x x...x.",  LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
+        u_uastrncpy(replText, "<$1>", LENGTHOF(replText));
  
          status = U_ZERO_ERROR;
          re = uregex_openC("x(.*?)x", 0, NULL, &status);
@@ -826,7 +830,7 @@ static void TestRegexCAPI(void) {
  
          /*  Normal case, with match */
          uregex_setText(re, text1, -1, &status);
-        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
+        resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
          TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
@@ -834,7 +838,7 @@ static void TestRegexCAPI(void) {
          /* No match.  Text should copy to output with no changes.  */
          status = U_ZERO_ERROR;
          uregex_setText(re, text2, -1, &status);
-        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
+        resultSz = uregex_replaceFirst(re, replText, -1, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("No match here.", buf, TRUE);
          TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
@@ -896,10 +900,10 @@ static void TestRegexCAPI(void) {
          int32_t  expectedResultSize2;
          int32_t  i;
  
-        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
-        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
-        u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
+        u_uastrncpy(text1, "Replace xaax x1x x...x.",  LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
+        u_uastrncpy(replText, "<$1>", LENGTHOF(replText));
+        u_uastrncpy(replText2, "<<$1>>", LENGTHOF(replText2));
          expectedResultSize = strlen(expectedResult);
          expectedResultSize2 = strlen(expectedResult2);
  
@@ -909,7 +913,7 @@ static void TestRegexCAPI(void) {
  
          /*  Normal case, with match */
          uregex_setText(re, text1, -1, &status);
-        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
+        resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING(expectedResult, buf, TRUE);
          TEST_ASSERT(resultSize == expectedResultSize);
@@ -917,7 +921,7 @@ static void TestRegexCAPI(void) {
          /* No match.  Text should copy to output with no changes.  */
          status = U_ZERO_ERROR;
          uregex_setText(re, text2, -1, &status);
-        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
+        resultSize = uregex_replaceAll(re, replText, -1, buf, LENGTHOF(buf), &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("No match here.", buf, TRUE);
          TEST_ASSERT(resultSize == u_strlen(text2));
@@ -1001,15 +1005,15 @@ static void TestRegexCAPI(void) {
          re = uregex_openC(".*", 0, 0, &status);
          TEST_ASSERT_SUCCESS(status);
  
-        u_uastrncpy(text, "whatever",  sizeof(text)/2);
-        u_uastrncpy(repl, "some other", sizeof(repl)/2);
+        u_uastrncpy(text, "whatever",  LENGTHOF(text));
+        u_uastrncpy(repl, "some other", LENGTHOF(repl));
          uregex_setText(re, text, -1, &status);
  
          /* match covers whole target string */
          uregex_find(re, 0, &status);
          TEST_ASSERT_SUCCESS(status);
          bufPtr = buf;
-        bufCap = sizeof(buf) / 2;
+        bufCap = LENGTHOF(buf);
          uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("some other", buf, TRUE);
@@ -1018,8 +1022,8 @@ static void TestRegexCAPI(void) {
          uregex_find(re, 0, &status);
          TEST_ASSERT_SUCCESS(status);
          bufPtr = buf;
-        bufCap = sizeof(buf) / 2;
-        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
+        bufCap = LENGTHOF(buf);
+        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl));
          uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 
@@ -1054,8 +1058,8 @@ static void TestRegexCAPI(void) {
          int32_t  spaceNeeded;
          int32_t  sz;
  
-        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
+        u_uastrncpy(textToSplit, "first : second:  third",  LENGTHOF(textToSplit));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
  
          status = U_ZERO_ERROR;
          re = uregex_openC(":", 0, NULL, &status);
@@ -1070,7 +1074,7 @@ static void TestRegexCAPI(void) {
          if (U_SUCCESS(status)) {
              memset(fields, -1, sizeof(fields));
              numFields = 
-                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
+                uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
              TEST_ASSERT_SUCCESS(status);
  
              /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1102,7 +1106,7 @@ static void TestRegexCAPI(void) {
          if(U_SUCCESS(status)) {
              memset(fields, -1, sizeof(fields));
              numFields = 
-                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
+                uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
              TEST_ASSERT_SUCCESS(status);
  
              /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1160,7 +1164,7 @@ static void TestRegexCAPI(void) {
          int32_t  spaceNeeded;
          int32_t  sz;
  
-        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
+        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  LENGTHOF(textToSplit));
  
          status = U_ZERO_ERROR;
          re = uregex_openC("<(.*?)>", 0, NULL, &status);
@@ -1172,7 +1176,7 @@ static void TestRegexCAPI(void) {
          if(U_SUCCESS(status)) {
              memset(fields, -1, sizeof(fields));
              numFields = 
-                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
+                uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
              TEST_ASSERT_SUCCESS(status);
  
              /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1193,7 +1197,7 @@ static void TestRegexCAPI(void) {
          status = U_ZERO_ERROR;
          memset(fields, -1, sizeof(fields));
          numFields = 
-            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
+            uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
          TEST_ASSERT_SUCCESS(status);
  
          /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1211,7 +1215,7 @@ static void TestRegexCAPI(void) {
          status = U_ZERO_ERROR;
          memset(fields, -1, sizeof(fields));
          numFields = 
-            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
+            uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
          TEST_ASSERT_SUCCESS(status);
  
          /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1230,7 +1234,7 @@ static void TestRegexCAPI(void) {
          status = U_ZERO_ERROR;
          memset(fields, -1, sizeof(fields));
          numFields = 
-            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
+            uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
          TEST_ASSERT_SUCCESS(status);
  
          /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1257,7 +1261,7 @@ static void TestRegexCAPI(void) {
          if(U_SUCCESS(status)) {
              memset(fields, -1, sizeof(fields));
              numFields = 
-                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
+                uregex_split(re, buf, LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
              TEST_ASSERT_SUCCESS(status);
  
              /* The TEST_ASSERT_SUCCESS call above should change too... */
@@ -1464,8 +1468,8 @@ static void TestUTextAPI(void) {
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT(clone3 != NULL);
  
-        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
-        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
+        u_uastrncpy(testString1, "abcccd", LENGTHOF(pat));
+        u_uastrncpy(testString2, "xxxabcccd", LENGTHOF(pat));
  
          status = U_ZERO_ERROR;
          uregex_setText(clone1, testString1, -1, &status);
@@ -1499,7 +1503,7 @@ static void TestUTextAPI(void) {
          UText        *resultText;
          const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
          const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
-        u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
+        u_uastrncpy(pat, "hello", LENGTHOF(pat)); /* for comparison */
          status = U_ZERO_ERROR;
          
          utext_openUTF8(&patternText, str_hello, -1, &status);
@@ -1602,7 +1606,7 @@ static void TestUTextAPI(void) {
  
          status = U_ZERO_ERROR;
          utext_openUTF8(&text1, str_abcccd, -1, &status);
-        u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
+        u_uastrncpy(text2Chars, str_abcccxd, LENGTHOF(text2Chars));
          utext_openUChars(&text2, text2Chars, -1, &status);
          
          utext_openUTF8(&patternText, str_abcd, -1, &status);
@@ -1698,7 +1702,7 @@ static void TestUTextAPI(void) {
      {
          UChar    text1[50];
          UBool    result;
-        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
+        u_uastrncpy(text1, "012rx5rx890rxrx...",  LENGTHOF(text1));
          status = U_ZERO_ERROR;
          re = uregex_openC("rx", 0, NULL, &status);
  
@@ -1762,7 +1766,7 @@ static void TestUTextAPI(void) {
          const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
          
  
-        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
+        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  LENGTHOF(text1));
  
          status = U_ZERO_ERROR;
          re = uregex_openC("abc(.*?)def", 0, NULL, &status);
@@ -1840,8 +1844,8 @@ static void TestUTextAPI(void) {
          const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
          const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
          status = U_ZERO_ERROR;
-        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
+        u_uastrncpy(text1, "Replace xaax x1x x...x.",  LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
          utext_openUTF8(&replText, str_1x, -1, &status);
  
          re = uregex_openC("x(.*?)x", 0, NULL, &status);
@@ -1886,8 +1890,8 @@ static void TestUTextAPI(void) {
          const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
          const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
          status = U_ZERO_ERROR;
-        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
+        u_uastrncpy(text1, "Replace xaax x1x x...x.",  LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
          utext_openUTF8(&replText, str_1, -1, &status);
  
          re = uregex_openC("x(.*?)x", 0, NULL, &status);
@@ -1926,15 +1930,15 @@ static void TestUTextAPI(void) {
          re = uregex_openC(".*", 0, 0, &status);
          TEST_ASSERT_SUCCESS(status);
  
-        u_uastrncpy(text, "whatever",  sizeof(text)/2);
-        u_uastrncpy(repl, "some other", sizeof(repl)/2);
+        u_uastrncpy(text, "whatever",  LENGTHOF(text));
+        u_uastrncpy(repl, "some other", LENGTHOF(repl));
          uregex_setText(re, text, -1, &status);
  
          /* match covers whole target string */
          uregex_find(re, 0, &status);
          TEST_ASSERT_SUCCESS(status);
          bufPtr = buf;
-        bufCap = sizeof(buf) / 2;
+        bufCap = LENGTHOF(buf);
          uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("some other", buf, TRUE);
@@ -1943,8 +1947,8 @@ static void TestUTextAPI(void) {
          uregex_find(re, 0, &status);
          TEST_ASSERT_SUCCESS(status);
          bufPtr = buf;
-        bufCap = sizeof(buf) / 2;
-        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
+        bufCap = LENGTHOF(buf);
+        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", LENGTHOF(repl));
          uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
          TEST_ASSERT_SUCCESS(status);
          TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 
@@ -1967,8 +1971,8 @@ static void TestUTextAPI(void) {
          int32_t  numFields;
          int32_t i;
  
-        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
-        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
+        u_uastrncpy(textToSplit, "first : second:  third",  LENGTHOF(textToSplit));
+        u_uastrncpy(text2, "No match here.",  LENGTHOF(text2));
  
          status = U_ZERO_ERROR;
          re = uregex_openC(":", 0, NULL, &status);
@@ -2043,7 +2047,7 @@ static void TestUTextAPI(void) {
          int32_t  numFields;
          int32_t i;
  
-        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
+        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  LENGTHOF(textToSplit));
  
          status = U_ZERO_ERROR;
          re = uregex_openC("<(.*?)>", 0, NULL, &status);
@@ -2266,5 +2270,60 @@ static void TestBug8421(void) {
      uregex_close(re);
  }
  
+static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
+    return FALSE;   /* Ignores both arguments; FALSE asks the find operation to stop. */
+}
+
+static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
+    return FALSE;   /* Ignores both arguments; FALSE asks the match in progress to stop. */
+}
+
+static void TestBug10815(void) {
+  /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
+   *              when the callback function specified by uregex_setMatchCallback() returns FALSE.
+   *              Checks both the find progress callback and the match callback. */
+    URegularExpression *re;
+    UErrorCode status = U_ZERO_ERROR;
+    UChar    text[100];
+
+
+    /* findNext() with a find progress callback function. */
+
+    re = uregex_openC(".z", 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    u_uastrncpy(text, "Hello, World.",  LENGTHOF(text));
+    uregex_setText(re, text, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_findNext(re, &status);
+    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
+
+    uregex_close(re);
+
+    /* findNext() with a match progress callback function. */
+
+    status = U_ZERO_ERROR;
+    re = uregex_openC("((xxx)*)*y", 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* Pattern + this text gives an exponential time match. Without the callback to stop the match,
+     * it will appear to be stuck in a (near) infinite loop. */
+    u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  LENGTHOF(text));
+    uregex_setText(re, text, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_setMatchCallback(re, MatchCallback, NULL, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_findNext(re, &status);
+    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
+
+    uregex_close(re);
+}
+
      
  #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp

index bc7e3afed2741623ddb0e67118d5e3e15f6cfcd2..00c4763e874aee33afa2abd52482a32ac977983b 100644 (file)
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -2035,7 +2035,7 @@ void RegexTest::API_Match_UTF8() {
          utext_openUnicodeString(&destText, &dest, &status);
          UText *result;
          //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
-        //     Test shallow-clone API
+        //  Test shallow-clone API
          int64_t   group_len;
          result = matcher->group((UText *)NULL, group_len, status);
          REGEX_CHECK_STATUS;
@@ -4826,6 +4826,9 @@ struct progressCallBackContext {
      void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
  };
  
+// call-back function for find().
+// Return TRUE to continue the find().
+// Return FALSE to stop the find().
  U_CDECL_BEGIN
  static UBool U_CALLCONV
  testProgressCallBackFn(const void *context, int64_t matchIndex) {
@@ -4861,7 +4864,7 @@ void RegexTest::FindProgressCallbacks() {
          const void                  *returnedContext;
          URegexFindProgressCallback  *returnedFn;
          UErrorCode status = U_ZERO_ERROR;
-        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);  // A pattern that can run long.
+        RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);
          REGEX_CHECK_STATUS;
          matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
          REGEX_CHECK_STATUS;
@@ -4870,10 +4873,10 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(returnedFn == testProgressCallBackFn);
          REGEX_ASSERT(returnedContext == &cbInfo);
  
-        // A short-running match should NOT invoke the callback.
+        // A find that matches on the initial position does NOT invoke the callback.
          status = U_ZERO_ERROR;
          cbInfo.reset(100);
-        UnicodeString s = "abxxx";
+        UnicodeString s = "aaxxx";
          matcher.reset(s);
  #if 0
          matcher.setTrace(TRUE);
@@ -4882,7 +4885,8 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls == 0);
  
-        // A medium running match that causes matcher.find() to invoke our callback for each index.
+        // A medium running find() that causes matcher.find() to invoke our callback for each index,
+        //   but not so many times that we interrupt the operation.
          status = U_ZERO_ERROR;
          s = "aaaaaaaaaaaaaaaaaaab";
          cbInfo.reset(s.length()); //  Some upper limit for number of calls that is greater than size of our input string
@@ -4897,22 +4901,21 @@ void RegexTest::FindProgressCallbacks() {
          cbInfo.reset(s1.length() - 5); //  Bail early somewhere near the end of input string
          matcher.reset(s1);
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
-        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
  
-#if 0
          // Now a match that will succeed, but after an interruption
          status = U_ZERO_ERROR;
          UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
          cbInfo.reset(s2.length() - 10); //  Bail early somewhere near the end of input string
          matcher.reset(s2);
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
-        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          // Now retry the match from where left off
          cbInfo.maxCalls = 100; //  No callback limit
+        status = U_ZERO_ERROR;
          REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
          REGEX_CHECK_STATUS;
-#endif
      }
  
  
@@ -5317,7 +5320,7 @@ void RegexTest::TestBug11049() {
      TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);
      TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);
  
-    // Test again with a pattern starting with a single character, 
+    // Test again with a pattern starting with a single character,
      // which takes a different code path than starting with an OR expression,
      // but with similar logic.
      TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);
author	Andy Heninger <andy.heninger@gmail.com>
	Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Thu, 28 Aug 2014 01:19:29 +0000 (01:19 +0000)
icu4c/source/i18n/rematch.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/regex.h		patch \| blob \| history
icu4c/source/i18n/uregex.cpp		patch \| blob \| history
icu4c/source/test/cintltst/reapits.c		patch \| blob \| history
icu4c/source/test/intltest/regextst.cpp		patch \| blob \| history