ICU-10463 Regular Expressions, rework debug conditionals to fix build failures on...

author Andy Heninger <andy.heninger@gmail.com>

Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp

index 0ec61543295dd9976d737e290c57d38650bd306a..52f132b55f2b22dfa482f2ac71ef2abecdc6d3f3 100644 (file)
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -109,7 +109,7 @@ void    RegexCompile::compile(
      fRXPat->fPatternString = new UnicodeString(pat);
      UText patternText = UTEXT_INITIALIZER;
      utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e);
-    
+
      if (U_SUCCESS(e)) {
          compile(&patternText, pp, e);
          utext_close(&patternText);
@@ -568,13 +568,13 @@ UBool RegexCompile::doParseActions(int32_t action)
  
              op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
              fRXPat->fCompiledPat->addElement(op, *fStatus);
-            
+
              op = URX_BUILD(URX_LA_END, dataLoc);
              fRXPat->fCompiledPat->addElement(op, *fStatus);
  
              op = URX_BUILD(URX_BACKTRACK, 0);
              fRXPat->fCompiledPat->addElement(op, *fStatus);
-            
+
              op = URX_BUILD(URX_NOP, 0);
              fRXPat->fCompiledPat->addElement(op, *fStatus);
              fRXPat->fCompiledPat->addElement(op, *fStatus);
@@ -1147,7 +1147,7 @@ UBool RegexCompile::doParseActions(int32_t action)
              } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
                  op = URX_CARET_M;
              } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
-                op = URX_CARET;   // Only testing true start of input. 
+                op = URX_CARET;   // Only testing true start of input.
              } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
                  op = URX_CARET_M_UNIX;
              }
@@ -1281,7 +1281,7 @@ UBool RegexCompile::doParseActions(int32_t action)
              literalChar(c);
          }
          break;
-        
+
  
      case doBackRef:
          // BackReference.  Somewhat unusual in that the front-end can not completely parse
@@ -1643,7 +1643,7 @@ UBool RegexCompile::doParseActions(int32_t action)
          compileSet(theSet);
          break;
          }
-        
+
      case doSetIntersection2:
          // Have scanned something like [abc&&
          setPushOp(setIntersection2);
@@ -1654,7 +1654,7 @@ UBool RegexCompile::doParseActions(int32_t action)
          //    This operation is the highest precedence set operation, so we can always do
          //    it immediately, without waiting to see what follows.  It is necessary to perform
          //    any pending '-' or '&' operation first, because these have the same precedence
-        //    as union-ing in a literal' 
+        //    as union-ing in a literal'
          {
              setEval(setUnion);
              UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
@@ -1749,7 +1749,7 @@ UBool RegexCompile::doParseActions(int32_t action)
              }  // else error.  scanProp() reported the error status already.
          }
          break;
-        
+
      case doSetProp:
          //  Scanned a \p \P within [brackets].
          {
@@ -1771,7 +1771,7 @@ UBool RegexCompile::doParseActions(int32_t action)
          //        and ICU UnicodeSet behavior.
          {
          if (fLastSetLiteral > fC.fChar) {
-            error(U_REGEX_INVALID_RANGE);  
+            error(U_REGEX_INVALID_RANGE);
          }
          UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
          s->add(fLastSetLiteral, fC.fChar);
@@ -1830,7 +1830,7 @@ void    RegexCompile::fixLiterals(UBool split) {
      int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
      UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
  
-    // Split:  We need to  ensure that the last item in the compiled pattern 
+    // Split:  We need to  ensure that the last item in the compiled pattern
      //     refers only to the last literal scanned in the pattern, so that
      //     quantifiers (*, +, etc.) affect only it, and not a longer string.
      //     Split before case folding for case insensitive matches.
@@ -1856,7 +1856,7 @@ void    RegexCompile::fixLiterals(UBool split) {
  
      if (indexOfLastCodePoint == 0) {
          // Single character, emit a URX_ONECHAR op to match it.
-        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && 
+        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
                   u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
              op = URX_BUILD(URX_ONECHAR_I, lastCodePoint);
          } else {
@@ -1875,7 +1875,7 @@ void    RegexCompile::fixLiterals(UBool split) {
          fRXPat->fCompiledPat->addElement(op, *fStatus);
          op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
          fRXPat->fCompiledPat->addElement(op, *fStatus);
-        
+
          // Add this string into the accumulated strings of the compiled pattern.
          fRXPat->fLiteralText.append(fLiteralChars);
      }
@@ -2449,7 +2449,7 @@ void   RegexCompile::matchStartType() {
          case URX_STO_INP_LOC:
          case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
          case URX_BACKREF_I:
-                
+
          case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
          case URX_LD_SP:
              break;
@@ -2762,7 +2762,7 @@ void   RegexCompile::matchStartType() {
              {
                  // Look-around.  Scan forward until the matching look-ahead end,
                  //   without processing the look-around block.  This is overly pessimistic.
-                
+
                  // Keep track of the nesting depth of look-around blocks.  Boilerplate code for
                  //   lookahead contains two LA_END instructions, so count goes up by two
                  //   for each LA_START.
@@ -3322,7 +3322,7 @@ int32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
              //        compiled (folded) string.  Folding may add code points, but
              //        not remove them.
              //
-            //        There is a potential problem if a supplemental code point 
+            //        There is a potential problem if a supplemental code point
              //        case-folds to a BMP code point.  In this case our compiled string
              //        could be shorter (in code units) than a matching user string.
              //
@@ -3353,7 +3353,7 @@ int32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
                      loc = loopEndLoc;
                      break;
                  }
-                
+
                  int32_t maxLoopCount = fRXPat->fCompiledPat->elementAti(loc+3);
                  if (maxLoopCount == -1) {
                      // Unbounded Loop. No upper bound on match length.
@@ -3471,7 +3471,7 @@ void RegexCompile::stripNOPs() {
              d++;
          }
      }
-    
+
      UnicodeString caseStringBuffer;
  
      // Make a second pass over the code, removing the NOPs by moving following
@@ -3518,7 +3518,7 @@ void RegexCompile::stripNOPs() {
                  op    = URX_BUILD(opType, where);
                  fRXPat->fCompiledPat->setElementAt(op, dst);
                  dst++;
-                
+
                  fRXPat->fNeedsAltInput = TRUE;
                  break;
              }
@@ -3609,7 +3609,7 @@ void RegexCompile::error(UErrorCode e) {
              fParseErr->line   = (int32_t)fLineNum;
              fParseErr->offset = (int32_t)fCharNum;
          }
-        
+
          UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context
  
          // Fill in the context.
@@ -3663,7 +3663,7 @@ UChar32  RegexCompile::nextCharLL() {
          fPeekChar = -1;
          return ch;
      }
-    
+
      // assume we're already in the right place
      ch = UTEXT_NEXT32(fRXPat->fPattern);
      if (ch == U_SENTINEL) {
@@ -3719,7 +3719,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
  
      if (fQuoteMode) {
          c.fQuoted = TRUE;
-        if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) || 
+        if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
              c.fChar == (UChar32)-1) {
              fQuoteMode = FALSE;  //  Exit quote mode,
              nextCharLL();        // discard the E
@@ -3780,11 +3780,11 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                  //
                  nextCharLL();                 // get & discard the peeked char.
                  c.fQuoted = TRUE;
-                
+
                  if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
                      int32_t endIndex = (int32_t)pos;
                      c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
-                    
+
                      if (endIndex == pos) {
                          error(U_REGEX_BAD_ESCAPE_SEQUENCE);
                      }
@@ -3793,7 +3793,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                  } else {
                      int32_t offset = 0;
                      struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(fRXPat->fPattern);
-                    
+
                      UTEXT_SETNATIVEINDEX(fRXPat->fPattern, pos);
                      c.fChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
  
@@ -3836,8 +3836,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
                          c.fChar >>= 3;
                      }
                  }
-                c.fQuoted = TRUE; 
-            } 
+                c.fQuoted = TRUE;
+            }
              else if (peekCharLL() == chQ) {
                  //  "\Q"  enter quote mode, which will continue until "\E"
                  fQuoteMode = TRUE;
@@ -3885,7 +3885,7 @@ UChar32  RegexCompile::scanNamedChar() {
          error(U_REGEX_PROPERTY_SYNTAX);
          return 0;
      }
-    
+
      UnicodeString  charName;
      for (;;) {
          nextChar(fC);
@@ -3898,7 +3898,7 @@ UChar32  RegexCompile::scanNamedChar() {
          }
          charName.append(fC.fChar);
      }
-    
+
      char name[100];
      if (!uprv_isInvariantUString(charName.getBuffer(), charName.length()) ||
           (uint32_t)charName.length()>=sizeof(name)) {
@@ -4006,7 +4006,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
  
      // Scan for a closing ].   A little tricky because there are some perverse
      //   edge cases possible.  "[:abc\Qdef:] \E]"  is a valid non-property expression,
-    //   ending on the second closing ]. 
+    //   ending on the second closing ].
  
      UnicodeString propName;
      UBool         negated  = FALSE;
@@ -4017,7 +4017,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
         negated = TRUE;
         nextChar(fC);
      }
-    
+
      // Scan for the closing ":]", collecting the property name along the way.
      UBool  sawPropSetTerminator = FALSE;
      for (;;) {
@@ -4035,7 +4035,7 @@ UnicodeSet *RegexCompile::scanPosixProp() {
              break;
          }
      }
-    
+
      if (sawPropSetTerminator) {
          uset = createSetForProperty(propName, negated);
      }
@@ -4068,7 +4068,7 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
  //  Create a Unicode Set from a Unicode Property expression.
  //     This is common code underlying both \p{...} and [:...:] expressions.
  //     Includes trying the Java "properties" that aren't supported as
-//     normal ICU UnicodeSet properties 
+//     normal ICU UnicodeSet properties
  //
  static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
  static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
@@ -4076,7 +4076,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
      UnicodeString   setExpr;
      UnicodeSet      *set;
      uint32_t        usetFlags = 0;
-    
+
      if (U_FAILURE(*fStatus)) {
          return NULL;
      }
@@ -4101,13 +4101,13 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
      }
      delete set;
      set = NULL;
-    
+
      //
      //  The property as it was didn't work.
  
-    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX 
+    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX
      //     or standard Java, but many other regular expression packages do recognize it.
-    
+
      if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
          *fStatus = U_ZERO_ERROR;
          set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
@@ -4127,7 +4127,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
      //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
      //
      //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
-    //                        is accepted by Java.  The property part of the name is compared 
+    //                        is accepted by Java.  The property part of the name is compared
      //                        case-insensitively.  The spaces must be exactly as shown, either
      //                        all there, or all omitted, with exactly one at each position
      //                        if they are present.  From checking against JDK 1.6
@@ -4146,7 +4146,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
      else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
          mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
      }
-    
+
      //    See if the property looks like a Java "InBlockName", which
      //    we will recast as "Block=BlockName"
      //
@@ -4270,7 +4270,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB
          set = NULL;
      }
      error(*fStatus);
-    return NULL; 
+    return NULL;
  }
  
  
diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h

index 31d333caf7733eb9b63501e52f755994505ff821..bdf84030993e8dfe7c0d7f95c95277a88978ea86 100644 (file)
--- a/icu4c/source/i18n/regeximp.h
+++ b/icu4c/source/i18n/regeximp.h
@@ -1,5 +1,5 @@
  //
-//   Copyright (C) 2002-2012 International Business Machines Corporation
+//   Copyright (C) 2002-2013 International Business Machines Corporation
  //   and others. All rights reserved.
  //
  //   file:  regeximp.h
@@ -22,11 +22,11 @@
  
  U_NAMESPACE_BEGIN
  
-// For debugging, define REGEX_DEBUG 
+// For debugging, define REGEX_DEBUG
  // To define with configure,
-//   ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_DEBUG"
+//   CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux 
  
-#ifdef REGEX_DEBUG   
+#ifdef REGEX_DEBUG
  //
  //  debugging options.  Enable one or more of the three #defines immediately following
  //
@@ -46,19 +46,6 @@ U_NAMESPACE_BEGIN
  #define REGEX_SCAN_DEBUG_PRINTF(a)
  #endif
  
-#ifdef REGEX_DUMP_DEBUG
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_DUMP_DEBUG_PRINTF(a)
-#endif
-
-#ifdef REGEX_RUN_DEBUG
-#define REGEX_RUN_DEBUG_PRINTF(a) printf a
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_RUN_DEBUG_PRINTF(a)
-#endif
-
  
  //
  //  Opcode types     In the compiled form of the regexp, these are the type, or opcodes,
@@ -373,9 +360,9 @@ class CaseFoldingUTextIterator: public UMemory {
          CaseFoldingUTextIterator(UText &text);
          ~CaseFoldingUTextIterator();
  
-        UChar32 next();           // Next case folded character 
+        UChar32 next();           // Next case folded character
  
-        UBool   inExpansion();    // True if last char returned from next() and the 
+        UBool   inExpansion();    // True if last char returned from next() and the
                                    //  next to be returned both originated from a string
                                    //  folding of the same code point from the original UText.
        private:
@@ -398,9 +385,9 @@ class CaseFoldingUCharIterator: public UMemory {
          CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit);
          ~CaseFoldingUCharIterator();
  
-        UChar32 next();           // Next case folded character 
+        UChar32 next();           // Next case folded character
  
-        UBool   inExpansion();    // True if last char returned from next() and the 
+        UBool   inExpansion();    // True if last char returned from next() and the
                                    //  next to be returned both originated from a string
                                    //  folding of the same code point from the original UText.
  
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp

index 306b416adb0620bd346b7475a53f472957556aec..6ffe61058e71e05ef01c8e888d834e12b9c9814c 100644 (file)
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -2720,7 +2720,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
      int32_t     opType;                //    the opcode
      int32_t     opValue;               //    and the operand value.
  
-    #ifdef REGEX_RUN_DEBUG
+#ifdef REGEX_RUN_DEBUG
      if (fTraceDebug)
      {
          printf("MatchAt(startIdx=%ld)\n", startIdx);
@@ -2730,7 +2730,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
              if (c<32 || c>256) {
                  c = '.';
              }
-            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+            printf("%c", c);
  
              c = UTEXT_NEXT32(fPattern->fPattern);
          }
@@ -2748,7 +2748,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
          printf("\n");
          printf("\n");
      }
-    #endif
+#endif
  
      if (U_FAILURE(status)) {
          return;
@@ -2778,23 +2778,17 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
      //  One iteration of the loop per pattern operation performed.
      //
      for (;;) {
-#if 0
-        if (_heapchk() != _HEAPOK) {
-            fprintf(stderr, "Heap Trouble\n");
-        }
-#endif
-
          op      = (int32_t)pat[fp->fPatIdx];
          opType  = URX_TYPE(op);
          opValue = URX_VAL(op);
-        #ifdef REGEX_RUN_DEBUG
+#ifdef REGEX_RUN_DEBUG
          if (fTraceDebug) {
              UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
              printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
                  UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
              fPattern->dumpOp(fp->fPatIdx);
          }
-        #endif
+#endif
          fp->fPatIdx++;
  
          switch (opType) {
@@ -4188,16 +4182,17 @@ breakFromLoop:
          fLastMatchEnd = fMatchEnd;
          fMatchStart   = startIdx;
          fMatchEnd     = fp->fInputIdx;
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
-        }
      }
-    else
-    {
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
+
+#ifdef REGEX_RUN_DEBUG
+    if (fTraceDebug) {
+        if (isMatch) {
+            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
+        } else {
+            printf("No match\n\n");
          }
      }
+#endif
  
      fFrame = fp;                // The active stack frame when the engine stopped.
                                  //   Contains the capture group results that we need to
@@ -4228,8 +4223,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
      int32_t     opValue;               //    and the operand value.
  
  #ifdef REGEX_RUN_DEBUG
-    if (fTraceDebug)
-    {
+    if (fTraceDebug) {
          printf("MatchAt(startIdx=%d)\n", startIdx);
          printf("Original Pattern: ");
          UChar32 c = utext_next32From(fPattern->fPattern, 0);
@@ -4237,7 +4231,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
              if (c<32 || c>256) {
                  c = '.';
              }
-            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+            printf("%c", c);
  
              c = UTEXT_NEXT32(fPattern->fPattern);
          }
@@ -4287,12 +4281,6 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu
      //  One iteration of the loop per pattern operation performed.
      //
      for (;;) {
-#if 0
-        if (_heapchk() != _HEAPOK) {
-            fprintf(stderr, "Heap Trouble\n");
-        }
-#endif
-
          op      = (int32_t)pat[fp->fPatIdx];
          opType  = URX_TYPE(op);
          opValue = URX_VAL(op);
@@ -5627,20 +5615,21 @@ breakFromLoop:
          fLastMatchEnd = fMatchEnd;
          fMatchStart   = startIdx;
          fMatchEnd     = fp->fInputIdx;
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
-        }
      }
-    else
-    {
-        if (fTraceDebug) {
-            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
+
+#ifdef REGEX_RUN_DEBUG
+    if (fTraceDebug) {
+        if (isMatch) {
+            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
+        } else {
+            printf("No match\n\n");
          }
      }
+#endif
  
      fFrame = fp;                // The active stack frame when the engine stopped.
-    //   Contains the capture group results that we need to
-    //    access later.
+                                //   Contains the capture group results that we need to
+                                //    access later.
  
      return;
  }
diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp

index 1454a093a38037a75c69090171f2256f790d72c5..fe6558c7d2e1aaf871dbf7d08be9856681d283b8 100644 (file)
--- a/icu4c/source/i18n/repattrn.cpp
+++ b/icu4c/source/i18n/repattrn.cpp
@@ -3,7 +3,7 @@
  //
  /*
  ***************************************************************************
-*   Copyright (C) 2002-2012 International Business Machines Corporation   *
+*   Copyright (C) 2002-2013 International Business Machines Corporation   *
  *   and others. All rights reserved.                                      *
  ***************************************************************************
  */
@@ -275,21 +275,21 @@ RegexPattern::compile(const UnicodeString &regex,
      if (U_FAILURE(status)) {
          return NULL;
      }
-    
+
      const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
      UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
      UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
-    
+
      if ((flags & ~allFlags) != 0) {
          status = U_REGEX_INVALID_FLAG;
          return NULL;
      }
-    
+
      if ((flags & UREGEX_CANON_EQ) != 0) {
          status = U_REGEX_UNIMPLEMENTED;
          return NULL;
      }
-    
+
      RegexPattern *This = new RegexPattern;
      if (This == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
@@ -301,15 +301,15 @@ RegexPattern::compile(const UnicodeString &regex,
          return NULL;
      }
      This->fFlags = flags;
-    
+
      RegexCompile     compiler(This, status);
      compiler.compile(regex, pe, status);
-    
+
      if (U_FAILURE(status)) {
          delete This;
          This = NULL;
      }
-    
+
      return This;
  }
  
@@ -355,7 +355,7 @@ RegexPattern::compile(UText                *regex,
  
      RegexCompile     compiler(This, status);
      compiler.compile(regex, pe, status);
-    
+
      if (U_FAILURE(status)) {
          delete This;
          This = NULL;
@@ -538,12 +538,12 @@ UnicodeString RegexPattern::pattern() const {
          int64_t nativeLen = utext_nativeLength(fPattern);
          int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
          UnicodeString result;
-        
+
          status = U_ZERO_ERROR;
          UChar *resultChars = result.getBuffer(len16);
          utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
          result.releaseBuffer(len16);
-        
+
          return result;
      }
  }
@@ -622,8 +622,9 @@ int32_t  RegexPattern::split(UText *input,
  //           Debugging function only.
  //
  //---------------------------------------------------------------------
-#if defined(REGEX_DEBUG)
  void   RegexPattern::dumpOp(int32_t index) const {
+    (void)index;  // Suppress warnings in non-debug build.
+#if defined(REGEX_DEBUG)
      static const char * const opNames[] = {URX_OPCODE_NAMES};
      int32_t op          = fCompiledPat->elementAti(index);
      int32_t val         = URX_VAL(op);
@@ -633,7 +634,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
          pinnedType = 0;
      }
  
-    REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
+    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
      switch (type) {
      case URX_NOP:
      case URX_DOTANY:
@@ -682,12 +683,12 @@ void   RegexPattern::dumpOp(int32_t index) const {
      case URX_LOOP_C:
      case URX_LOOP_DOT_I:
          // types with an integer operand field.
-        REGEX_DUMP_DEBUG_PRINTF(("%d", val));
+        printf("%d", val);
          break;
  
      case URX_ONECHAR:
      case URX_ONECHAR_I:
-        REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
+        printf("%c", val<256?val:'?');
          break;
  
      case URX_STRING:
@@ -700,7 +701,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
              for (i=val; i<val+length; i++) {
                  UChar c = fLiteralText[i];
                  if (c < 32 || c >= 256) {c = '.';}
-                REGEX_DUMP_DEBUG_PRINTF(("%c", c));
+                printf("%c", c);
              }
          }
          break;
@@ -712,7 +713,7 @@ void   RegexPattern::dumpOp(int32_t index) const {
              UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
              set->toPattern(s, TRUE);
              for (int32_t i=0; i<s.length(); i++) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
+                printf("%c", s.charAt(i));
              }
          }
          break;
@@ -722,89 +723,89 @@ void   RegexPattern::dumpOp(int32_t index) const {
          {
              UnicodeString s;
              if (val & URX_NEG_SET) {
-                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
+                printf("NOT ");
                  val &= ~URX_NEG_SET;
              }
              UnicodeSet *set = fStaticSets[val];
              set->toPattern(s, TRUE);
              for (int32_t i=0; i<s.length(); i++) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
+                printf("%c", s.charAt(i));
              }
          }
          break;
  
  
      default:
-        REGEX_DUMP_DEBUG_PRINTF(("??????"));
+        printf("??????");
          break;
      }
-    REGEX_DUMP_DEBUG_PRINTF(("\n"));
-}
+    printf("\n");
  #endif
+}
  
  
-#if defined(REGEX_DEBUG)
  U_CAPI void  U_EXPORT2
-RegexPatternDump(const RegexPattern *This) {
+RegexPattern::dumpPattern() const {
+#if defined(REGEX_DEBUG)
      int      index;
      int      i;
  
-    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
-    UChar32 c = utext_next32From(This->fPattern, 0);
+    printf("Original Pattern:  ");
+    UChar32 c = utext_next32From(fPattern, 0);
      while (c != U_SENTINEL) {
          if (c<32 || c>256) {
              c = '.';
          }
-        REGEX_DUMP_DEBUG_PRINTF(("%c", c));
-        
-        c = UTEXT_NEXT32(This->fPattern);
-    }
-    REGEX_DUMP_DEBUG_PRINTF(("\n"));
-    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
-    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
-    if (This->fStartType == START_STRING) {
-        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
-        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
-            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
+        printf("%c", c);
+
+        c = UTEXT_NEXT32(fPattern);
+    }
+    printf("\n");
+    printf("   Min Match Length:  %d\n", fMinMatchLen);
+    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
+    if (fStartType == START_STRING) {
+        printf("    Initial match string: \"");
+        for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
+            printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
          }
-        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
+        printf("\"\n");
  
-    } else if (This->fStartType == START_SET) {
-        int32_t numSetChars = This->fInitialChars->size();
+    } else if (fStartType == START_SET) {
+        int32_t numSetChars = fInitialChars->size();
          if (numSetChars > 20) {
              numSetChars = 20;
          }
-        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
+        printf("     Match First Chars : ");
          for (i=0; i<numSetChars; i++) {
-            UChar32 c = This->fInitialChars->charAt(i);
+            UChar32 c = fInitialChars->charAt(i);
              if (0x20<c && c <0x7e) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
+                printf("%c ", c);
              } else {
-                REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
+                printf("%#x ", c);
              }
          }
-        if (numSetChars < This->fInitialChars->size()) {
-            REGEX_DUMP_DEBUG_PRINTF((" ..."));
+        if (numSetChars < fInitialChars->size()) {
+            printf(" ...");
          }
-        REGEX_DUMP_DEBUG_PRINTF(("\n"));
+        printf("\n");
  
-    } else if (This->fStartType == START_CHAR) {
-        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
-        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
-                REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
+    } else if (fStartType == START_CHAR) {
+        printf("    First char of Match : ");
+        if (0x20 < fInitialChar && fInitialChar<0x7e) {
+                printf("%c\n", fInitialChar);
              } else {
-                REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
+                printf("%#x\n", fInitialChar);
              }
      }
  
-    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
-           "-------------------------------------------\n"));
-    for (index = 0; index<This->fCompiledPat->size(); index++) {
-        This->dumpOp(index);
+    printf("\nIndex   Binary     Type             Operand\n" \
+           "-------------------------------------------\n");
+    for (index = 0; index<fCompiledPat->size(); index++) {
+        dumpOp(index);
      }
-    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
-}
+    printf("\n\n");
  #endif
+}
  
  
  
diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h

index e356a9e14f413faae58e04c6433d505120a645c4..b2ef4ce33d9b83b18ef2baaa69ac495852e0cf7a 100644 (file)
--- a/icu4c/source/i18n/unicode/regex.h
+++ b/icu4c/source/i18n/unicode/regex.h
@@ -68,21 +68,6 @@ class  UVector;
  class  UVector32;
  class  UVector64;
  
-#ifndef U_HIDE_INTERNAL_API
-/**
- *   RBBIPatternDump   Debug function, displays the compiled form of a pattern.
- *   @internal
- */
-#ifdef REGEX_DEBUG
-U_INTERNAL void U_EXPORT2
-    RegexPatternDump(const RegexPattern *pat);
-#else
-    #undef RegexPatternDump
-    #define RegexPatternDump(pat)
-#endif
-#endif  /* U_HIDE_INTERNAL_API */
-
-
  
  /**
    * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
@@ -613,11 +598,17 @@ private:
      //
      void        init();            // Common initialization, for use by constructors.
      void        zap();             // Common cleanup
-#ifdef REGEX_DEBUG
+
      void        dumpOp(int32_t index) const;
-    friend     void U_EXPORT2 RegexPatternDump(const RegexPattern *);
-#endif
  
+  public:
+#ifndef U_HIDE_INTERNAL_API
+    /**
+      * Dump a compiled pattern. Internal debug function.
+      * @internal
+      */
+    void        dumpPattern() const;
+#endif
  };
  
  
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp

index eb41f0bd682cec2bb7edfb0ae68b6eac4225881b..fe0d8f609a5908fc91c2ce437f2ba646157c1e2f 100644 (file)
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -145,7 +145,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
  
  /**
   * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
- * into ASCII. 
+ * into ASCII.
   * @see utext_openUTF8
   */
  static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
@@ -298,11 +298,11 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
  }
  
  /**
- * Assumes utf-8 input 
+ * Assumes utf-8 input
   */
  #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__)
  /**
- * Assumes Invariant input 
+ * Assumes Invariant input
   */
  #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
  
@@ -310,7 +310,7 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
   * This buffer ( inv_buf ) is used to hold the UTF-8 strings
   * passed into utext_openUTF8. An error will be given if
   * INV_BUFSIZ is too small.  It's only used on EBCDIC systems.
- */ 
+ */
  
  #define INV_BUFSIZ 2048 /* increase this if too small */
  
@@ -378,7 +378,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
              line, u_errorName(status));
          return FALSE;
      }
-    if (line==376) { RegexPatternDump(REPattern);}
+    if (line==376) { REPattern->dumpPattern();}
  
      UnicodeString inputString(inputText);
      UnicodeString unEscapedInput = inputString.unescape();
@@ -414,7 +414,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
      }
  
      if (retVal == FALSE) {
-        RegexPatternDump(REPattern);
+        REPattern->dumpPattern();
      }
  
      delete REPattern;
@@ -441,12 +441,12 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
              line, u_errorName(status));
          return FALSE;
      }
-    
+
      UnicodeString inputString(text, -1, US_INV);
      UnicodeString unEscapedInput = inputString.unescape();
      LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
      ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
-    
+
      inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
      if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
          // UTF-8 does not allow unpaired surrogates, so this could actually happen
@@ -457,7 +457,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
      textChars = new char[inputUTF8Length+1];
      unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
      utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
-    
+
      REMatcher = &REPattern->matcher(status)->reset(&inputText);
      if (U_FAILURE(status)) {
          errln("RegexTest failure in REPattern::matcher() at line %d (UTF8).  Status = %s\n",
@@ -490,7 +490,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
      }
  
      if (retVal == FALSE) {
-        RegexPatternDump(REPattern);
+        REPattern->dumpPattern();
      }
  
      delete REPattern;
@@ -556,7 +556,7 @@ void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
              }
          }
      }
-    
+
      delete callerPattern;
      utext_close(&patternText);
  }
@@ -583,7 +583,7 @@ void RegexTest::Basic() {
          UErrorCode  status = U_ZERO_ERROR;
          RegexPattern *pattern;
          pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
-        RegexPatternDump(pattern);
+        pattern->dumpPattern();
          RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
          UBool result = m->find();
          printf("result = %d\n", result);
@@ -731,18 +731,18 @@ void RegexTest::UTextBasic() {
      utext_openUTF8(&pattern, str_abc, -1, &status);
      RegexMatcher matcher(&pattern, 0, status);
      REGEX_CHECK_STATUS;
-    
+
      UText input = UTEXT_INITIALIZER;
      utext_openUTF8(&input, str_abc, -1, &status);
      REGEX_CHECK_STATUS;
      matcher.reset(&input);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
-    
+
      matcher.reset(matcher.inputText());
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
-    
+
      utext_close(&pattern);
      utext_close(&input);
  }
@@ -1119,7 +1119,7 @@ void RegexTest::API_Match() {
          delete m;
          delete p;
      }
-    
+
      //
      // Regions
      //
@@ -1132,34 +1132,34 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m.regionEnd() == testString.length());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-        
+
          m.region(2,4, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.matches(status));
          REGEX_ASSERT(m.start(status)==2);
          REGEX_ASSERT(m.end(status)==4);
          REGEX_CHECK_STATUS;
-        
+
          m.reset();
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == testString.length());
-        
+
          UnicodeString shorterString("short");
          m.reset(shorterString);
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == shorterString.length());
-        
+
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
-        
+
          REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-    
+
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
          REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
@@ -1170,9 +1170,9 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
-        
+
      }
-    
+
      //
      // hitEnd() and requireEnd()
      //
@@ -1184,7 +1184,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(m1.hitEnd() == TRUE);
          REGEX_ASSERT(m1.requireEnd() == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          status = U_ZERO_ERROR;
          RegexMatcher m2("a*", testString, 0, status);
          REGEX_ASSERT(m2.lookingAt(status) == TRUE);
@@ -1222,7 +1222,7 @@ void RegexTest::API_Match() {
  #endif
  
      //
-    //  Time Outs.  
+    //  Time Outs.
      //       Note:  These tests will need to be changed when the regexp engine is
      //              able to detect and cut short the exponential time behavior on
      //              this type of match.
@@ -1250,22 +1250,22 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
          REGEX_CHECK_STATUS;
      }
-    
+
      //
      //  Stack Limits
      //
      {
          UErrorCode status = U_ZERO_ERROR;
          UnicodeString testString(1000000, 0x41, 1000000);  // Length 1,000,000, filled with 'A'
-        
+
          // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
          //   of the '+', and makes the stack frames larger.
          RegexMatcher matcher("(A)+A$", testString, 0, status);
-        
+
          // With the default stack, this match should fail to run
          REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
          REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
-        
+
          // With unlimited stack, it should run
          status = U_ZERO_ERROR;
          matcher.setStackLimit(0, status);
@@ -1281,7 +1281,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
          REGEX_ASSERT(matcher.getStackLimit() == 10000);
      }
-        
+
          // A pattern that doesn't save state should work with
          //   a minimal sized stack
      {
@@ -1294,7 +1294,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(matcher.matches(status) == TRUE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(matcher.getStackLimit() == 30);
-        
+
          // Negative stack sizes should fail
          status = U_ZERO_ERROR;
          matcher.setStackLimit(1000, status);
@@ -1303,7 +1303,7 @@ void RegexTest::API_Match() {
          REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
          REGEX_ASSERT(matcher.getStackLimit() == 1000);
      }
-    
+
  
  }
  
@@ -1852,7 +1852,7 @@ void RegexTest::API_Match_UTF8() {
          regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
          REGEX_VERBOSE_TEXT(&input2);
          utext_openUChars(&empty, NULL, 0, &status);
-        
+
          int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
          int32_t input2Len = strlen("not abc");
  
@@ -1962,7 +1962,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete m1;
          delete pat2;
-        
+
          utext_close(&re);
          utext_close(&input1);
          utext_close(&input2);
@@ -1983,10 +1983,10 @@ void RegexTest::API_Match_UTF8() {
          UText               re=UTEXT_INITIALIZER;
          const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
          utext_openUTF8(&re, str_01234567_pat, -1, &status);
-        
+
          RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
          REGEX_CHECK_STATUS;
-        
+
          UText input = UTEXT_INITIALIZER;
          const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
          utext_openUTF8(&input, str_0123456789, -1, &status);
@@ -2021,7 +2021,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
  
          matcher->lookingAt(status);
-        
+
          UnicodeString dest;
          UText destText = UTEXT_INITIALIZER;
          utext_openUnicodeString(&destText, &dest, &status);
@@ -2040,7 +2040,7 @@ void RegexTest::API_Match_UTF8() {
          //  destText is now immutable, reopen it
          utext_close(&destText);
          utext_openUnicodeString(&destText, &dest, &status);
-        
+
          result = matcher->group(0, NULL, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
@@ -2049,7 +2049,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
          REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
-        
+
          result = matcher->group(1, NULL, status);
          REGEX_CHECK_STATUS;
          const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
@@ -2059,7 +2059,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
          REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
-        
+
          result = matcher->group(2, NULL, status);
          REGEX_CHECK_STATUS;
          const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
@@ -2069,7 +2069,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &destText);
          REGEX_ASSERT_UTEXT_UTF8(str_45, result);
-        
+
          result = matcher->group(3, NULL, status);
          REGEX_CHECK_STATUS;
          const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
@@ -2087,7 +2087,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&destText);
          utext_close(&input);
          utext_close(&re);
@@ -2148,7 +2148,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&input);
          utext_close(&re);
      }
@@ -2166,7 +2166,7 @@ void RegexTest::API_Match_UTF8() {
          utext_openUTF8(&re, str_Gabcabc, -1, &status);
  
          RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
-        
+
          REGEX_CHECK_STATUS;
          UText input = UTEXT_INITIALIZER;
          const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
@@ -2188,7 +2188,7 @@ void RegexTest::API_Match_UTF8() {
  
          delete matcher;
          delete pat;
-        
+
          utext_close(&input);
          utext_close(&re);
      }
@@ -2228,7 +2228,7 @@ void RegexTest::API_Match_UTF8() {
              REGEX_ASSERT(m.end(status) == i);
          }
          REGEX_ASSERT(i==20);
-        
+
          utext_close(&s);
      }
      {
@@ -2250,7 +2250,7 @@ void RegexTest::API_Match_UTF8() {
              REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
          }
          REGEX_ASSERT(i==5);
-        
+
          utext_close(&s);
      }
  
@@ -2278,7 +2278,7 @@ void RegexTest::API_Match_UTF8() {
          delete m;
          delete p;
      }
-    
+
      //
      // Regions
      //
@@ -2290,42 +2290,42 @@ void RegexTest::API_Match_UTF8() {
          REGEX_VERBOSE_TEXT(&testPattern);
          regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
          REGEX_VERBOSE_TEXT(&testText);
-        
+
          RegexMatcher m(&testPattern, &testText, 0, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-        
+
          m.region(2,4, status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(m.matches(status));
          REGEX_ASSERT(m.start(status)==2);
          REGEX_ASSERT(m.end(status)==4);
          REGEX_CHECK_STATUS;
-        
+
          m.reset();
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
-        
+
          regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
          REGEX_VERBOSE_TEXT(&testText);
          m.reset(&testText);
          REGEX_ASSERT(m.regionStart() == 0);
          REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
-        
+
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
-        
+
          REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
-    
+
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
          REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
@@ -2336,11 +2336,11 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
          REGEX_ASSERT(&m == &m.reset());
          REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
-        
+
          utext_close(&testText);
          utext_close(&testPattern);
      }
-    
+
      //
      // hitEnd() and requireEnd()
      //
@@ -2352,13 +2352,13 @@ void RegexTest::API_Match_UTF8() {
          const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
          utext_openUTF8(&testPattern, str_, -1, &status);
          utext_openUTF8(&testText, str_aabb, -1, &status);
-        
+
          RegexMatcher m1(&testPattern, &testText,  0, status);
          REGEX_ASSERT(m1.lookingAt(status) == TRUE);
          REGEX_ASSERT(m1.hitEnd() == TRUE);
          REGEX_ASSERT(m1.requireEnd() == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          status = U_ZERO_ERROR;
          const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
          utext_openUTF8(&testPattern, str_a, -1, &status);
@@ -2376,7 +2376,7 @@ void RegexTest::API_Match_UTF8() {
          REGEX_ASSERT(m3.hitEnd() == TRUE);
          REGEX_ASSERT(m3.requireEnd() == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          utext_close(&testText);
          utext_close(&testPattern);
      }
@@ -2402,7 +2402,7 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_VERBOSE_TEXT(&re);
      RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
      REGEX_CHECK_STATUS;
-    
+
      char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
      //             012345678901234567
      UText dataText = UTEXT_INITIALIZER;
@@ -2418,9 +2418,9 @@ void RegexTest::API_Replace_UTF8() {
      UText destText = UTEXT_INITIALIZER;
      utext_openUnicodeString(&destText, &dest, &status);
      UText *result;
-    
+
      UText replText = UTEXT_INITIALIZER;
-    
+
      const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
      utext_openUTF8(&replText, str_yz, -1, &status);
      REGEX_VERBOSE_TEXT(&replText);
@@ -2452,7 +2452,7 @@ void RegexTest::API_Replace_UTF8() {
      const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
      utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
      matcher->reset(&dataText);
-    
+
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
@@ -2477,7 +2477,7 @@ void RegexTest::API_Replace_UTF8() {
      //
      utext_openUTF8(&dataText, NULL, 0, &status);
      matcher->reset(&dataText);
-    
+
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      REGEX_ASSERT_UTEXT_UTF8("", result);
@@ -2501,7 +2501,7 @@ void RegexTest::API_Replace_UTF8() {
      //
      utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
      matcher->reset(&dataText);
-    
+
      utext_openUTF8(&replText, NULL, 0, &status);
      result = matcher->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
@@ -2565,7 +2565,7 @@ void RegexTest::API_Replace_UTF8() {
      utext_openUTF8(&dataText, str_abcdefg, -1, &status);
      RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
      REGEX_CHECK_STATUS;
-    
+
      const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
      utext_openUTF8(&replText, str_11, -1, &status);
      result = matcher2->replaceFirst(&replText, NULL, status);
@@ -2578,8 +2578,8 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_CHECK_STATUS;
      REGEX_ASSERT(result == &destText);
      REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
-   
-    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ 
+
+    const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
      utext_openUTF8(&replText, str_v, -1, &status);
      REGEX_VERBOSE_TEXT(&replText);
      result = matcher2->replaceFirst(&replText, NULL, status);
@@ -2592,7 +2592,7 @@ void RegexTest::API_Replace_UTF8() {
      REGEX_CHECK_STATUS;
      REGEX_ASSERT(result == &destText);
      REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
-    
+
      const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
      utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
      result = matcher2->replaceFirst(&replText, NULL, status);
@@ -2614,7 +2614,7 @@ void RegexTest::API_Replace_UTF8() {
      supplDigitChars[24] = 0x9F;
      supplDigitChars[25] = 0x8F;
      utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
-    
+
      result = matcher2->replaceFirst(&replText, NULL, status);
      REGEX_CHECK_STATUS;
      const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
@@ -2644,7 +2644,7 @@ void RegexTest::API_Replace_UTF8() {
          utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
          utext_openUTF8(&replText, str_u0043, -1, &status);
          matcher->reset(&dataText);
-        
+
          result = matcher->replaceAll(&replText, NULL, status);
          REGEX_CHECK_STATUS;
          const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
@@ -2664,7 +2664,7 @@ void RegexTest::API_Replace_UTF8() {
          matcher->reset(&dataText);
  
          unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
-        //                          0123456789     
+        //                          0123456789
          expected[2] = 0xF0;
          expected[3] = 0x90;
          expected[4] = 0x80;
@@ -2692,10 +2692,10 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
          utext_openUTF8(&re, str_ssee, -1, &status);
          utext_openUTF8(&dataText, str_blah, -1, &status);
          utext_openUTF8(&replText, str_ooh, -1, &status);
-        
+
          RegexMatcher m(&re, 0, status);
          REGEX_CHECK_STATUS;
-        
+
          UnicodeString result;
          UText resultText = UTEXT_INITIALIZER;
          utext_openUnicodeString(&resultText, &result, &status);
@@ -2736,7 +2736,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
          m.appendTail(&resultText, status);
          const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
          REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
-        
+
          utext_close(&resultText);
      }
  
@@ -2744,7 +2744,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
      delete pat2;
      delete matcher;
      delete pat;
-    
+
      utext_close(&dataText);
      utext_close(&replText);
      utext_close(&destText);
@@ -2769,7 +2769,7 @@ void RegexTest::API_Pattern_UTF8() {
      UText         re2 = UTEXT_INITIALIZER;
      UErrorCode    status = U_ZERO_ERROR;
      UParseError   pe;
-    
+
      const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
      const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
      utext_openUTF8(&re1, str_abcalmz, -1, &status);
@@ -2818,7 +2818,7 @@ void RegexTest::API_Pattern_UTF8() {
      delete pat1a;
      delete pat1;
      delete pat2;
-    
+
      utext_close(&re1);
      utext_close(&re2);
  
@@ -2832,13 +2832,13 @@ void RegexTest::API_Pattern_UTF8() {
          UText          pattern    = UTEXT_INITIALIZER;
          const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
          utext_openUTF8(&pattern, str_pL, -1, &status);
-        
+
          RegexPattern  *pSource    = RegexPattern::compile(&pattern, 0, status);
          RegexPattern  *pClone     = pSource->clone();
          delete         pSource;
          RegexMatcher  *mFromClone = pClone->matcher(status);
          REGEX_CHECK_STATUS;
-        
+
          UText          input      = UTEXT_INITIALIZER;
          const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
          utext_openUTF8(&input, str_HelloWorld, -1, &status);
@@ -2850,7 +2850,7 @@ void RegexTest::API_Pattern_UTF8() {
          REGEX_ASSERT(mFromClone->find() == FALSE);
          delete mFromClone;
          delete pClone;
-        
+
          utext_close(&input);
          utext_close(&pattern);
      }
@@ -2862,7 +2862,7 @@ void RegexTest::API_Pattern_UTF8() {
          UErrorCode status  = U_ZERO_ERROR;
          UText      pattern = UTEXT_INITIALIZER;
          UText      input   = UTEXT_INITIALIZER;
-        
+
          const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
          utext_openUTF8(&input, str_randominput, -1, &status);
  
@@ -2870,17 +2870,17 @@ void RegexTest::API_Pattern_UTF8() {
          utext_openUTF8(&pattern, str_dotstar, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
          utext_openUTF8(&pattern, str_abc, -1, &status);
          REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
          utext_openUTF8(&pattern, str_nput, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
          REGEX_CHECK_STATUS;
-        
+
          utext_openUTF8(&pattern, str_randominput, -1, &status);
          REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
          REGEX_CHECK_STATUS;
@@ -2889,13 +2889,13 @@ void RegexTest::API_Pattern_UTF8() {
          utext_openUTF8(&pattern, str_u, -1, &status);
          REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
          REGEX_CHECK_STATUS;
-        
+
          utext_openUTF8(&input, str_abc, -1, &status);
          utext_openUTF8(&pattern, str_abc, -1, &status);
          status = U_INDEX_OUTOFBOUNDS_ERROR;
          REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
          REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
-        
+
          utext_close(&input);
          utext_close(&pattern);
      }
@@ -3286,7 +3286,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
                             int32_t line) {
      UnicodeString       unEscapedInput;
      UnicodeString       deTaggedInput;
-    
+
      int32_t             patternUTF8Length,      inputUTF8Length;
      char                *patternChars  = NULL, *inputChars = NULL;
      UText               patternText    = UTEXT_INITIALIZER;
@@ -3313,7 +3313,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      int32_t             regionEnd        = -1;
      int32_t             regionStartUTF8  = -1;
      int32_t             regionEndUTF8    = -1;
-    
+
  
      //
      //  Compile the caller's pattern
@@ -3331,7 +3331,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      if (flags.indexOf((UChar)0x6d) >= 0)  { // 'm' flag
          bflags |= UREGEX_MULTILINE;
      }
-    
+
      if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
          bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
      }
@@ -3367,16 +3367,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,
  
      UTF8Converter = ucnv_open("UTF8", &status);
      ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
-    
+
      patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
      status = U_ZERO_ERROR; // buffer overflow
      patternChars = new char[patternUTF8Length+1];
      pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
      utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
-    
+
      if (status == U_ZERO_ERROR) {
          UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
-        
+
          if (status != U_ZERO_ERROR) {
  #if UCONFIG_NO_BREAK_ITERATION==1
              // 'v' test flag means that the test pattern should not compile if ICU was configured
@@ -3398,7 +3398,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              }
          }
      }
-    
+
      if (UTF8Pattern == NULL) {
          // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
          logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
@@ -3406,7 +3406,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      }
  
      if (flags.indexOf((UChar)0x64) >= 0) {  // 'd' flag
-        RegexPatternDump(callerPattern);
+        callerPattern->dumpPattern();
      }
  
      if (flags.indexOf((UChar)0x45) >= 0) {  // 'E' flag
@@ -3428,7 +3428,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              numFinds = i;
          }
      }
-    
+
      // 'M' flag.  Use matches() instead of find()
      if (flags.indexOf((UChar)0x4d) >= 0) {
          useMatchesFunc = TRUE;
@@ -3483,7 +3483,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      if (flags.indexOf((UChar)0x74) >= 0) {   //  't' trace flag
          matcher->setTrace(TRUE);
      }
-    
+
      if (UTF8Pattern != NULL) {
          inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
          status = U_ZERO_ERROR; // buffer overflow
@@ -3495,7 +3495,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
              REGEX_CHECK_STATUS_L(line);
          }
-        
+
          if (UTF8Matcher == NULL) {
              // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
            logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
@@ -3509,7 +3509,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
      if (UTF8Matcher != NULL) {
          if (regionStart>=0)    (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
          if (regionEnd>=0)      (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
-       
+
          //  Fill out the native index UVector info.
          //  Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
          for (i=0; i<groupStarts.size(); i++) {
@@ -3524,7 +3524,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
                  }
                  setInt(groupStartsUTF8, startUTF8, i);
              }
-            
+
              int32_t  end = groupEnds.elementAti(i);
              //  -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
              if (end >= 0) {
@@ -3559,8 +3559,8 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              UTF8Matcher->useTransparentBounds(TRUE);
          }
      }
-    
-    
+
+
  
      //
      // Do a find on the de-tagged input using the caller's pattern
@@ -3635,7 +3635,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
              failed = TRUE;
              goto cleanupAndReturn;  // Good chance of subsequent bogus errors.  Stop now.
          }
-        
+
          int32_t  expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
          int32_t  expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
          if (matcher->end(i, status) != expectedEnd) {
@@ -3672,7 +3672,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: requireEnd() returned TRUE.  Expected FALSE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x79) >= 0) &&   //  'y' flag:  RequireEnd() == true
          matcher->requireEnd() == FALSE) {
          errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE", line);
@@ -3682,7 +3682,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: requireEnd() returned FALSE.  Expected TRUE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x5A) >= 0) &&   //  'Z' flag:  hitEnd() == false
          matcher->hitEnd() == TRUE) {
          errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE", line);
@@ -3692,7 +3692,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
          errln("Error at line %d: hitEnd() returned TRUE.  Expected FALSE (UTF8)", line);
          failed = TRUE;
      }
-    
+
      if ((flags.indexOf((UChar)0x7A) >= 0) &&   //  'z' flag:  hitEnd() == true
          matcher->hitEnd() == FALSE) {
          errln("Error at line %d: hitEnd() returned FALSE.  Expected TRUE", line);
@@ -3716,7 +3716,7 @@ cleanupAndReturn:
      delete UTF8Pattern;
      delete matcher;
      delete callerPattern;
-    
+
      utext_close(&inputText);
      delete[] inputChars;
      utext_close(&patternText);
@@ -3792,7 +3792,7 @@ void RegexTest::Errors() {
  
  
  //-------------------------------------------------------------------------------
-//      
+//
  //  Read a text data file, convert it to UChars, and return the data
  //    in one big UChar * buffer, which the caller must delete.
  //
@@ -4135,7 +4135,7 @@ void RegexTest::PerlTests() {
                  lineNum, expected?"":"no ", found?"":"no " );
              continue;
          }
-        
+
          // Don't try to check expected results if there is no match.
          //   (Some have stuff in the expected fields)
          if (!found) {
@@ -4433,7 +4433,7 @@ void RegexTest::PerlTestsUTF8() {
          if (flagStr.indexOf(UChar_x) != -1) {
              flags |= UREGEX_COMMENTS;
          }
-        
+
          //
          // Put the pattern in a UTF-8 UText
          //
@@ -4530,7 +4530,7 @@ void RegexTest::PerlTestsUTF8() {
                  lineNum, expected?"":"no ", found?"":"no " );
              continue;
          }
-        
+
          // Don't try to check expected results if there is no match.
          //   (Some have stuff in the expected fields)
          if (!found) {
@@ -4673,10 +4673,10 @@ void RegexTest::PerlTestsUTF8() {
  
      delete fieldPat;
      delete [] testData;
-    
+
      utext_close(&patternText);
      utext_close(&inputText);
-    
+
      delete [] patternChars;
      delete [] inputChars;
  
@@ -4740,12 +4740,12 @@ U_CDECL_END
  void RegexTest::Callbacks() {
     {
          // Getter returns NULLs if no callback has been set
-        
+
          //   The variables that the getter will fill in.
          //   Init to non-null values so that the action of the getter can be seen.
          const void          *returnedContext = &returnedContext;
          URegexMatchCallback *returnedFn = &testCallBackFn;
-        
+
          UErrorCode status = U_ZERO_ERROR;
          RegexMatcher matcher("x", 0, status);
          REGEX_CHECK_STATUS;
@@ -4754,7 +4754,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(returnedFn == NULL);
          REGEX_ASSERT(returnedContext == NULL);
      }
-    
+
     {
          // Set and Get work
          callBackContext cbInfo = {this, 0, 0, 0};
@@ -4769,7 +4769,7 @@ void RegexTest::Callbacks() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(returnedFn == testCallBackFn);
          REGEX_ASSERT(returnedContext == &cbInfo);
-        
+
          // A short-running match shouldn't invoke the callback
          status = U_ZERO_ERROR;
          cbInfo.reset(1);
@@ -4778,7 +4778,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(matcher.matches(status));
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls == 0);
-        
+
          // A medium-length match that runs long enough to invoke the
          //   callback, but not so long that the callback aborts it.
          status = U_ZERO_ERROR;
@@ -4788,7 +4788,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(matcher.matches(status)==FALSE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls > 0);
-        
+
          // A longer running match that the callback function will abort.
          status = U_ZERO_ERROR;
          cbInfo.reset(4);
@@ -4798,7 +4798,7 @@ void RegexTest::Callbacks() {
          REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
          REGEX_ASSERT(cbInfo.numCalls == 4);
      }
- 
+
  
  }
  
@@ -4832,12 +4832,12 @@ U_CDECL_END
  void RegexTest::FindProgressCallbacks() {
     {
          // Getter returns NULLs if no callback has been set
-        
+
          //   The variables that the getter will fill in.
          //   Init to non-null values so that the action of the getter can be seen.
          const void                  *returnedContext = &returnedContext;
          URegexFindProgressCallback  *returnedFn = &testProgressCallBackFn;
-        
+
          UErrorCode status = U_ZERO_ERROR;
          RegexMatcher matcher("x", 0, status);
          REGEX_CHECK_STATUS;
@@ -4846,7 +4846,7 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(returnedFn == NULL);
          REGEX_ASSERT(returnedContext == NULL);
      }
-    
+
     {
          // Set and Get work
          progressCallBackContext cbInfo = {this, 0, 0, 0};
@@ -4861,7 +4861,7 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(returnedFn == testProgressCallBackFn);
          REGEX_ASSERT(returnedContext == &cbInfo);
-        
+
          // A short-running match should NOT invoke the callback.
          status = U_ZERO_ERROR;
          cbInfo.reset(100);
@@ -4873,7 +4873,7 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(matcher.find(0, status));
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls == 0);
-        
+
          // A medium running match that causes matcher.find() to invoke our callback for each index.
          status = U_ZERO_ERROR;
          s = "aaaaaaaaaaaaaaaaaaab";
@@ -4882,7 +4882,7 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_ASSERT(matcher.find(0, status)==FALSE);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
-        
+
          // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
          status = U_ZERO_ERROR;
          UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
@@ -4906,7 +4906,7 @@ void RegexTest::FindProgressCallbacks() {
          REGEX_CHECK_STATUS;
  #endif
      }
- 
+
  
  }
  
@@ -4925,7 +4925,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
      UText                patternText = UTEXT_INITIALIZER;
      UnicodeString        buffer;
      UText                bufferText = UTEXT_INITIALIZER;
-    
+
      utext_openUnicodeString(&bufferText, &buffer, &status);
  
      /*
@@ -4942,7 +4942,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
          u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
          utext_openUChars(&text2, text2Chars, -1, &status);
-        
+
          regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
          re = uregex_openUText(&patternText, 0, NULL, &status);
  
@@ -4954,7 +4954,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          utext_setNativeIndex(resultText, 0);
          utext_setNativeIndex(&text1, 0);
          REGEX_ASSERT(testUTextEqual(resultText, &text1));
-        
+
          resultText = uregex_getUText(re, &bufferText, &status);
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(resultText == &bufferText);
@@ -4970,7 +4970,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          utext_setNativeIndex(resultText, 0);
          utext_setNativeIndex(&text2, 0);
          REGEX_ASSERT(testUTextEqual(resultText, &text2));
-        
+
          uregex_close(re);
          utext_close(&text1);
          utext_close(&text2);
@@ -5016,7 +5016,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          uregex_close(re);
  
      }
-    
+
      /*
       *  replaceFirst()
       */
@@ -5025,7 +5025,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          UChar    text2[80];
          UText    replText = UTEXT_INITIALIZER;
          UText   *result;
-        
+
          status = U_ZERO_ERROR;
          u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
          u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
@@ -5049,7 +5049,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
          REGEX_CHECK_STATUS;
          REGEX_ASSERT(result == &bufferText);
          REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
-        
+
          /* Unicode escapes */
          uregex_setText(re, text1, -1, &status);
          regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
@@ -5106,7 +5106,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
       *  splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
       *   so we don't need to test it here.
       */
-    
+
      utext_close(&bufferText);
      utext_close(&patternText);
  }
@@ -5181,7 +5181,7 @@ void RegexTest::Bug8479() {
          delete pMatcher;
      }
  }
-     
+
  
  // Bug 7029
  void RegexTest::Bug7029() {
@@ -5199,11 +5199,11 @@ void RegexTest::Bug7029() {
  
  // Bug 9283
  //   This test is checking for the existance of any supplemental characters that case-fold
-//   to a bmp character.  
+//   to a bmp character.
  //
-//   At the time of this writing there are none. If any should appear in a subsequent release 
-//   of Unicode, the code in regular expressions compilation that determines the longest 
-//   posssible match for a literal string  will need to be enhanced.  
+//   At the time of this writing there are none. If any should appear in a subsequent release
+//   of Unicode, the code in regular expressions compilation that determines the longest
+//   posssible match for a literal string  will need to be enhanced.
  //
  //   See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
  //   for details on what to do in case of a failure of this test.
@@ -5247,7 +5247,7 @@ void RegexTest::Bug10459() {
  
      URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);
      REGEX_CHECK_STATUS;
- 
+
      uregex_setUText(icu_re, utext_txt, &status);
      REGEX_CHECK_STATUS;
  
@@ -5256,7 +5256,7 @@ void RegexTest::Bug10459() {
      //   It should set an U_REGEX_INVALID_STATE.
  
      UChar buf[100];
-    int32_t len = uregex_group(icu_re, 0, buf, LENGTHOF(buf), &status);    
+    int32_t len = uregex_group(icu_re, 0, buf, LENGTHOF(buf), &status);
      REGEX_ASSERT(status == U_REGEX_INVALID_STATE);
      REGEX_ASSERT(len == 0);
author	Andy Heninger <andy.heninger@gmail.com>
	Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Mon, 14 Oct 2013 22:11:21 +0000 (22:11 +0000)
icu4c/source/i18n/regexcmp.cpp		patch \| blob \| history
icu4c/source/i18n/regeximp.h		patch \| blob \| history
icu4c/source/i18n/rematch.cpp		patch \| blob \| history
icu4c/source/i18n/repattrn.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/regex.h		patch \| blob \| history
icu4c/source/test/intltest/regextst.cpp		patch \| blob \| history