From 10dd7ed47b43c13fd8fd7d24d84f21cc5652ea85 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Mon, 14 Oct 2013 22:11:21 +0000 Subject: [PATCH] ICU-10463 Regular Expressions, rework debug conditionals to fix build failures on clang, and to somewhat simplify. X-SVN-Rev: 34565 --- icu4c/source/i18n/regexcmp.cpp | 78 +++---- icu4c/source/i18n/regeximp.h | 29 +-- icu4c/source/i18n/rematch.cpp | 61 ++--- icu4c/source/i18n/repattrn.cpp | 117 +++++----- icu4c/source/i18n/unicode/regex.h | 27 +-- icu4c/source/test/intltest/regextst.cpp | 288 ++++++++++++------------ 6 files changed, 284 insertions(+), 316 deletions(-) diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index 0ec61543295..52f132b55f2 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -109,7 +109,7 @@ void RegexCompile::compile( fRXPat->fPatternString = new UnicodeString(pat); UText patternText = UTEXT_INITIALIZER; utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e); - + if (U_SUCCESS(e)) { compile(&patternText, pp, e); utext_close(&patternText); @@ -568,13 +568,13 @@ UBool RegexCompile::doParseActions(int32_t action) op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3); fRXPat->fCompiledPat->addElement(op, *fStatus); - + op = URX_BUILD(URX_LA_END, dataLoc); fRXPat->fCompiledPat->addElement(op, *fStatus); op = URX_BUILD(URX_BACKTRACK, 0); fRXPat->fCompiledPat->addElement(op, *fStatus); - + op = URX_BUILD(URX_NOP, 0); fRXPat->fCompiledPat->addElement(op, *fStatus); fRXPat->fCompiledPat->addElement(op, *fStatus); @@ -1147,7 +1147,7 @@ UBool RegexCompile::doParseActions(int32_t action) } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) { op = URX_CARET_M; } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { - op = URX_CARET; // Only testing true start of input. + op = URX_CARET; // Only testing true start of input. } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { op = URX_CARET_M_UNIX; } @@ -1281,7 +1281,7 @@ UBool RegexCompile::doParseActions(int32_t action) literalChar(c); } break; - + case doBackRef: // BackReference. Somewhat unusual in that the front-end can not completely parse @@ -1643,7 +1643,7 @@ UBool RegexCompile::doParseActions(int32_t action) compileSet(theSet); break; } - + case doSetIntersection2: // Have scanned something like [abc&& setPushOp(setIntersection2); @@ -1654,7 +1654,7 @@ UBool RegexCompile::doParseActions(int32_t action) // This operation is the highest precedence set operation, so we can always do // it immediately, without waiting to see what follows. It is necessary to perform // any pending '-' or '&' operation first, because these have the same precedence - // as union-ing in a literal' + // as union-ing in a literal' { setEval(setUnion); UnicodeSet *s = (UnicodeSet *)fSetStack.peek(); @@ -1749,7 +1749,7 @@ UBool RegexCompile::doParseActions(int32_t action) } // else error. scanProp() reported the error status already. } break; - + case doSetProp: // Scanned a \p \P within [brackets]. { @@ -1771,7 +1771,7 @@ UBool RegexCompile::doParseActions(int32_t action) // and ICU UnicodeSet behavior. { if (fLastSetLiteral > fC.fChar) { - error(U_REGEX_INVALID_RANGE); + error(U_REGEX_INVALID_RANGE); } UnicodeSet *s = (UnicodeSet *)fSetStack.peek(); s->add(fLastSetLiteral, fC.fChar); @@ -1830,7 +1830,7 @@ void RegexCompile::fixLiterals(UBool split) { int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1); UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint); - // Split: We need to ensure that the last item in the compiled pattern + // Split: We need to ensure that the last item in the compiled pattern // refers only to the last literal scanned in the pattern, so that // quantifiers (*, +, etc.) affect only it, and not a longer string. // Split before case folding for case insensitive matches. @@ -1856,7 +1856,7 @@ void RegexCompile::fixLiterals(UBool split) { if (indexOfLastCodePoint == 0) { // Single character, emit a URX_ONECHAR op to match it. - if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && + if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) { op = URX_BUILD(URX_ONECHAR_I, lastCodePoint); } else { @@ -1875,7 +1875,7 @@ void RegexCompile::fixLiterals(UBool split) { fRXPat->fCompiledPat->addElement(op, *fStatus); op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length()); fRXPat->fCompiledPat->addElement(op, *fStatus); - + // Add this string into the accumulated strings of the compiled pattern. fRXPat->fLiteralText.append(fLiteralChars); } @@ -2449,7 +2449,7 @@ void RegexCompile::matchStartType() { case URX_STO_INP_LOC: case URX_BACKREF: // BackRef. Must assume that it might be a zero length match case URX_BACKREF_I: - + case URX_STO_SP: // Setup for atomic or possessive blocks. Doesn't change what can match. case URX_LD_SP: break; @@ -2762,7 +2762,7 @@ void RegexCompile::matchStartType() { { // Look-around. Scan forward until the matching look-ahead end, // without processing the look-around block. This is overly pessimistic. - + // Keep track of the nesting depth of look-around blocks. Boilerplate code for // lookahead contains two LA_END instructions, so count goes up by two // for each LA_START. @@ -3322,7 +3322,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { // compiled (folded) string. Folding may add code points, but // not remove them. // - // There is a potential problem if a supplemental code point + // There is a potential problem if a supplemental code point // case-folds to a BMP code point. In this case our compiled string // could be shorter (in code units) than a matching user string. // @@ -3353,7 +3353,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { loc = loopEndLoc; break; } - + int32_t maxLoopCount = fRXPat->fCompiledPat->elementAti(loc+3); if (maxLoopCount == -1) { // Unbounded Loop. No upper bound on match length. @@ -3471,7 +3471,7 @@ void RegexCompile::stripNOPs() { d++; } } - + UnicodeString caseStringBuffer; // Make a second pass over the code, removing the NOPs by moving following @@ -3518,7 +3518,7 @@ void RegexCompile::stripNOPs() { op = URX_BUILD(opType, where); fRXPat->fCompiledPat->setElementAt(op, dst); dst++; - + fRXPat->fNeedsAltInput = TRUE; break; } @@ -3609,7 +3609,7 @@ void RegexCompile::error(UErrorCode e) { fParseErr->line = (int32_t)fLineNum; fParseErr->offset = (int32_t)fCharNum; } - + UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context // Fill in the context. @@ -3663,7 +3663,7 @@ UChar32 RegexCompile::nextCharLL() { fPeekChar = -1; return ch; } - + // assume we're already in the right place ch = UTEXT_NEXT32(fRXPat->fPattern); if (ch == U_SENTINEL) { @@ -3719,7 +3719,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) { if (fQuoteMode) { c.fQuoted = TRUE; - if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) || + if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) || c.fChar == (UChar32)-1) { fQuoteMode = FALSE; // Exit quote mode, nextCharLL(); // discard the E @@ -3780,11 +3780,11 @@ void RegexCompile::nextChar(RegexPatternChar &c) { // nextCharLL(); // get & discard the peeked char. c.fQuoted = TRUE; - + if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) { int32_t endIndex = (int32_t)pos; c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents); - + if (endIndex == pos) { error(U_REGEX_BAD_ESCAPE_SEQUENCE); } @@ -3793,7 +3793,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) { } else { int32_t offset = 0; struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(fRXPat->fPattern); - + UTEXT_SETNATIVEINDEX(fRXPat->fPattern, pos); c.fChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context); @@ -3836,8 +3836,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) { c.fChar >>= 3; } } - c.fQuoted = TRUE; - } + c.fQuoted = TRUE; + } else if (peekCharLL() == chQ) { // "\Q" enter quote mode, which will continue until "\E" fQuoteMode = TRUE; @@ -3885,7 +3885,7 @@ UChar32 RegexCompile::scanNamedChar() { error(U_REGEX_PROPERTY_SYNTAX); return 0; } - + UnicodeString charName; for (;;) { nextChar(fC); @@ -3898,7 +3898,7 @@ UChar32 RegexCompile::scanNamedChar() { } charName.append(fC.fChar); } - + char name[100]; if (!uprv_isInvariantUString(charName.getBuffer(), charName.length()) || (uint32_t)charName.length()>=sizeof(name)) { @@ -4006,7 +4006,7 @@ UnicodeSet *RegexCompile::scanPosixProp() { // Scan for a closing ]. A little tricky because there are some perverse // edge cases possible. "[:abc\Qdef:] \E]" is a valid non-property expression, - // ending on the second closing ]. + // ending on the second closing ]. UnicodeString propName; UBool negated = FALSE; @@ -4017,7 +4017,7 @@ UnicodeSet *RegexCompile::scanPosixProp() { negated = TRUE; nextChar(fC); } - + // Scan for the closing ":]", collecting the property name along the way. UBool sawPropSetTerminator = FALSE; for (;;) { @@ -4035,7 +4035,7 @@ UnicodeSet *RegexCompile::scanPosixProp() { break; } } - + if (sawPropSetTerminator) { uset = createSetForProperty(propName, negated); } @@ -4068,7 +4068,7 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) { // Create a Unicode Set from a Unicode Property expression. // This is common code underlying both \p{...} ane [:...:] expressions. // Includes trying the Java "properties" that aren't supported as -// normal ICU UnicodeSet properties +// normal ICU UnicodeSet properties // static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{" static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{" @@ -4076,7 +4076,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB UnicodeString setExpr; UnicodeSet *set; uint32_t usetFlags = 0; - + if (U_FAILURE(*fStatus)) { return NULL; } @@ -4101,13 +4101,13 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB } delete set; set = NULL; - + // // The property as it was didn't work. - // Do [:word:]. It is not recognized as a property by UnicodeSet. "word" not standard POSIX + // Do [:word:]. It is not recognized as a property by UnicodeSet. "word" not standard POSIX // or standard Java, but many other regular expression packages do recognize it. - + if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) { *fStatus = U_ZERO_ERROR; set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET])); @@ -4127,7 +4127,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB // InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols. // // Note on Spaces: either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols" - // is accepted by Java. The property part of the name is compared + // is accepted by Java. The property part of the name is compared // case-insenstively. The spaces must be exactly as shown, either // all there, or all omitted, with exactly one at each position // if they are present. From checking against JDK 1.6 @@ -4146,7 +4146,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) { mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint"); } - + // See if the property looks like a Java "InBlockName", which // we will recast as "Block=BlockName" // @@ -4270,7 +4270,7 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB set = NULL; } error(*fStatus); - return NULL; + return NULL; } diff --git a/icu4c/source/i18n/regeximp.h b/icu4c/source/i18n/regeximp.h index 31d333caf77..bdf84030993 100644 --- a/icu4c/source/i18n/regeximp.h +++ b/icu4c/source/i18n/regeximp.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2002-2012 International Business Machines Corporation +// Copyright (C) 2002-2013 International Business Machines Corporation // and others. All rights reserved. // // file: regeximp.h @@ -22,11 +22,11 @@ U_NAMESPACE_BEGIN -// For debugging, define REGEX_DEBUG +// For debugging, define REGEX_DEBUG // To define with configure, -// ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_DEBUG" +// CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux -#ifdef REGEX_DEBUG +#ifdef REGEX_DEBUG // // debugging options. Enable one or more of the three #defines immediately following // @@ -46,19 +46,6 @@ U_NAMESPACE_BEGIN #define REGEX_SCAN_DEBUG_PRINTF(a) #endif -#ifdef REGEX_DUMP_DEBUG -#define REGEX_DUMP_DEBUG_PRINTF(a) printf a -#else -#define REGEX_DUMP_DEBUG_PRINTF(a) -#endif - -#ifdef REGEX_RUN_DEBUG -#define REGEX_RUN_DEBUG_PRINTF(a) printf a -#define REGEX_DUMP_DEBUG_PRINTF(a) printf a -#else -#define REGEX_RUN_DEBUG_PRINTF(a) -#endif - // // Opcode types In the compiled form of the regexp, these are the type, or opcodes, @@ -373,9 +360,9 @@ class CaseFoldingUTextIterator: public UMemory { CaseFoldingUTextIterator(UText &text); ~CaseFoldingUTextIterator(); - UChar32 next(); // Next case folded character + UChar32 next(); // Next case folded character - UBool inExpansion(); // True if last char returned from next() and the + UBool inExpansion(); // True if last char returned from next() and the // next to be returned both originated from a string // folding of the same code point from the orignal UText. private: @@ -398,9 +385,9 @@ class CaseFoldingUCharIterator: public UMemory { CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit); ~CaseFoldingUCharIterator(); - UChar32 next(); // Next case folded character + UChar32 next(); // Next case folded character - UBool inExpansion(); // True if last char returned from next() and the + UBool inExpansion(); // True if last char returned from next() and the // next to be returned both originated from a string // folding of the same code point from the orignal UText. diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp index 306b416adb0..6ffe61058e7 100644 --- a/icu4c/source/i18n/rematch.cpp +++ b/icu4c/source/i18n/rematch.cpp @@ -2720,7 +2720,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { int32_t opType; // the opcode int32_t opValue; // and the operand value. - #ifdef REGEX_RUN_DEBUG +#ifdef REGEX_RUN_DEBUG if (fTraceDebug) { printf("MatchAt(startIdx=%ld)\n", startIdx); @@ -2730,7 +2730,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { if (c<32 || c>256) { c = '.'; } - REGEX_DUMP_DEBUG_PRINTF(("%c", c)); + printf("%c", c); c = UTEXT_NEXT32(fPattern->fPattern); } @@ -2748,7 +2748,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { printf("\n"); printf("\n"); } - #endif +#endif if (U_FAILURE(status)) { return; @@ -2778,23 +2778,17 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // One iteration of the loop per pattern operation performed. // for (;;) { -#if 0 - if (_heapchk() != _HEAPOK) { - fprintf(stderr, "Heap Trouble\n"); - } -#endif - op = (int32_t)pat[fp->fPatIdx]; opType = URX_TYPE(op); opValue = URX_VAL(op); - #ifdef REGEX_RUN_DEBUG +#ifdef REGEX_RUN_DEBUG if (fTraceDebug) { UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx, UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); fPattern->dumpOp(fp->fPatIdx); } - #endif +#endif fp->fPatIdx++; switch (opType) { @@ -4188,16 +4182,17 @@ breakFromLoop: fLastMatchEnd = fMatchEnd; fMatchStart = startIdx; fMatchEnd = fp->fInputIdx; - if (fTraceDebug) { - REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); - } } - else - { - if (fTraceDebug) { - REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); + +#ifdef REGEX_RUN_DEBUG + if (fTraceDebug) { + if (isMatch) { + printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd); + } else { + printf("No match\n\n"); } } +#endif fFrame = fp; // The active stack frame when the engine stopped. // Contains the capture group results that we need to @@ -4228,8 +4223,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu int32_t opValue; // and the operand value. #ifdef REGEX_RUN_DEBUG - if (fTraceDebug) - { + if (fTraceDebug) { printf("MatchAt(startIdx=%d)\n", startIdx); printf("Original Pattern: "); UChar32 c = utext_next32From(fPattern->fPattern, 0); @@ -4237,7 +4231,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu if (c<32 || c>256) { c = '.'; } - REGEX_DUMP_DEBUG_PRINTF(("%c", c)); + printf("%c", c); c = UTEXT_NEXT32(fPattern->fPattern); } @@ -4287,12 +4281,6 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu // One iteration of the loop per pattern operation performed. // for (;;) { -#if 0 - if (_heapchk() != _HEAPOK) { - fprintf(stderr, "Heap Trouble\n"); - } -#endif - op = (int32_t)pat[fp->fPatIdx]; opType = URX_TYPE(op); opValue = URX_VAL(op); @@ -5627,20 +5615,21 @@ breakFromLoop: fLastMatchEnd = fMatchEnd; fMatchStart = startIdx; fMatchEnd = fp->fInputIdx; - if (fTraceDebug) { - REGEX_RUN_DEBUG_PRINTF(("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); - } } - else - { - if (fTraceDebug) { - REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); + +#ifdef REGEX_RUN_DEBUG + if (fTraceDebug) { + if (isMatch) { + printf("Match. start=%ld end=%ld\n\n", fMatchStart, fMatchEnd); + } else { + printf("No match\n\n"); } } +#endif fFrame = fp; // The active stack frame when the engine stopped. - // Contains the capture group results that we need to - // access later. + // Contains the capture group results that we need to + // access later. return; } diff --git a/icu4c/source/i18n/repattrn.cpp b/icu4c/source/i18n/repattrn.cpp index 1454a093a38..fe6558c7d2e 100644 --- a/icu4c/source/i18n/repattrn.cpp +++ b/icu4c/source/i18n/repattrn.cpp @@ -3,7 +3,7 @@ // /* *************************************************************************** -* Copyright (C) 2002-2012 International Business Machines Corporation * +* Copyright (C) 2002-2013 International Business Machines Corporation * * and others. All rights reserved. * *************************************************************************** */ @@ -275,21 +275,21 @@ RegexPattern::compile(const UnicodeString ®ex, if (U_FAILURE(status)) { return NULL; } - + const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; - + if ((flags & ~allFlags) != 0) { status = U_REGEX_INVALID_FLAG; return NULL; } - + if ((flags & UREGEX_CANON_EQ) != 0) { status = U_REGEX_UNIMPLEMENTED; return NULL; } - + RegexPattern *This = new RegexPattern; if (This == NULL) { status = U_MEMORY_ALLOCATION_ERROR; @@ -301,15 +301,15 @@ RegexPattern::compile(const UnicodeString ®ex, return NULL; } This->fFlags = flags; - + RegexCompile compiler(This, status); compiler.compile(regex, pe, status); - + if (U_FAILURE(status)) { delete This; This = NULL; } - + return This; } @@ -355,7 +355,7 @@ RegexPattern::compile(UText *regex, RegexCompile compiler(This, status); compiler.compile(regex, pe, status); - + if (U_FAILURE(status)) { delete This; This = NULL; @@ -538,12 +538,12 @@ UnicodeString RegexPattern::pattern() const { int64_t nativeLen = utext_nativeLength(fPattern); int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error UnicodeString result; - + status = U_ZERO_ERROR; UChar *resultChars = result.getBuffer(len16); utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning result.releaseBuffer(len16); - + return result; } } @@ -622,8 +622,9 @@ int32_t RegexPattern::split(UText *input, // Debugging function only. // //--------------------------------------------------------------------- -#if defined(REGEX_DEBUG) void RegexPattern::dumpOp(int32_t index) const { + (void)index; // Suppress warnings in non-debug build. +#if defined(REGEX_DEBUG) static const char * const opNames[] = {URX_OPCODE_NAMES}; int32_t op = fCompiledPat->elementAti(index); int32_t val = URX_VAL(op); @@ -633,7 +634,7 @@ void RegexPattern::dumpOp(int32_t index) const { pinnedType = 0; } - REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType])); + printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); switch (type) { case URX_NOP: case URX_DOTANY: @@ -682,12 +683,12 @@ void RegexPattern::dumpOp(int32_t index) const { case URX_LOOP_C: case URX_LOOP_DOT_I: // types with an integer operand field. - REGEX_DUMP_DEBUG_PRINTF(("%d", val)); + printf("%d", val); break; case URX_ONECHAR: case URX_ONECHAR_I: - REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?')); + printf("%c", val<256?val:'?'); break; case URX_STRING: @@ -700,7 +701,7 @@ void RegexPattern::dumpOp(int32_t index) const { for (i=val; i= 256) {c = '.';} - REGEX_DUMP_DEBUG_PRINTF(("%c", c)); + printf("%c", c); } } break; @@ -712,7 +713,7 @@ void RegexPattern::dumpOp(int32_t index) const { UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); set->toPattern(s, TRUE); for (int32_t i=0; itoPattern(s, TRUE); for (int32_t i=0; ifPattern, 0); + printf("Original Pattern: "); + UChar32 c = utext_next32From(fPattern, 0); while (c != U_SENTINEL) { if (c<32 || c>256) { c = '.'; } - REGEX_DUMP_DEBUG_PRINTF(("%c", c)); - - c = UTEXT_NEXT32(This->fPattern); - } - REGEX_DUMP_DEBUG_PRINTF(("\n")); - REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen)); - REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType))); - if (This->fStartType == START_STRING) { - REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \"")); - for (i=This->fInitialStringIdx; ifInitialStringIdx+This->fInitialStringLen; i++) { - REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates. + printf("%c", c); + + c = UTEXT_NEXT32(fPattern); + } + printf("\n"); + printf(" Min Match Length: %d\n", fMinMatchLen); + printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); + if (fStartType == START_STRING) { + printf(" Initial match string: \""); + for (i=fInitialStringIdx; ifStartType == START_SET) { - int32_t numSetChars = This->fInitialChars->size(); + } else if (fStartType == START_SET) { + int32_t numSetChars = fInitialChars->size(); if (numSetChars > 20) { numSetChars = 20; } - REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : ")); + printf(" Match First Chars : "); for (i=0; ifInitialChars->charAt(i); + UChar32 c = fInitialChars->charAt(i); if (0x20fInitialChars->size()) { - REGEX_DUMP_DEBUG_PRINTF((" ...")); + if (numSetChars < fInitialChars->size()) { + printf(" ..."); } - REGEX_DUMP_DEBUG_PRINTF(("\n")); + printf("\n"); - } else if (This->fStartType == START_CHAR) { - REGEX_DUMP_DEBUG_PRINTF((" First char of Match : ")); - if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) { - REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar)); + } else if (fStartType == START_CHAR) { + printf(" First char of Match : "); + if (0x20 < fInitialChar && fInitialChar<0x7e) { + printf("%c\n", fInitialChar); } else { - REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar)); + printf("%#x\n", fInitialChar); } } - REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \ - "-------------------------------------------\n")); - for (index = 0; indexfCompiledPat->size(); index++) { - This->dumpOp(index); + printf("\nIndex Binary Type Operand\n" \ + "-------------------------------------------\n"); + for (index = 0; indexsize(); index++) { + dumpOp(index); } - REGEX_DUMP_DEBUG_PRINTF(("\n\n")); -} + printf("\n\n"); #endif +} diff --git a/icu4c/source/i18n/unicode/regex.h b/icu4c/source/i18n/unicode/regex.h index e356a9e14f4..b2ef4ce33d9 100644 --- a/icu4c/source/i18n/unicode/regex.h +++ b/icu4c/source/i18n/unicode/regex.h @@ -68,21 +68,6 @@ class UVector; class UVector32; class UVector64; -#ifndef U_HIDE_INTERNAL_API -/** - * RBBIPatternDump Debug function, displays the compiled form of a pattern. - * @internal - */ -#ifdef REGEX_DEBUG -U_INTERNAL void U_EXPORT2 - RegexPatternDump(const RegexPattern *pat); -#else - #undef RegexPatternDump - #define RegexPatternDump(pat) -#endif -#endif /* U_HIDE_INTERNAL_API */ - - /** * Class RegexPattern represents a compiled regular expression. It includes @@ -613,11 +598,17 @@ private: // void init(); // Common initialization, for use by constructors. void zap(); // Common cleanup -#ifdef REGEX_DEBUG + void dumpOp(int32_t index) const; - friend void U_EXPORT2 RegexPatternDump(const RegexPattern *); -#endif + public: +#ifndef U_HIDE_INTERNAL_API + /** + * Dump a compiled pattern. Internal debug function. + * @internal + */ + void dumpPattern() const; +#endif }; diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index eb41f0bd682..fe0d8f609a5 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -145,7 +145,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch /** * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage - * into ASCII. + * into ASCII. * @see utext_openUTF8 */ static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); @@ -298,11 +298,11 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const } /** - * Assumes utf-8 input + * Assumes utf-8 input */ #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__) /** - * Assumes Invariant input + * Assumes Invariant input */ #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__) @@ -310,7 +310,7 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const * This buffer ( inv_buf ) is used to hold the UTF-8 strings * passed into utext_openUTF8. An error will be given if * INV_BUFSIZ is too small. It's only used on EBCDIC systems. - */ + */ #define INV_BUFSIZ 2048 /* increase this if too small */ @@ -378,7 +378,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, line, u_errorName(status)); return FALSE; } - if (line==376) { RegexPatternDump(REPattern);} + if (line==376) { REPattern->dumpPattern();} UnicodeString inputString(inputText); UnicodeString unEscapedInput = inputString.unescape(); @@ -414,7 +414,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking, } if (retVal == FALSE) { - RegexPatternDump(REPattern); + REPattern->dumpPattern(); } delete REPattern; @@ -441,12 +441,12 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look line, u_errorName(status)); return FALSE; } - + UnicodeString inputString(text, -1, US_INV); UnicodeString unEscapedInput = inputString.unescape(); LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); - + inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { // UTF-8 does not allow unpaired surrogates, so this could actually happen @@ -457,7 +457,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look textChars = new char[inputUTF8Length+1]; unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status); utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); - + REMatcher = &REPattern->matcher(status)->reset(&inputText); if (U_FAILURE(status)) { errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Status = %s\n", @@ -490,7 +490,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look } if (retVal == FALSE) { - RegexPatternDump(REPattern); + REPattern->dumpPattern(); } delete REPattern; @@ -556,7 +556,7 @@ void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol, } } } - + delete callerPattern; utext_close(&patternText); } @@ -583,7 +583,7 @@ void RegexTest::Basic() { UErrorCode status = U_ZERO_ERROR; RegexPattern *pattern; pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status); - RegexPatternDump(pattern); + pattern->dumpPattern(); RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status); UBool result = m->find(); printf("result = %d\n", result); @@ -731,18 +731,18 @@ void RegexTest::UTextBasic() { utext_openUTF8(&pattern, str_abc, -1, &status); RegexMatcher matcher(&pattern, 0, status); REGEX_CHECK_STATUS; - + UText input = UTEXT_INITIALIZER; utext_openUTF8(&input, str_abc, -1, &status); REGEX_CHECK_STATUS; matcher.reset(&input); REGEX_CHECK_STATUS; REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); - + matcher.reset(matcher.inputText()); REGEX_CHECK_STATUS; REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); - + utext_close(&pattern); utext_close(&input); } @@ -1119,7 +1119,7 @@ void RegexTest::API_Match() { delete m; delete p; } - + // // Regions // @@ -1132,34 +1132,34 @@ void RegexTest::API_Match() { REGEX_ASSERT(m.regionEnd() == testString.length()); REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); - + m.region(2,4, status); REGEX_CHECK_STATUS; REGEX_ASSERT(m.matches(status)); REGEX_ASSERT(m.start(status)==2); REGEX_ASSERT(m.end(status)==4); REGEX_CHECK_STATUS; - + m.reset(); REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == testString.length()); - + UnicodeString shorterString("short"); m.reset(shorterString); REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == shorterString.length()); - + REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); - + REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); - + REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); REGEX_ASSERT(m.hasTransparentBounds() == TRUE); @@ -1170,9 +1170,9 @@ void RegexTest::API_Match() { REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasTransparentBounds() == FALSE); - + } - + // // hitEnd() and requireEnd() // @@ -1184,7 +1184,7 @@ void RegexTest::API_Match() { REGEX_ASSERT(m1.hitEnd() == TRUE); REGEX_ASSERT(m1.requireEnd() == FALSE); REGEX_CHECK_STATUS; - + status = U_ZERO_ERROR; RegexMatcher m2("a*", testString, 0, status); REGEX_ASSERT(m2.lookingAt(status) == TRUE); @@ -1222,7 +1222,7 @@ void RegexTest::API_Match() { #endif // - // Time Outs. + // Time Outs. // Note: These tests will need to be changed when the regexp engine is // able to detect and cut short the exponential time behavior on // this type of match. @@ -1250,22 +1250,22 @@ void RegexTest::API_Match() { REGEX_ASSERT(matcher.lookingAt(status) == FALSE); REGEX_CHECK_STATUS; } - + // // Stack Limits // { UErrorCode status = U_ZERO_ERROR; UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A' - + // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations // of the '+', and makes the stack frames larger. RegexMatcher matcher("(A)+A$", testString, 0, status); - + // With the default stack, this match should fail to run REGEX_ASSERT(matcher.lookingAt(status) == FALSE); REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); - + // With unlimited stack, it should run status = U_ZERO_ERROR; matcher.setStackLimit(0, status); @@ -1281,7 +1281,7 @@ void RegexTest::API_Match() { REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); REGEX_ASSERT(matcher.getStackLimit() == 10000); } - + // A pattern that doesn't save state should work with // a minimal sized stack { @@ -1294,7 +1294,7 @@ void RegexTest::API_Match() { REGEX_ASSERT(matcher.matches(status) == TRUE); REGEX_CHECK_STATUS; REGEX_ASSERT(matcher.getStackLimit() == 30); - + // Negative stack sizes should fail status = U_ZERO_ERROR; matcher.setStackLimit(1000, status); @@ -1303,7 +1303,7 @@ void RegexTest::API_Match() { REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); REGEX_ASSERT(matcher.getStackLimit() == 1000); } - + } @@ -1852,7 +1852,7 @@ void RegexTest::API_Match_UTF8() { regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); REGEX_VERBOSE_TEXT(&input2); utext_openUChars(&empty, NULL, 0, &status); - + int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */ int32_t input2Len = strlen("not abc"); @@ -1962,7 +1962,7 @@ void RegexTest::API_Match_UTF8() { delete m1; delete pat2; - + utext_close(&re); utext_close(&input1); utext_close(&input2); @@ -1983,10 +1983,10 @@ void RegexTest::API_Match_UTF8() { UText re=UTEXT_INITIALIZER; const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */ utext_openUTF8(&re, str_01234567_pat, -1, &status); - + RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; - + UText input = UTEXT_INITIALIZER; const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ utext_openUTF8(&input, str_0123456789, -1, &status); @@ -2021,7 +2021,7 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); matcher->lookingAt(status); - + UnicodeString dest; UText destText = UTEXT_INITIALIZER; utext_openUnicodeString(&destText, &dest, &status); @@ -2040,7 +2040,7 @@ void RegexTest::API_Match_UTF8() { // destText is now immutable, reopen it utext_close(&destText); utext_openUnicodeString(&destText, &dest, &status); - + result = matcher->group(0, NULL, status); REGEX_CHECK_STATUS; REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); @@ -2049,7 +2049,7 @@ void RegexTest::API_Match_UTF8() { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); - + result = matcher->group(1, NULL, status); REGEX_CHECK_STATUS; const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */ @@ -2059,7 +2059,7 @@ void RegexTest::API_Match_UTF8() { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); REGEX_ASSERT_UTEXT_UTF8(str_234567, result); - + result = matcher->group(2, NULL, status); REGEX_CHECK_STATUS; const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ @@ -2069,7 +2069,7 @@ void RegexTest::API_Match_UTF8() { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); REGEX_ASSERT_UTEXT_UTF8(str_45, result); - + result = matcher->group(3, NULL, status); REGEX_CHECK_STATUS; const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ @@ -2087,7 +2087,7 @@ void RegexTest::API_Match_UTF8() { delete matcher; delete pat; - + utext_close(&destText); utext_close(&input); utext_close(&re); @@ -2148,7 +2148,7 @@ void RegexTest::API_Match_UTF8() { delete matcher; delete pat; - + utext_close(&input); utext_close(&re); } @@ -2166,7 +2166,7 @@ void RegexTest::API_Match_UTF8() { utext_openUTF8(&re, str_Gabcabc, -1, &status); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); - + REGEX_CHECK_STATUS; UText input = UTEXT_INITIALIZER; const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ @@ -2188,7 +2188,7 @@ void RegexTest::API_Match_UTF8() { delete matcher; delete pat; - + utext_close(&input); utext_close(&re); } @@ -2228,7 +2228,7 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT(m.end(status) == i); } REGEX_ASSERT(i==20); - + utext_close(&s); } { @@ -2250,7 +2250,7 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); } REGEX_ASSERT(i==5); - + utext_close(&s); } @@ -2278,7 +2278,7 @@ void RegexTest::API_Match_UTF8() { delete m; delete p; } - + // // Regions // @@ -2290,42 +2290,42 @@ void RegexTest::API_Match_UTF8() { REGEX_VERBOSE_TEXT(&testPattern); regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status); REGEX_VERBOSE_TEXT(&testText); - + RegexMatcher m(&testPattern, &testText, 0, status); REGEX_CHECK_STATUS; REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); - + m.region(2,4, status); REGEX_CHECK_STATUS; REGEX_ASSERT(m.matches(status)); REGEX_ASSERT(m.start(status)==2); REGEX_ASSERT(m.end(status)==4); REGEX_CHECK_STATUS; - + m.reset(); REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); - + regextst_openUTF8FromInvariant(&testText, "short", -1, &status); REGEX_VERBOSE_TEXT(&testText); m.reset(&testText); REGEX_ASSERT(m.regionStart() == 0); REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); - + REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); - + REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); - + REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); REGEX_ASSERT(m.hasTransparentBounds() == TRUE); @@ -2336,11 +2336,11 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT(m.hasTransparentBounds() == FALSE); REGEX_ASSERT(&m == &m.reset()); REGEX_ASSERT(m.hasTransparentBounds() == FALSE); - + utext_close(&testText); utext_close(&testPattern); } - + // // hitEnd() and requireEnd() // @@ -2352,13 +2352,13 @@ void RegexTest::API_Match_UTF8() { const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ utext_openUTF8(&testPattern, str_, -1, &status); utext_openUTF8(&testText, str_aabb, -1, &status); - + RegexMatcher m1(&testPattern, &testText, 0, status); REGEX_ASSERT(m1.lookingAt(status) == TRUE); REGEX_ASSERT(m1.hitEnd() == TRUE); REGEX_ASSERT(m1.requireEnd() == FALSE); REGEX_CHECK_STATUS; - + status = U_ZERO_ERROR; const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ utext_openUTF8(&testPattern, str_a, -1, &status); @@ -2376,7 +2376,7 @@ void RegexTest::API_Match_UTF8() { REGEX_ASSERT(m3.hitEnd() == TRUE); REGEX_ASSERT(m3.requireEnd() == TRUE); REGEX_CHECK_STATUS; - + utext_close(&testText); utext_close(&testPattern); } @@ -2402,7 +2402,7 @@ void RegexTest::API_Replace_UTF8() { REGEX_VERBOSE_TEXT(&re); RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); REGEX_CHECK_STATUS; - + char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ // 012345678901234567 UText dataText = UTEXT_INITIALIZER; @@ -2418,9 +2418,9 @@ void RegexTest::API_Replace_UTF8() { UText destText = UTEXT_INITIALIZER; utext_openUnicodeString(&destText, &dest, &status); UText *result; - + UText replText = UTEXT_INITIALIZER; - + const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ utext_openUTF8(&replText, str_yz, -1, &status); REGEX_VERBOSE_TEXT(&replText); @@ -2452,7 +2452,7 @@ void RegexTest::API_Replace_UTF8() { const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */ utext_openUTF8(&dataText, str_abxabxabx, -1, &status); matcher->reset(&dataText); - + result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); @@ -2477,7 +2477,7 @@ void RegexTest::API_Replace_UTF8() { // utext_openUTF8(&dataText, NULL, 0, &status); matcher->reset(&dataText); - + result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; REGEX_ASSERT_UTEXT_UTF8("", result); @@ -2501,7 +2501,7 @@ void RegexTest::API_Replace_UTF8() { // utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." matcher->reset(&dataText); - + utext_openUTF8(&replText, NULL, 0, &status); result = matcher->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; @@ -2565,7 +2565,7 @@ void RegexTest::API_Replace_UTF8() { utext_openUTF8(&dataText, str_abcdefg, -1, &status); RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); REGEX_CHECK_STATUS; - + const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ utext_openUTF8(&replText, str_11, -1, &status); result = matcher2->replaceFirst(&replText, NULL, status); @@ -2578,8 +2578,8 @@ void RegexTest::API_Replace_UTF8() { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); - - const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ + + const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ utext_openUTF8(&replText, str_v, -1, &status); REGEX_VERBOSE_TEXT(&replText); result = matcher2->replaceFirst(&replText, NULL, status); @@ -2592,7 +2592,7 @@ void RegexTest::API_Replace_UTF8() { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &destText); REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); - + const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */ utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); result = matcher2->replaceFirst(&replText, NULL, status); @@ -2614,7 +2614,7 @@ void RegexTest::API_Replace_UTF8() { supplDigitChars[24] = 0x9F; supplDigitChars[25] = 0x8F; utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); - + result = matcher2->replaceFirst(&replText, NULL, status); REGEX_CHECK_STATUS; const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */ @@ -2644,7 +2644,7 @@ void RegexTest::API_Replace_UTF8() { utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); utext_openUTF8(&replText, str_u0043, -1, &status); matcher->reset(&dataText); - + result = matcher->replaceAll(&replText, NULL, status); REGEX_CHECK_STATUS; const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ @@ -2664,7 +2664,7 @@ void RegexTest::API_Replace_UTF8() { matcher->reset(&dataText); unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A" - // 0123456789 + // 0123456789 expected[2] = 0xF0; expected[3] = 0x90; expected[4] = 0x80; @@ -2692,10 +2692,10 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ utext_openUTF8(&re, str_ssee, -1, &status); utext_openUTF8(&dataText, str_blah, -1, &status); utext_openUTF8(&replText, str_ooh, -1, &status); - + RegexMatcher m(&re, 0, status); REGEX_CHECK_STATUS; - + UnicodeString result; UText resultText = UTEXT_INITIALIZER; utext_openUnicodeString(&resultText, &result, &status); @@ -2736,7 +2736,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ m.appendTail(&resultText, status); const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); - + utext_close(&resultText); } @@ -2744,7 +2744,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ delete pat2; delete matcher; delete pat; - + utext_close(&dataText); utext_close(&replText); utext_close(&destText); @@ -2769,7 +2769,7 @@ void RegexTest::API_Pattern_UTF8() { UText re2 = UTEXT_INITIALIZER; UErrorCode status = U_ZERO_ERROR; UParseError pe; - + const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ utext_openUTF8(&re1, str_abcalmz, -1, &status); @@ -2818,7 +2818,7 @@ void RegexTest::API_Pattern_UTF8() { delete pat1a; delete pat1; delete pat2; - + utext_close(&re1); utext_close(&re2); @@ -2832,13 +2832,13 @@ void RegexTest::API_Pattern_UTF8() { UText pattern = UTEXT_INITIALIZER; const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */ utext_openUTF8(&pattern, str_pL, -1, &status); - + RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); RegexPattern *pClone = pSource->clone(); delete pSource; RegexMatcher *mFromClone = pClone->matcher(status); REGEX_CHECK_STATUS; - + UText input = UTEXT_INITIALIZER; const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ utext_openUTF8(&input, str_HelloWorld, -1, &status); @@ -2850,7 +2850,7 @@ void RegexTest::API_Pattern_UTF8() { REGEX_ASSERT(mFromClone->find() == FALSE); delete mFromClone; delete pClone; - + utext_close(&input); utext_close(&pattern); } @@ -2862,7 +2862,7 @@ void RegexTest::API_Pattern_UTF8() { UErrorCode status = U_ZERO_ERROR; UText pattern = UTEXT_INITIALIZER; UText input = UTEXT_INITIALIZER; - + const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ utext_openUTF8(&input, str_randominput, -1, &status); @@ -2870,17 +2870,17 @@ void RegexTest::API_Pattern_UTF8() { utext_openUTF8(&pattern, str_dotstar, -1, &status); REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE); REGEX_CHECK_STATUS; - + const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ utext_openUTF8(&pattern, str_abc, -1, &status); REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); REGEX_CHECK_STATUS; - + const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ utext_openUTF8(&pattern, str_nput, -1, &status); REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); REGEX_CHECK_STATUS; - + utext_openUTF8(&pattern, str_randominput, -1, &status); REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE); REGEX_CHECK_STATUS; @@ -2889,13 +2889,13 @@ void RegexTest::API_Pattern_UTF8() { utext_openUTF8(&pattern, str_u, -1, &status); REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); REGEX_CHECK_STATUS; - + utext_openUTF8(&input, str_abc, -1, &status); utext_openUTF8(&pattern, str_abc, -1, &status); status = U_INDEX_OUTOFBOUNDS_ERROR; REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); - + utext_close(&input); utext_close(&pattern); } @@ -3286,7 +3286,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, int32_t line) { UnicodeString unEscapedInput; UnicodeString deTaggedInput; - + int32_t patternUTF8Length, inputUTF8Length; char *patternChars = NULL, *inputChars = NULL; UText patternText = UTEXT_INITIALIZER; @@ -3313,7 +3313,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, int32_t regionEnd = -1; int32_t regionStartUTF8 = -1; int32_t regionEndUTF8 = -1; - + // // Compile the caller's pattern @@ -3331,7 +3331,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag bflags |= UREGEX_MULTILINE; } - + if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; } @@ -3367,16 +3367,16 @@ void RegexTest::regex_find(const UnicodeString &pattern, UTF8Converter = ucnv_open("UTF8", &status); ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); - + patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); status = U_ZERO_ERROR; // buffer overflow patternChars = new char[patternUTF8Length+1]; pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); - + if (status == U_ZERO_ERROR) { UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); - + if (status != U_ZERO_ERROR) { #if UCONFIG_NO_BREAK_ITERATION==1 // 'v' test flag means that the test pattern should not compile if ICU was configured @@ -3398,7 +3398,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, } } } - + if (UTF8Pattern == NULL) { // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); @@ -3406,7 +3406,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, } if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag - RegexPatternDump(callerPattern); + callerPattern->dumpPattern(); } if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag @@ -3428,7 +3428,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, numFinds = i; } } - + // 'M' flag. Use matches() instead of find() if (flags.indexOf((UChar)0x4d) >= 0) { useMatchesFunc = TRUE; @@ -3483,7 +3483,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag matcher->setTrace(TRUE); } - + if (UTF8Pattern != NULL) { inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); status = U_ZERO_ERROR; // buffer overflow @@ -3495,7 +3495,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); REGEX_CHECK_STATUS_L(line); } - + if (UTF8Matcher == NULL) { // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line); @@ -3509,7 +3509,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, if (UTF8Matcher != NULL) { if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8); if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8); - + // Fill out the native index UVector info. // Only need 1 loop, from above we know groupStarts.size() = groupEnds.size() for (i=0; i= 0) { @@ -3559,8 +3559,8 @@ void RegexTest::regex_find(const UnicodeString &pattern, UTF8Matcher->useTransparentBounds(TRUE); } } - - + + // // Do a find on the de-tagged input using the caller's pattern @@ -3635,7 +3635,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, failed = TRUE; goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. } - + int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i)); int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i)); if (matcher->end(i, status) != expectedEnd) { @@ -3672,7 +3672,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line); failed = TRUE; } - + if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true matcher->requireEnd() == FALSE) { errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", line); @@ -3682,7 +3682,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line); failed = TRUE; } - + if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false matcher->hitEnd() == TRUE) { errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line); @@ -3692,7 +3692,7 @@ void RegexTest::regex_find(const UnicodeString &pattern, errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)", line); failed = TRUE; } - + if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true matcher->hitEnd() == FALSE) { errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line); @@ -3716,7 +3716,7 @@ cleanupAndReturn: delete UTF8Pattern; delete matcher; delete callerPattern; - + utext_close(&inputText); delete[] inputChars; utext_close(&patternText); @@ -3792,7 +3792,7 @@ void RegexTest::Errors() { //------------------------------------------------------------------------------- -// +// // Read a text data file, convert it to UChars, and return the data // in one big UChar * buffer, which the caller must delete. // @@ -4135,7 +4135,7 @@ void RegexTest::PerlTests() { lineNum, expected?"":"no ", found?"":"no " ); continue; } - + // Don't try to check expected results if there is no match. // (Some have stuff in the expected fields) if (!found) { @@ -4433,7 +4433,7 @@ void RegexTest::PerlTestsUTF8() { if (flagStr.indexOf(UChar_x) != -1) { flags |= UREGEX_COMMENTS; } - + // // Put the pattern in a UTF-8 UText // @@ -4530,7 +4530,7 @@ void RegexTest::PerlTestsUTF8() { lineNum, expected?"":"no ", found?"":"no " ); continue; } - + // Don't try to check expected results if there is no match. // (Some have stuff in the expected fields) if (!found) { @@ -4673,10 +4673,10 @@ void RegexTest::PerlTestsUTF8() { delete fieldPat; delete [] testData; - + utext_close(&patternText); utext_close(&inputText); - + delete [] patternChars; delete [] inputChars; @@ -4740,12 +4740,12 @@ U_CDECL_END void RegexTest::Callbacks() { { // Getter returns NULLs if no callback has been set - + // The variables that the getter will fill in. // Init to non-null values so that the action of the getter can be seen. const void *returnedContext = &returnedContext; URegexMatchCallback *returnedFn = &testCallBackFn; - + UErrorCode status = U_ZERO_ERROR; RegexMatcher matcher("x", 0, status); REGEX_CHECK_STATUS; @@ -4754,7 +4754,7 @@ void RegexTest::Callbacks() { REGEX_ASSERT(returnedFn == NULL); REGEX_ASSERT(returnedContext == NULL); } - + { // Set and Get work callBackContext cbInfo = {this, 0, 0, 0}; @@ -4769,7 +4769,7 @@ void RegexTest::Callbacks() { REGEX_CHECK_STATUS; REGEX_ASSERT(returnedFn == testCallBackFn); REGEX_ASSERT(returnedContext == &cbInfo); - + // A short-running match shouldn't invoke the callback status = U_ZERO_ERROR; cbInfo.reset(1); @@ -4778,7 +4778,7 @@ void RegexTest::Callbacks() { REGEX_ASSERT(matcher.matches(status)); REGEX_CHECK_STATUS; REGEX_ASSERT(cbInfo.numCalls == 0); - + // A medium-length match that runs long enough to invoke the // callback, but not so long that the callback aborts it. status = U_ZERO_ERROR; @@ -4788,7 +4788,7 @@ void RegexTest::Callbacks() { REGEX_ASSERT(matcher.matches(status)==FALSE); REGEX_CHECK_STATUS; REGEX_ASSERT(cbInfo.numCalls > 0); - + // A longer running match that the callback function will abort. status = U_ZERO_ERROR; cbInfo.reset(4); @@ -4798,7 +4798,7 @@ void RegexTest::Callbacks() { REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); REGEX_ASSERT(cbInfo.numCalls == 4); } - + } @@ -4832,12 +4832,12 @@ U_CDECL_END void RegexTest::FindProgressCallbacks() { { // Getter returns NULLs if no callback has been set - + // The variables that the getter will fill in. // Init to non-null values so that the action of the getter can be seen. const void *returnedContext = &returnedContext; URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; - + UErrorCode status = U_ZERO_ERROR; RegexMatcher matcher("x", 0, status); REGEX_CHECK_STATUS; @@ -4846,7 +4846,7 @@ void RegexTest::FindProgressCallbacks() { REGEX_ASSERT(returnedFn == NULL); REGEX_ASSERT(returnedContext == NULL); } - + { // Set and Get work progressCallBackContext cbInfo = {this, 0, 0, 0}; @@ -4861,7 +4861,7 @@ void RegexTest::FindProgressCallbacks() { REGEX_CHECK_STATUS; REGEX_ASSERT(returnedFn == testProgressCallBackFn); REGEX_ASSERT(returnedContext == &cbInfo); - + // A short-running match should NOT invoke the callback. status = U_ZERO_ERROR; cbInfo.reset(100); @@ -4873,7 +4873,7 @@ void RegexTest::FindProgressCallbacks() { REGEX_ASSERT(matcher.find(0, status)); REGEX_CHECK_STATUS; REGEX_ASSERT(cbInfo.numCalls == 0); - + // A medium running match that causes matcher.find() to invoke our callback for each index. status = U_ZERO_ERROR; s = "aaaaaaaaaaaaaaaaaaab"; @@ -4882,7 +4882,7 @@ void RegexTest::FindProgressCallbacks() { REGEX_ASSERT(matcher.find(0, status)==FALSE); REGEX_CHECK_STATUS; REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); - + // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. status = U_ZERO_ERROR; UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; @@ -4906,7 +4906,7 @@ void RegexTest::FindProgressCallbacks() { REGEX_CHECK_STATUS; #endif } - + } @@ -4925,7 +4925,7 @@ void RegexTest::PreAllocatedUTextCAPI () { UText patternText = UTEXT_INITIALIZER; UnicodeString buffer; UText bufferText = UTEXT_INITIALIZER; - + utext_openUnicodeString(&bufferText, &buffer, &status); /* @@ -4942,7 +4942,7 @@ void RegexTest::PreAllocatedUTextCAPI () { regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); utext_openUChars(&text2, text2Chars, -1, &status); - + regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); re = uregex_openUText(&patternText, 0, NULL, &status); @@ -4954,7 +4954,7 @@ void RegexTest::PreAllocatedUTextCAPI () { utext_setNativeIndex(resultText, 0); utext_setNativeIndex(&text1, 0); REGEX_ASSERT(testUTextEqual(resultText, &text1)); - + resultText = uregex_getUText(re, &bufferText, &status); REGEX_CHECK_STATUS; REGEX_ASSERT(resultText == &bufferText); @@ -4970,7 +4970,7 @@ void RegexTest::PreAllocatedUTextCAPI () { utext_setNativeIndex(resultText, 0); utext_setNativeIndex(&text2, 0); REGEX_ASSERT(testUTextEqual(resultText, &text2)); - + uregex_close(re); utext_close(&text1); utext_close(&text2); @@ -5016,7 +5016,7 @@ void RegexTest::PreAllocatedUTextCAPI () { uregex_close(re); } - + /* * replaceFirst() */ @@ -5025,7 +5025,7 @@ void RegexTest::PreAllocatedUTextCAPI () { UChar text2[80]; UText replText = UTEXT_INITIALIZER; UText *result; - + status = U_ZERO_ERROR; u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); u_uastrncpy(text2, "No match here.", sizeof(text2)/2); @@ -5049,7 +5049,7 @@ void RegexTest::PreAllocatedUTextCAPI () { REGEX_CHECK_STATUS; REGEX_ASSERT(result == &bufferText); REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); - + /* Unicode escapes */ uregex_setText(re, text1, -1, &status); regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status); @@ -5106,7 +5106,7 @@ void RegexTest::PreAllocatedUTextCAPI () { * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts, * so we don't need to test it here. */ - + utext_close(&bufferText); utext_close(&patternText); } @@ -5181,7 +5181,7 @@ void RegexTest::Bug8479() { delete pMatcher; } } - + // Bug 7029 void RegexTest::Bug7029() { @@ -5199,11 +5199,11 @@ void RegexTest::Bug7029() { // Bug 9283 // This test is checking for the existance of any supplemental characters that case-fold -// to a bmp character. +// to a bmp character. // -// At the time of this writing there are none. If any should appear in a subsequent release -// of Unicode, the code in regular expressions compilation that determines the longest -// posssible match for a literal string will need to be enhanced. +// At the time of this writing there are none. If any should appear in a subsequent release +// of Unicode, the code in regular expressions compilation that determines the longest +// posssible match for a literal string will need to be enhanced. // // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() // for details on what to do in case of a failure of this test. @@ -5247,7 +5247,7 @@ void RegexTest::Bug10459() { URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status); REGEX_CHECK_STATUS; - + uregex_setUText(icu_re, utext_txt, &status); REGEX_CHECK_STATUS; @@ -5256,7 +5256,7 @@ void RegexTest::Bug10459() { // It should set an U_REGEX_INVALID_STATE. UChar buf[100]; - int32_t len = uregex_group(icu_re, 0, buf, LENGTHOF(buf), &status); + int32_t len = uregex_group(icu_re, 0, buf, LENGTHOF(buf), &status); REGEX_ASSERT(status == U_REGEX_INVALID_STATE); REGEX_ASSERT(len == 0); -- 2.40.0