// It is generated by the Perl script "regexcst.pl" from
// the rule parser state definitions file "regexcst.txt".
//
-// Copyright (C) 2002-2007 International Business Machines Corporation
+// Copyright (C) 2002-2015 International Business Machines Corporation
// and others. All rights reserved.
//
//---------------------------------------------------------------------------------
// Character classes for regex pattern scanning.
//
static const uint8_t kRuleSet_digit_char = 128;
- static const uint8_t kRuleSet_rule_char = 129;
+ static const uint8_t kRuleSet_ascii_letter = 129;
+ static const uint8_t kRuleSet_rule_char = 130;
enum Regex_PatternParseAction {
- doLiteralChar,
- doSetEnd,
- doBackslashA,
- doSetBeginUnion,
- doNOP,
- doSetBackslash_w,
- doSetRange,
- doBackslashG,
- doPerlInline,
- doSetAddDash,
- doIntevalLowerDigit,
- doProperty,
- doBackslashX,
- doOpenAtomicParen,
- doSetLiteralEscaped,
- doPatFinish,
- doSetBackslash_D,
- doSetDifference2,
- doNamedChar,
- doNGPlus,
+ doIntervalUpperDigit,
+ doPossessiveOpt,
doOpenLookBehindNeg,
- doIntervalError,
- doIntervalSame,
- doBackRef,
- doPlus,
- doOpenCaptureParen,
- doMismatchedParenErr,
- doBeginMatchMode,
+ doDotAny,
+ doSetBackslash_D,
+ doSetLiteral,
+ doSetBackslash_S,
doEscapeError,
- doOpenNonCaptureParen,
+ doSetBackslash_W,
doDollar,
- doSetProp,
- doIntervalUpperDigit,
- doSetBegin,
- doBackslashs,
- doOpenLookBehind,
+ doBackslashb,
+ doSetOpError,
+ doBackslashG,
+ doPatStart,
+ doMismatchedParenErr,
+ doPossessivePlus,
+ doBackslashX,
+ doSetBackslash_s,
+ doSetBackslash_w,
+ doBackslashW,
+ doBackslashw,
doSetMatchMode,
doOrOperator,
- doCaret,
- doMatchModeParen,
- doStar,
- doOpt,
- doMatchMode,
- doSuppressComments,
- doPossessiveInterval,
doOpenLookAheadNeg,
- doBackslashW,
- doCloseParen,
- doSetOpError,
+ doOpenLookBehind,
+ doBackslashS,
+ doBeginMatchMode,
+ doNOP,
+ doSetProp,
+ doBackslashA,
doIntervalInit,
- doSetFinish,
- doSetIntersection2,
- doNGStar,
- doEnterQuoteMode,
- doSetAddAmp,
- doBackslashB,
- doBackslashw,
- doPossessiveOpt,
+ doOpenCaptureParen,
+ doNGPlus,
+ doIntervalError,
+ doSetDifference2,
+ doNGOpt,
+ doEscapedLiteralChar,
doSetNegate,
- doRuleError,
- doBackslashb,
- doConditionalExpr,
- doPossessivePlus,
+ doSetBegin,
+ doMatchModeParen,
+ doLiteralChar,
+ doOpt,
+ doSetIntersection2,
doBadOpenParenType,
+ doSuppressComments,
+ doCloseParen,
+ doPatFinish,
+ doSetBeginUnion,
+ doSetBackslash_d,
+ doProperty,
doNGInterval,
- doSetLiteral,
- doSetNamedChar,
- doBackslashd,
- doSetBeginDifference1,
- doBackslashD,
- doExit,
- doSetBackslash_S,
+ doNGStar,
+ doOpenLookAhead,
+ doSetBeginIntersection1,
+ doBeginNamedCapture,
doInterval,
+ doMatchMode,
doSetNoCloseError,
- doNGOpt,
+ doSetBeginDifference1,
+ doPlus,
+ doBackslashD,
+ doSetLiteralEscaped,
+ doContinueNamedCapture,
doSetPosixProp,
- doBackslashS,
- doBackslashZ,
- doSetBeginIntersection1,
- doSetBackslash_W,
- doSetBackslash_d,
- doOpenLookAhead,
- doBadModeFlag,
- doPatStart,
+ doBackslashz,
doSetNamedRange,
doPossessiveStar,
- doEscapedLiteralChar,
- doSetBackslash_s,
- doBackslashz,
- doDotAny,
+ doBadModeFlag,
+ doContinueNamedBackRef,
+ doPerlInline,
+ doBackslashd,
+ doOpenNonCaptureParen,
+ doSetEnd,
+ doSetAddDash,
+ doSetFinish,
+ doCaret,
+ doConditionalExpr,
+ doExit,
+ doNamedChar,
+ doSetRange,
+ doPossessiveInterval,
+ doBackslashs,
+ doIntervalSame,
+ doEnterQuoteMode,
+ doOpenAtomicParen,
+ doSetNamedChar,
+ doRuleError,
+ doStar,
+ doSetAddAmp,
+ doBackslashB,
+ doCompleteNamedBackRef,
+ doBackslashZ,
+ doIntevalLowerDigit,
+ doBeginNamedBackRef,
+ doBackRef,
+ doBadNamedCapture,
rbbiLastAction};
//-------------------------------------------------------------------------------
{doNOP, 0, 0, 0, TRUE}
, {doPatStart, 255, 2,0, FALSE} // 1 start
, {doLiteralChar, 254, 14,0, TRUE} // 2 term
- , {doLiteralChar, 129, 14,0, TRUE} // 3
- , {doSetBegin, 91 /* [ */, 104, 182, TRUE} // 4
+ , {doLiteralChar, 130, 14,0, TRUE} // 3
+ , {doSetBegin, 91 /* [ */, 118, 196, TRUE} // 4
, {doNOP, 40 /* ( */, 27,0, TRUE} // 5
, {doDotAny, 46 /* . */, 14,0, TRUE} // 6
, {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
, {doDollar, 36 /* $ */, 14,0, TRUE} // 8
- , {doNOP, 92 /* \ */, 84,0, TRUE} // 9
+ , {doNOP, 92 /* \ */, 89,0, TRUE} // 9
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
, {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
, {doPatFinish, 253, 2,0, FALSE} // 12
- , {doRuleError, 255, 183,0, FALSE} // 13
- , {doNOP, 42 /* * */, 63,0, TRUE} // 14 expr-quant
- , {doNOP, 43 /* + */, 66,0, TRUE} // 15
- , {doNOP, 63 /* ? */, 69,0, TRUE} // 16
- , {doIntervalInit, 123 /* { */, 72,0, TRUE} // 17
+ , {doRuleError, 255, 197,0, FALSE} // 13
+ , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant
+ , {doNOP, 43 /* + */, 71,0, TRUE} // 15
+ , {doNOP, 63 /* ? */, 74,0, TRUE} // 16
+ , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17
, {doNOP, 40 /* ( */, 23,0, TRUE} // 18
, {doNOP, 255, 20,0, FALSE} // 19
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
, {doNOP, 255, 2,0, FALSE} // 22
, {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
, {doNOP, 255, 27,0, FALSE} // 24
- , {doNOP, 35 /* # */, 49, 14, TRUE} // 25 open-paren-quant2
+ , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2
, {doNOP, 255, 29,0, FALSE} // 26
, {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
, {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
, {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
, {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
, {doNOP, 60 /* < */, 46,0, TRUE} // 33
- , {doNOP, 35 /* # */, 49, 2, TRUE} // 34
- , {doBeginMatchMode, 105 /* i */, 52,0, FALSE} // 35
- , {doBeginMatchMode, 100 /* d */, 52,0, FALSE} // 36
- , {doBeginMatchMode, 109 /* m */, 52,0, FALSE} // 37
- , {doBeginMatchMode, 115 /* s */, 52,0, FALSE} // 38
- , {doBeginMatchMode, 117 /* u */, 52,0, FALSE} // 39
- , {doBeginMatchMode, 119 /* w */, 52,0, FALSE} // 40
- , {doBeginMatchMode, 120 /* x */, 52,0, FALSE} // 41
- , {doBeginMatchMode, 45 /* - */, 52,0, FALSE} // 42
- , {doConditionalExpr, 40 /* ( */, 183,0, TRUE} // 43
- , {doPerlInline, 123 /* { */, 183,0, TRUE} // 44
- , {doBadOpenParenType, 255, 183,0, FALSE} // 45
+ , {doNOP, 35 /* # */, 50, 2, TRUE} // 34
+ , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35
+ , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36
+ , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37
+ , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38
+ , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39
+ , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40
+ , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41
+ , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42
+ , {doConditionalExpr, 40 /* ( */, 197,0, TRUE} // 43
+ , {doPerlInline, 123 /* { */, 197,0, TRUE} // 44
+ , {doBadOpenParenType, 255, 197,0, FALSE} // 45
, {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
, {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
- , {doBadOpenParenType, 255, 183,0, FALSE} // 48
- , {doNOP, 41 /* ) */, 255,0, TRUE} // 49 paren-comment
- , {doMismatchedParenErr, 253, 183,0, FALSE} // 50
- , {doNOP, 255, 49,0, TRUE} // 51
- , {doMatchMode, 105 /* i */, 52,0, TRUE} // 52 paren-flag
- , {doMatchMode, 100 /* d */, 52,0, TRUE} // 53
- , {doMatchMode, 109 /* m */, 52,0, TRUE} // 54
- , {doMatchMode, 115 /* s */, 52,0, TRUE} // 55
- , {doMatchMode, 117 /* u */, 52,0, TRUE} // 56
- , {doMatchMode, 119 /* w */, 52,0, TRUE} // 57
- , {doMatchMode, 120 /* x */, 52,0, TRUE} // 58
- , {doMatchMode, 45 /* - */, 52,0, TRUE} // 59
- , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 60
- , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 61
- , {doBadModeFlag, 255, 183,0, FALSE} // 62
- , {doNGStar, 63 /* ? */, 20,0, TRUE} // 63 quant-star
- , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 64
- , {doStar, 255, 20,0, FALSE} // 65
- , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 66 quant-plus
- , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 67
- , {doPlus, 255, 20,0, FALSE} // 68
- , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 69 quant-opt
- , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 70
- , {doOpt, 255, 20,0, FALSE} // 71
- , {doNOP, 128, 74,0, FALSE} // 72 interval-open
- , {doIntervalError, 255, 183,0, FALSE} // 73
- , {doIntevalLowerDigit, 128, 74,0, TRUE} // 74 interval-lower
- , {doNOP, 44 /* , */, 78,0, TRUE} // 75
- , {doIntervalSame, 125 /* } */, 81,0, TRUE} // 76
- , {doIntervalError, 255, 183,0, FALSE} // 77
- , {doIntervalUpperDigit, 128, 78,0, TRUE} // 78 interval-upper
- , {doNOP, 125 /* } */, 81,0, TRUE} // 79
- , {doIntervalError, 255, 183,0, FALSE} // 80
- , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 81 interval-type
- , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 82
- , {doInterval, 255, 20,0, FALSE} // 83
- , {doBackslashA, 65 /* A */, 2,0, TRUE} // 84 backslash
- , {doBackslashB, 66 /* B */, 2,0, TRUE} // 85
- , {doBackslashb, 98 /* b */, 2,0, TRUE} // 86
- , {doBackslashd, 100 /* d */, 14,0, TRUE} // 87
- , {doBackslashD, 68 /* D */, 14,0, TRUE} // 88
- , {doBackslashG, 71 /* G */, 2,0, TRUE} // 89
- , {doNamedChar, 78 /* N */, 14,0, FALSE} // 90
- , {doProperty, 112 /* p */, 14,0, FALSE} // 91
- , {doProperty, 80 /* P */, 14,0, FALSE} // 92
- , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 93
- , {doBackslashS, 83 /* S */, 14,0, TRUE} // 94
- , {doBackslashs, 115 /* s */, 14,0, TRUE} // 95
- , {doBackslashW, 87 /* W */, 14,0, TRUE} // 96
- , {doBackslashw, 119 /* w */, 14,0, TRUE} // 97
- , {doBackslashX, 88 /* X */, 14,0, TRUE} // 98
- , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 99
- , {doBackslashz, 122 /* z */, 2,0, TRUE} // 100
- , {doBackRef, 128, 14,0, TRUE} // 101
- , {doEscapeError, 253, 183,0, FALSE} // 102
- , {doEscapedLiteralChar, 255, 14,0, TRUE} // 103
- , {doSetNegate, 94 /* ^ */, 107,0, TRUE} // 104 set-open
- , {doSetPosixProp, 58 /* : */, 109,0, FALSE} // 105
- , {doNOP, 255, 107,0, FALSE} // 106
- , {doSetLiteral, 93 /* ] */, 122,0, TRUE} // 107 set-open2
- , {doNOP, 255, 112,0, FALSE} // 108
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 109 set-posix
- , {doNOP, 58 /* : */, 112,0, FALSE} // 110
- , {doRuleError, 255, 183,0, FALSE} // 111
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 112 set-start
- , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 113
- , {doNOP, 92 /* \ */, 172,0, TRUE} // 114
- , {doNOP, 45 /* - */, 118,0, TRUE} // 115
- , {doNOP, 38 /* & */, 120,0, TRUE} // 116
- , {doSetLiteral, 255, 122,0, TRUE} // 117
- , {doRuleError, 45 /* - */, 183,0, FALSE} // 118 set-start-dash
- , {doSetAddDash, 255, 122,0, FALSE} // 119
- , {doRuleError, 38 /* & */, 183,0, FALSE} // 120 set-start-amp
- , {doSetAddAmp, 255, 122,0, FALSE} // 121
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 122 set-after-lit
- , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 123
- , {doNOP, 45 /* - */, 159,0, TRUE} // 124
- , {doNOP, 38 /* & */, 150,0, TRUE} // 125
- , {doNOP, 92 /* \ */, 172,0, TRUE} // 126
- , {doSetNoCloseError, 253, 183,0, FALSE} // 127
- , {doSetLiteral, 255, 122,0, TRUE} // 128
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 129 set-after-set
- , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 130
- , {doNOP, 45 /* - */, 152,0, TRUE} // 131
- , {doNOP, 38 /* & */, 147,0, TRUE} // 132
- , {doNOP, 92 /* \ */, 172,0, TRUE} // 133
- , {doSetNoCloseError, 253, 183,0, FALSE} // 134
- , {doSetLiteral, 255, 122,0, TRUE} // 135
- , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-range
- , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 137
- , {doNOP, 45 /* - */, 155,0, TRUE} // 138
- , {doNOP, 38 /* & */, 157,0, TRUE} // 139
- , {doNOP, 92 /* \ */, 172,0, TRUE} // 140
- , {doSetNoCloseError, 253, 183,0, FALSE} // 141
- , {doSetLiteral, 255, 122,0, TRUE} // 142
- , {doSetBeginUnion, 91 /* [ */, 104, 129, TRUE} // 143 set-after-op
- , {doSetOpError, 93 /* ] */, 183,0, FALSE} // 144
- , {doNOP, 92 /* \ */, 172,0, TRUE} // 145
- , {doSetLiteral, 255, 122,0, TRUE} // 146
- , {doSetBeginIntersection1, 91 /* [ */, 104, 129, TRUE} // 147 set-set-amp
- , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 148
- , {doSetAddAmp, 255, 122,0, FALSE} // 149
- , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 150 set-lit-amp
- , {doSetAddAmp, 255, 122,0, FALSE} // 151
- , {doSetBeginDifference1, 91 /* [ */, 104, 129, TRUE} // 152 set-set-dash
- , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 153
- , {doSetAddDash, 255, 122,0, FALSE} // 154
- , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 155 set-range-dash
- , {doSetAddDash, 255, 122,0, FALSE} // 156
- , {doSetIntersection2, 38 /* & */, 143,0, TRUE} // 157 set-range-amp
- , {doSetAddAmp, 255, 122,0, FALSE} // 158
- , {doSetDifference2, 45 /* - */, 143,0, TRUE} // 159 set-lit-dash
- , {doSetAddDash, 91 /* [ */, 122,0, FALSE} // 160
- , {doSetAddDash, 93 /* ] */, 122,0, FALSE} // 161
- , {doNOP, 92 /* \ */, 164,0, TRUE} // 162
- , {doSetRange, 255, 136,0, TRUE} // 163
- , {doSetOpError, 115 /* s */, 183,0, FALSE} // 164 set-lit-dash-escape
- , {doSetOpError, 83 /* S */, 183,0, FALSE} // 165
- , {doSetOpError, 119 /* w */, 183,0, FALSE} // 166
- , {doSetOpError, 87 /* W */, 183,0, FALSE} // 167
- , {doSetOpError, 100 /* d */, 183,0, FALSE} // 168
- , {doSetOpError, 68 /* D */, 183,0, FALSE} // 169
- , {doSetNamedRange, 78 /* N */, 136,0, FALSE} // 170
- , {doSetRange, 255, 136,0, TRUE} // 171
- , {doSetProp, 112 /* p */, 129,0, FALSE} // 172 set-escape
- , {doSetProp, 80 /* P */, 129,0, FALSE} // 173
- , {doSetNamedChar, 78 /* N */, 122,0, FALSE} // 174
- , {doSetBackslash_s, 115 /* s */, 136,0, TRUE} // 175
- , {doSetBackslash_S, 83 /* S */, 136,0, TRUE} // 176
- , {doSetBackslash_w, 119 /* w */, 136,0, TRUE} // 177
- , {doSetBackslash_W, 87 /* W */, 136,0, TRUE} // 178
- , {doSetBackslash_d, 100 /* d */, 136,0, TRUE} // 179
- , {doSetBackslash_D, 68 /* D */, 136,0, TRUE} // 180
- , {doSetLiteralEscaped, 255, 122,0, TRUE} // 181
- , {doSetFinish, 255, 14,0, FALSE} // 182 set-finish
- , {doExit, 255, 183,0, TRUE} // 183 errorDeath
+ , {doBeginNamedCapture, 129, 64,0, FALSE} // 48
+ , {doBadOpenParenType, 255, 197,0, FALSE} // 49
+ , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment
+ , {doMismatchedParenErr, 253, 197,0, FALSE} // 51
+ , {doNOP, 255, 50,0, TRUE} // 52
+ , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag
+ , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54
+ , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55
+ , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56
+ , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57
+ , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58
+ , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59
+ , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60
+ , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61
+ , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62
+ , {doBadModeFlag, 255, 197,0, FALSE} // 63
+ , {doContinueNamedCapture, 129, 64,0, TRUE} // 64 named-capture
+ , {doContinueNamedCapture, 128, 64,0, TRUE} // 65
+ , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66
+ , {doBadNamedCapture, 255, 197,0, FALSE} // 67
+ , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star
+ , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69
+ , {doStar, 255, 20,0, FALSE} // 70
+ , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus
+ , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72
+ , {doPlus, 255, 20,0, FALSE} // 73
+ , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt
+ , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75
+ , {doOpt, 255, 20,0, FALSE} // 76
+ , {doNOP, 128, 79,0, FALSE} // 77 interval-open
+ , {doIntervalError, 255, 197,0, FALSE} // 78
+ , {doIntevalLowerDigit, 128, 79,0, TRUE} // 79 interval-lower
+ , {doNOP, 44 /* , */, 83,0, TRUE} // 80
+ , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81
+ , {doIntervalError, 255, 197,0, FALSE} // 82
+ , {doIntervalUpperDigit, 128, 83,0, TRUE} // 83 interval-upper
+ , {doNOP, 125 /* } */, 86,0, TRUE} // 84
+ , {doIntervalError, 255, 197,0, FALSE} // 85
+ , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type
+ , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87
+ , {doInterval, 255, 20,0, FALSE} // 88
+ , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash
+ , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90
+ , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91
+ , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92
+ , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93
+ , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94
+ , {doNOP, 107 /* k */, 110,0, TRUE} // 95
+ , {doNamedChar, 78 /* N */, 14,0, FALSE} // 96
+ , {doProperty, 112 /* p */, 14,0, FALSE} // 97
+ , {doProperty, 80 /* P */, 14,0, FALSE} // 98
+ , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 99
+ , {doBackslashS, 83 /* S */, 14,0, TRUE} // 100
+ , {doBackslashs, 115 /* s */, 14,0, TRUE} // 101
+ , {doBackslashW, 87 /* W */, 14,0, TRUE} // 102
+ , {doBackslashw, 119 /* w */, 14,0, TRUE} // 103
+ , {doBackslashX, 88 /* X */, 14,0, TRUE} // 104
+ , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 105
+ , {doBackslashz, 122 /* z */, 2,0, TRUE} // 106
+ , {doBackRef, 128, 14,0, TRUE} // 107
+ , {doEscapeError, 253, 197,0, FALSE} // 108
+ , {doEscapedLiteralChar, 255, 14,0, TRUE} // 109
+ , {doBeginNamedBackRef, 60 /* < */, 112,0, TRUE} // 110 named-backref
+ , {doBadNamedCapture, 255, 197,0, FALSE} // 111
+ , {doContinueNamedBackRef, 129, 114,0, TRUE} // 112 named-backref-2
+ , {doBadNamedCapture, 255, 197,0, FALSE} // 113
+ , {doContinueNamedBackRef, 129, 114,0, TRUE} // 114 named-backref-3
+ , {doContinueNamedBackRef, 128, 114,0, TRUE} // 115
+ , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 116
+ , {doBadNamedCapture, 255, 197,0, FALSE} // 117
+ , {doSetNegate, 94 /* ^ */, 121,0, TRUE} // 118 set-open
+ , {doSetPosixProp, 58 /* : */, 123,0, FALSE} // 119
+ , {doNOP, 255, 121,0, FALSE} // 120
+ , {doSetLiteral, 93 /* ] */, 136,0, TRUE} // 121 set-open2
+ , {doNOP, 255, 126,0, FALSE} // 122
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 123 set-posix
+ , {doNOP, 58 /* : */, 126,0, FALSE} // 124
+ , {doRuleError, 255, 197,0, FALSE} // 125
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 126 set-start
+ , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE} // 127
+ , {doNOP, 92 /* \ */, 186,0, TRUE} // 128
+ , {doNOP, 45 /* - */, 132,0, TRUE} // 129
+ , {doNOP, 38 /* & */, 134,0, TRUE} // 130
+ , {doSetLiteral, 255, 136,0, TRUE} // 131
+ , {doRuleError, 45 /* - */, 197,0, FALSE} // 132 set-start-dash
+ , {doSetAddDash, 255, 136,0, FALSE} // 133
+ , {doRuleError, 38 /* & */, 197,0, FALSE} // 134 set-start-amp
+ , {doSetAddAmp, 255, 136,0, FALSE} // 135
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 136 set-after-lit
+ , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE} // 137
+ , {doNOP, 45 /* - */, 173,0, TRUE} // 138
+ , {doNOP, 38 /* & */, 164,0, TRUE} // 139
+ , {doNOP, 92 /* \ */, 186,0, TRUE} // 140
+ , {doSetNoCloseError, 253, 197,0, FALSE} // 141
+ , {doSetLiteral, 255, 136,0, TRUE} // 142
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 143 set-after-set
+ , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE} // 144
+ , {doNOP, 45 /* - */, 166,0, TRUE} // 145
+ , {doNOP, 38 /* & */, 161,0, TRUE} // 146
+ , {doNOP, 92 /* \ */, 186,0, TRUE} // 147
+ , {doSetNoCloseError, 253, 197,0, FALSE} // 148
+ , {doSetLiteral, 255, 136,0, TRUE} // 149
+ , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 150 set-after-range
+ , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE} // 151
+ , {doNOP, 45 /* - */, 169,0, TRUE} // 152
+ , {doNOP, 38 /* & */, 171,0, TRUE} // 153
+ , {doNOP, 92 /* \ */, 186,0, TRUE} // 154
+ , {doSetNoCloseError, 253, 197,0, FALSE} // 155
+ , {doSetLiteral, 255, 136,0, TRUE} // 156
+ , {doSetBeginUnion, 91 /* [ */, 118, 143, TRUE} // 157 set-after-op
+ , {doSetOpError, 93 /* ] */, 197,0, FALSE} // 158
+ , {doNOP, 92 /* \ */, 186,0, TRUE} // 159
+ , {doSetLiteral, 255, 136,0, TRUE} // 160
+ , {doSetBeginIntersection1, 91 /* [ */, 118, 143, TRUE} // 161 set-set-amp
+ , {doSetIntersection2, 38 /* & */, 157,0, TRUE} // 162
+ , {doSetAddAmp, 255, 136,0, FALSE} // 163
+ , {doSetIntersection2, 38 /* & */, 157,0, TRUE} // 164 set-lit-amp
+ , {doSetAddAmp, 255, 136,0, FALSE} // 165
+ , {doSetBeginDifference1, 91 /* [ */, 118, 143, TRUE} // 166 set-set-dash
+ , {doSetDifference2, 45 /* - */, 157,0, TRUE} // 167
+ , {doSetAddDash, 255, 136,0, FALSE} // 168
+ , {doSetDifference2, 45 /* - */, 157,0, TRUE} // 169 set-range-dash
+ , {doSetAddDash, 255, 136,0, FALSE} // 170
+ , {doSetIntersection2, 38 /* & */, 157,0, TRUE} // 171 set-range-amp
+ , {doSetAddAmp, 255, 136,0, FALSE} // 172
+ , {doSetDifference2, 45 /* - */, 157,0, TRUE} // 173 set-lit-dash
+ , {doSetAddDash, 91 /* [ */, 136,0, FALSE} // 174
+ , {doSetAddDash, 93 /* ] */, 136,0, FALSE} // 175
+ , {doNOP, 92 /* \ */, 178,0, TRUE} // 176
+ , {doSetRange, 255, 150,0, TRUE} // 177
+ , {doSetOpError, 115 /* s */, 197,0, FALSE} // 178 set-lit-dash-escape
+ , {doSetOpError, 83 /* S */, 197,0, FALSE} // 179
+ , {doSetOpError, 119 /* w */, 197,0, FALSE} // 180
+ , {doSetOpError, 87 /* W */, 197,0, FALSE} // 181
+ , {doSetOpError, 100 /* d */, 197,0, FALSE} // 182
+ , {doSetOpError, 68 /* D */, 197,0, FALSE} // 183
+ , {doSetNamedRange, 78 /* N */, 150,0, FALSE} // 184
+ , {doSetRange, 255, 150,0, TRUE} // 185
+ , {doSetProp, 112 /* p */, 143,0, FALSE} // 186 set-escape
+ , {doSetProp, 80 /* P */, 143,0, FALSE} // 187
+ , {doSetNamedChar, 78 /* N */, 136,0, FALSE} // 188
+ , {doSetBackslash_s, 115 /* s */, 150,0, TRUE} // 189
+ , {doSetBackslash_S, 83 /* S */, 150,0, TRUE} // 190
+ , {doSetBackslash_w, 119 /* w */, 150,0, TRUE} // 191
+ , {doSetBackslash_W, 87 /* W */, 150,0, TRUE} // 192
+ , {doSetBackslash_d, 100 /* d */, 150,0, TRUE} // 193
+ , {doSetBackslash_D, 68 /* D */, 150,0, TRUE} // 194
+ , {doSetLiteralEscaped, 255, 136,0, TRUE} // 195
+ , {doSetFinish, 255, 14,0, FALSE} // 196 set-finish
+ , {doExit, 255, 197,0, TRUE} // 197 errorDeath
};
static const char * const RegexStateNames[] = { 0,
"start",
0,
"open-paren-lookbehind",
0,
+ 0,
0,
"paren-comment",
0,
0,
0,
0,
+ 0,
+ "named-capture",
+ 0,
+ 0,
0,
"quant-star",
0,
0,
0,
0,
+ 0,
+ 0,
+ "named-backref",
+ 0,
+ "named-backref-2",
+ 0,
+ "named-backref-3",
+ 0,
+ 0,
0,
"set-open",
0,
case 25: name = "TestBug11371";
if (exec) TestBug11371();
break;
+ case 26: name = "TestBug11480";
+ if (exec) TestBug11480();
+ break;
+ case 27: name = "NamedCapture";
+ if (exec) NamedCapture();
+ break;
+ case 28: name = "NamedCaptureLimits";
+ if (exec) NamedCaptureLimits();
+ break;
default: name = "";
break; //needed to end loop
}
REGEX_ASSERT(dest == "The value of $1 is bc.defg");
dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);
- REGEX_CHECK_STATUS;
- REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");
+ REGEX_ASSERT(U_FAILURE(status));
+ status = U_ZERO_ERROR;
UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U0001D7CF.");
replacement = replacement.unescape();
REGEX_ASSERT(result == &destText);
REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
- const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
+ const char str_byitselfnogroupnumber[] = { 0x5c, 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c,
+ 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62,
+ 0x65, 0x72, 0x20, 0x5c, 0x24, 0x5c, 0x24, 0x5c, 0x24, 0x00 }; /* \$ by itself, no group number \$\$\$ */
utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
result = matcher2->replaceFirst(&replText, NULL, status);
REGEX_CHECK_STATUS;
UnicodeString stringToSplit("first:second:third");
UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &status);
REGEX_CHECK_STATUS;
-
+
UText *splits[10] = {NULL};
int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(splits), status);
REGEX_CHECK_STATUS;
/* Unicode escapes */
uregex_setText(re, text1, -1, &status);
- regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
+ regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042\\$\\a", -1, &status);
utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
REGEX_CHECK_STATUS;
utext_close(&patternText);
}
+
+//--------------------------------------------------------------
+//
+//  NamedCapture   Check basic named capture group functionality:
+//                   - group name to group number lookup, on a pattern
+//                     and on a copy of that pattern,
+//                   - $n and ${name} substitution in replaceAll()
+//                     through the C++ RegexMatcher API,
+//                   - the same substitutions through the plain C API.
+//
+//--------------------------------------------------------------
+void RegexTest::NamedCapture() {
+    UErrorCode status = U_ZERO_ERROR;
+    RegexPattern *pat = RegexPattern::compile(UnicodeString(
+            "abc()()(?<three>xyz)(de)(?<five>hmm)(?<six>oh)f\\k<five>"), 0, status);
+    REGEX_CHECK_STATUS;
+
+    // Lookup by name, (const char *, length) overload.
+    int32_t group = pat->groupNumberFromName("five", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(5 == group);
+    group = pat->groupNumberFromName("three", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(3 == group);
+
+    // Lookup by name, UnicodeString overload.
+    status = U_ZERO_ERROR;
+    group = pat->groupNumberFromName(UnicodeString("six"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(6 == group);
+
+    // A name that is not in the pattern must be reported through status.
+    // Checked with REGEX_ASSERT, like every other expectation in this test,
+    // so that a wrong status shows up as an ordinary test failure.
+    status = U_ZERO_ERROR;
+    group = pat->groupNumberFromName(UnicodeString("nosuch"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+
+    // After copying a pattern, named capture should still work in the copy.
+    RegexPattern *copiedPat = new RegexPattern(*pat);
+    REGEX_ASSERT(*copiedPat == *pat);
+    delete pat; pat = NULL;  // Delete original, copy should have no references back to it.
+
+    group = copiedPat->groupNumberFromName("five", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(5 == group);
+    group = copiedPat->groupNumberFromName("three", -1, status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(3 == group);
+    delete copiedPat;
+
+    // ReplaceAll with named capture group.
+    status = U_ZERO_ERROR;
+    UnicodeString text("Substitution of <<quotes>> for <<double brackets>>");
+    RegexMatcher *m = new RegexMatcher(UnicodeString("<<(?<mid>.+?)>>"), text, 0, status);
+    REGEX_CHECK_STATUS;
+    // m.pattern().dumpPattern();
+    UnicodeString replacedText = m->replaceAll("'${mid}'", status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("Substitution of 'quotes' for 'double brackets'") == replacedText);
+    delete m;
+
+    // ReplaceAll, allowed capture group numbers.
+    text = UnicodeString("abcmxyz");
+    m = new RegexMatcher(UnicodeString("..(?<one>m)(.)(.)"), text, 0, status);
+    REGEX_CHECK_STATUS;
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$0>"), status);   // group 0, full match, is allowed.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$1>"), status);   // group 1 by number.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<${one}>"), status);   // group 1 by name.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$2>"), status);   // group 2.
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<x>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$3>"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<y>z") == replacedText);
+
+    // The pattern has only three groups; a reference to group 4 is an error.
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$4>"), status);
+    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$04>"), status);      // group 0, leading 0,
+    REGEX_CHECK_STATUS;                                                 //    trailing out-of-range 4 passes through.
+    REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$000016>"), status);  // Consume leading zeroes. Don't consume digits
+    REGEX_CHECK_STATUS;                                                 //   that push group num out of range.
+    REGEX_ASSERT(UnicodeString("a<m6>z") == replacedText);              //   This is group 1.
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<$3$2$1${one}>"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<yxmm>z") == replacedText);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("$3$2$1${one}"), status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("ayxmmz") == replacedText);
+
+    // Malformed or unknown ${name} references, and a bare '$', are all
+    // expected to report U_REGEX_INVALID_CAPTURE_GROUP_NAME.
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<${noSuchName}>"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<${invalid-name}>"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("<${one"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    replacedText = m->replaceAll(UnicodeString("$not a capture group"), status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    delete m;
+
+    // Repeat the above replaceAll() tests using the plain C API, which
+    // has a separate implementation internally.
+    // TODO: factor out the test data.
+
+    status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC("..(?<one>m)(.)(.)", 0, NULL, &status);
+    REGEX_CHECK_STATUS;
+    text = UnicodeString("abcmxyz");
+    uregex_setText(re, text.getBuffer(), text.length(), &status);
+    REGEX_CHECK_STATUS;
+
+    UChar resultBuf[100];
+    int32_t resultLength;
+    UnicodeString repl;
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$0>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$1>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${one}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$2>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<x>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$3>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<y>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$4>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$04>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$000016>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<m6>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<$3$2$1${one}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("a<yxmm>z") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("$3$2$1${one}");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(UnicodeString("ayxmmz") == UnicodeString(resultBuf, resultLength));
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${noSuchName}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${invalid-name}>");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("<${one");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    status = U_ZERO_ERROR;
+    repl = UnicodeString("$not a capture group");
+    resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);
+    REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);
+
+    uregex_close(re);
+}
+
+// Helper for NamedCaptureLimits: append the empty named groups
+// (?<nn1>)(?<nn2>)... to pattern, one for every number from 1 up to,
+// but not including, limit.
+static void appendNumberedNamedGroups(UnicodeString &pattern, int32_t limit) {
+    char groupText[100];
+    for (int32_t groupIdx = 1; groupIdx < limit; groupIdx++) {
+        sprintf(groupText, "(?<nn%d>)", groupIdx);
+        pattern.append(UnicodeString(groupText, -1, US_INV));
+    }
+}
+
+//--------------------------------------------------------------
+//
+//  NamedCaptureLimits   Patterns with huge numbers of named capture groups.
+//                       What matters is less the precise limit than that
+//                       a largish pattern still builds and resolves its
+//                       group names without bad non-linear performance,
+//                       and that going past the limit fails cleanly.
+//
+//--------------------------------------------------------------
+void RegexTest::NamedCaptureLimits() {
+    if (quick) {
+        logln("Skipping test. Runs in exhuastive mode only.");
+        return;
+    }
+    const int32_t goodLimit = 1000000;     // A pattern built from this limit compiles successfully.
+    const int32_t failLimit = 10000000;    // A pattern built from this limit is too big to compile.
+
+    // Within limits: the pattern compiles, and each name nnK maps to group K.
+    UnicodeString pattern;
+    appendNumberedNamedGroups(pattern, goodLimit);
+    UErrorCode status = U_ZERO_ERROR;
+    RegexPattern *pat = RegexPattern::compile(pattern, 0, status);
+    REGEX_CHECK_STATUS;
+    char nameBuf[100];
+    for (int32_t nn = 1; nn < goodLimit; nn++) {
+        sprintf(nameBuf, "nn%d", nn);
+        const int32_t groupNum = pat->groupNumberFromName(nameBuf, -1, status);
+        REGEX_ASSERT(nn == groupNum);
+        if (groupNum != nn) {
+            break;      // Stop at the first mismatch rather than repeating the failure.
+        }
+    }
+    delete pat;
+
+    // Beyond limits: compilation is expected to report U_REGEX_PATTERN_TOO_BIG.
+    pattern.remove();
+    appendNumberedNamedGroups(pattern, failLimit);
+    status = U_ZERO_ERROR;
+    pat = RegexPattern::compile(pattern, 0, status);
+    REGEX_ASSERT(status == U_REGEX_PATTERN_TOO_BIG);
+    delete pat;
+}
+
+
//--------------------------------------------------------------
//
// Bug7651 Regex pattern that exceeds default operator stack depth in matcher.
}
}
-#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
+//--------------------------------------------------------------
+//
+//  TestBug11480   C API: fetch the text of a capture group that does
+//                 not participate in the match.
+//
+//--------------------------------------------------------------
+void RegexTest::TestBug11480() {
+    // C API, get capture group of a group that does not participate in the match.
+    //        (Returns a zero length string, with nul termination,
+    //         indistinguishable from a group with a zero length match.)
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re = uregex_openC("(A)|(B)", 0, NULL, &status);
+    REGEX_CHECK_STATUS;
+    UnicodeString text = UNICODE_STRING_SIMPLE("A");
+    uregex_setText(re, text.getBuffer(), text.length(), &status);
+    REGEX_CHECK_STATUS;
+    REGEX_ASSERT(uregex_lookingAt(re, 0, &status));
+
+    // The first four slots of buf are pre-filled with 13 and the group is
+    // extracted into buf+1, so the asserts below can tell which elements
+    // the call wrote: only buf[1] should change, to the terminating nul.
+    UChar buf[10] = {(UChar)13, (UChar)13, (UChar)13, (UChar)13};
+    int32_t length = uregex_group(re, 2, buf+1, UPRV_LENGTHOF(buf)-1, &status);
+    // Asserted (rather than REGEX_CHECK_STATUS) so the remaining checks and
+    // uregex_close() below still run when the status is unexpected.
+    REGEX_ASSERT(U_SUCCESS(status));
+    REGEX_ASSERT(length == 0);
+    REGEX_ASSERT(buf[0] == 13);
+    REGEX_ASSERT(buf[1] == 0);
+    REGEX_ASSERT(buf[2] == 13);
+    uregex_close(re);
+}
+
+
+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */