UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
UErrorCode& ec);
//----------------------------------------------------------------
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
+ applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
return i.fSet;
}
+namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
// memory leak checker tools
#define _dbgct(me)
+} // namespace
+
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
+ applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
// Implementation: Pattern parsing
//----------------------------------------------------------------
+namespace {
+
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
}
};
+constexpr int32_t MAX_DEPTH = 100;
+
+} // namespace
+
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
+ if (depth > MAX_DEPTH) {
+ ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
// Syntax characters: [ ] ^ - & { }
}
switch (setMode) {
case 1:
- nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
+ nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
break;
case 2:
chars.skipIgnored(opts);
// Property set implementation
//----------------------------------------------------------------
+namespace {
+
/**
 * Filter callback: reports whether the numeric value of a code point
 * matches a target value.
 *
 * @param ch      the code point to test
 * @param context pointer to the double holding the target numeric value
 * @return nonzero if u_getNumericValue(ch) equals the target value
 */
static UBool numericValueFilter(UChar32 ch, void* context) {
    const double* wanted = static_cast<double*>(context);
    return u_getNumericValue(ch) == *wanted;
}
return uscript_hasScript(ch, *(UScriptCode*)context);
}
+} // namespace
+
/**
* Generic filter-based scanning code for UCD property UnicodeSets.
*/
}
}
+namespace {
+
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
/* Note: we use ' ' in compiler code page */
int32_t j = 0;
return TRUE;
}
+} // namespace
+
//----------------------------------------------------------------
// Property set API
//----------------------------------------------------------------
return left + UnicodeSetTest::escape(pat);
}
-#define CASE(id,test) case id: \
- name = #test; \
- if (exec) { \
- logln(#test "---"); \
- logln(); \
- test(); \
- } \
- break
-
// Default constructor: starts with no UTF-8 converter attached.
UnicodeSetTest::UnicodeSetTest()
        : utf8Cnv(nullptr) {
}
TESTCASE_AUTO(TestUCAUnsafeBackwards);
TESTCASE_AUTO(TestIntOverflow);
TESTCASE_AUTO(TestUnusedCcc);
+ TESTCASE_AUTO(TestDeepPattern);
TESTCASE_AUTO_END;
}
assertTrue("[:ccc=1.1:] -> empty set", ccc1_1.isEmpty());
#endif
}
+
+void UnicodeSetTest::TestDeepPattern() {
+    IcuTestErrorCode errorCode(*this, "TestDeepPattern");
+    // Nested sets are parsed recursively, which can consume a lot of stack.
+    // Past a reasonable nesting limit the parser is expected to report an
+    // error instead of recursing further.
+    constexpr int32_t DEPTH = 20000;
+    UnicodeString opening;
+    UnicodeString closing;
+    for (int32_t level = 0; level < DEPTH; ++level) {
+        opening.append(u"[a", 2);
+        closing.append(']');
+    }
+    // Build "[a[a[a ... ]]]" with DEPTH levels of nesting.
+    opening.append(closing);
+    UnicodeSet set(opening, errorCode);
+    const UBool parseFailed = errorCode.isFailure();
+    assertTrue("[a[a[a...1000s...]]] -> error", parseFailed);
+    // The failure is the expected outcome, so clear it before returning.
+    errorCode.reset();
+}
void TestUCAUnsafeBackwards();
void TestIntOverflow();
void TestUnusedCcc();
+ void TestDeepPattern();
private:
StringBuilder rebuiltPat = new StringBuilder();
RuleCharacterIterator chars =
new RuleCharacterIterator(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, options);
+ applyPattern(chars, symbols, rebuiltPat, options, 0);
if (chars.inVariable()) {
syntaxError(chars, "Extra chars in variable value");
}
SETMODE2_PROPERTYPAT = 2,
SETMODE3_PREPARSED = 3;
+ private static final int MAX_DEPTH = 100;
+
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
* IGNORE_SPACE, CASE.
*/
private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
- Appendable rebuiltPat, int options) {
+ Appendable rebuiltPat, int options, int depth) {
+ if (depth > MAX_DEPTH) {
+ syntaxError(chars, "Pattern nested too deeply");
+ }
// Syntax characters: [ ] ^ - & { }
}
switch (setMode) {
case SETMODE1_UNICODESET:
- nested.applyPattern(chars, symbols, patBuf, options);
+ nested.applyPattern(chars, symbols, patBuf, options, depth + 1);
break;
case SETMODE2_PROPERTYPAT:
chars.skipIgnored(opts);
} catch (IllegalArgumentException expected) {
}
}
+
+    @Test
+    public void TestDeepPattern() {
+        // Nested sets are parsed recursively, which can consume a lot of stack.
+        // Past a reasonable nesting limit the parser is expected to throw
+        // instead of recursing further.
+        final int DEPTH = 20000;
+        StringBuilder opening = new StringBuilder();
+        StringBuilder closing = new StringBuilder();
+        for (int level = 0; level < DEPTH; ++level) {
+            opening.append("[a");
+            closing.append(']');
+        }
+        // Build "[a[a[a ... ]]]" with DEPTH levels of nesting.
+        String deepPattern = opening.append(closing).toString();
+        try {
+            new UnicodeSet(deepPattern);
+            fail("[a[a[a...1000s...]]] did not throw an exception");
+        } catch (RuntimeException expected) {
+            // Expected: the over-deep pattern was rejected.
+        }
+    }
}