uint32_t i;
UChar32 cp = 0;
char c0;
- UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
+ static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return error;
/* try extended names first */
if (lower[0] == '<') {
- if (nameChoice == U_EXTENDED_CHAR_NAME) {
+ if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
- if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
- while (i >= 3 && lower[--i] != '-') {}
-
- if (i >= 2 && lower[i] == '-') {
- uint32_t cIdx;
-
- lower[i] = 0;
-
- for (++i; lower[i] != '>'; ++i) {
- if (lower[i] >= '0' && lower[i] <= '9') {
- cp = (cp << 4) + lower[i] - '0';
- } else if (lower[i] >= 'a' && lower[i] <= 'f') {
- cp = (cp << 4) + lower[i] - 'a' + 10;
- } else {
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- return error;
- }
- }
+ uint32_t limit = i;
+ while (i >= 3 && lower[--i] != '-') {}
+
+ // There should be 1 to 8 hex digits.
+ int32_t hexLength = limit - (i + 1);
+ if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
+ uint32_t cIdx;
+
+ lower[i] = 0;
- /* Now validate the category name.
- We could use a binary search, or a trie, if
- we really wanted to. */
+ for (++i; i < limit; ++i) {
+ if (lower[i] >= '0' && lower[i] <= '9') {
+ cp = (cp << 4) + lower[i] - '0';
+ } else if (lower[i] >= 'a' && lower[i] <= 'f') {
+ cp = (cp << 4) + lower[i] - 'a' + 10;
+ } else {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ // Prevent signed-integer overflow and out-of-range code points.
+ if (cp > UCHAR_MAX_VALUE) {
+ *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+ return error;
+ }
+ }
- for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
+ /* Now validate the category name.
+ We could use a binary search, or a trie, if
+ we really wanted to. */
+ uint8_t cat = getCharCat(cp);
+ for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
- if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
- if (getCharCat(cp) == cIdx) {
- return cp;
- }
- break;
+ if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+ if (cat == cIdx) {
+ return cp;
}
+ break;
}
}
}
static void
TestUCharFromNameUnderflow() {
// Ticket #10889: Underflow crash when there is no dash.
+ const char *name="<NO BREAK SPACE>";
UErrorCode errorCode=U_ZERO_ERROR;
- UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
+ UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
- log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
}
// Test related edge cases.
+ name="<-00a0>";
errorCode=U_ZERO_ERROR;
- c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
- log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
- c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
+ name="<control->";
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
- log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
- c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
+ name="<control-111111>";
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
- log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
+ }
+
+ // ICU-20292: integer overflow
+ errorCode=U_ZERO_ERROR;
+ name="<noncharacter-10010FFFF>";
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+ if(U_SUCCESS(errorCode)) {
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
+ }
+
+ errorCode=U_ZERO_ERROR;
+ name="<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+ if(U_SUCCESS(errorCode)) {
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
+ }
+
+ errorCode=U_ZERO_ERROR;
+ name="<noncharacter-fFFf>>";
+ c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+ if(U_SUCCESS(errorCode)) {
+ log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+ name, c, u_errorName(errorCode));
}
}
int startIndex = name.lastIndexOf('-');
if (startIndex >= 0) { // We've got a category.
startIndex ++;
+
+ // There should be 1 to 8 hex digits.
+ int hexLength = endIndex - startIndex;
+ if (hexLength < 1 || 8 < hexLength) {
+ return -1;
+ }
int result = -1;
try {
result = Integer.parseInt(
catch (NumberFormatException e) {
return -1;
}
+ if (result < 0 || 0x10ffff < result) {
+ return -1;
+ }
// Now validate the category name. We could use a
// binary search, or a trie, if we really wanted to.
+ int charType = getType(result);
String type = name.substring(1, startIndex - 1);
int length = TYPE_NAMES_.length;
for (int i = 0; i < length; ++ i) {
if (type.compareTo(TYPE_NAMES_[i]) == 0) {
- if (getType(result) == i) {
+ if (charType == i) {
return result;
}
break;
@Test
public void TestUCharFromNameUnderflow() {
// Ticket #10889: Underflow crash when there is no dash.
- int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>");
+ String name = "<NO BREAK SPACE>";
+ int c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
- errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) +
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
// Test related edge cases.
- c = UCharacter.getCharFromExtendedName("<-00a0>");
+ name = "<-00a0>";
+ c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
- errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) +
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
- c = UCharacter.getCharFromExtendedName("<control->");
+ name = "<control->";
+ c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
- errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) +
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
- c = UCharacter.getCharFromExtendedName("<control-111111>");
+ name = "<control-111111>";
+ c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
- errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) +
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+ " but should fail (-1)");
+ }
+
+ // ICU-20292: integer overflow
+ name = "<noncharacter-10010FFFF>";
+ c = UCharacter.getCharFromExtendedName(name);
+ if(c >= 0) {
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+ " but should fail (-1)");
+ }
+
+ name = "<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
+ c = UCharacter.getCharFromExtendedName(name);
+ if(c >= 0) {
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+ " but should fail (-1)");
+ }
+
+ name = "<noncharacter-fFFf>>";
+ c = UCharacter.getCharFromExtendedName(name);
+ if(c >= 0) {
+ errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
}