]> granicus.if.org Git - icu/commitdiff
ICU-20292 u_charFromName() prevent code point integer overflow, and limit to at most...
authorMarkus Scherer <markus.icu@gmail.com>
Wed, 12 Dec 2018 06:05:08 +0000 (22:05 -0800)
committerMarkus Scherer <markus.icu@gmail.com>
Wed, 12 Dec 2018 22:08:37 +0000 (14:08 -0800)
icu4c/source/common/unames.cpp
icu4c/source/test/cintltst/cucdtst.c
icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java

index 9c230dc93ee2f3be2ebe3c0ae8a285e5841eff7b..d9f61cac1575a9acfe9f8be9e153c8a4b0db5438 100644 (file)
@@ -1526,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice,
     uint32_t i;
     UChar32 cp = 0;
     char c0;
-    UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
+    static constexpr UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
 
     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
         return error;
@@ -1560,39 +1560,45 @@ u_charFromName(UCharNameChoice nameChoice,
 
     /* try extended names first */
     if (lower[0] == '<') {
-        if (nameChoice == U_EXTENDED_CHAR_NAME) {
+        if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
-            if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
-                while (i >= 3 && lower[--i] != '-') {}
-
-                if (i >= 2 && lower[i] == '-') {
-                    uint32_t cIdx;
-
-                    lower[i] = 0;
-
-                    for (++i; lower[i] != '>'; ++i) {
-                        if (lower[i] >= '0' && lower[i] <= '9') {
-                            cp = (cp << 4) + lower[i] - '0';
-                        } else if (lower[i] >= 'a' && lower[i] <= 'f') {
-                            cp = (cp << 4) + lower[i] - 'a' + 10;
-                        } else {
-                            *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-                            return error;
-                        }
-                    }
+            uint32_t limit = i;
+            while (i >= 3 && lower[--i] != '-') {}
+
+            // There should be 1 to 8 hex digits.
+            int32_t hexLength = limit - (i + 1);
+            if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
+                uint32_t cIdx;
+
+                lower[i] = 0;
 
-                    /* Now validate the category name.
-                       We could use a binary search, or a trie, if
-                       we really wanted to. */
+                for (++i; i < limit; ++i) {
+                    if (lower[i] >= '0' && lower[i] <= '9') {
+                        cp = (cp << 4) + lower[i] - '0';
+                    } else if (lower[i] >= 'a' && lower[i] <= 'f') {
+                        cp = (cp << 4) + lower[i] - 'a' + 10;
+                    } else {
+                        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                        return error;
+                    }
+                    // Prevent signed-integer overflow and out-of-range code points.
+                    if (cp > UCHAR_MAX_VALUE) {
+                        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                        return error;
+                    }
+                }
 
-                    for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
+                /* Now validate the category name.
+                   We could use a binary search, or a trie, if
+                   we really wanted to. */
+                uint8_t cat = getCharCat(cp);
+                for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
 
-                        if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
-                            if (getCharCat(cp) == cIdx) {
-                                return cp;
-                            }
-                            break;
+                    if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+                        if (cat == cIdx) {
+                            return cp;
                         }
+                        break;
                     }
                 }
             }
index 9e94bcba0806ae07a0b718d61198250896d33760..7a778638732992bc717ce4febd98d0255e03b816 100644 (file)
@@ -2005,29 +2005,62 @@ TestCharNames() {
 static void
 TestUCharFromNameUnderflow() {
     // Ticket #10889: Underflow crash when there is no dash.
+    const char *name="<NO BREAK SPACE>";
     UErrorCode errorCode=U_ZERO_ERROR;
-    UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
+    UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
     if(U_SUCCESS(errorCode)) {
-        log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
     }
 
     // Test related edge cases.
+    name="<-00a0>";
     errorCode=U_ZERO_ERROR;
-    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
     if(U_SUCCESS(errorCode)) {
-        log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
     }
 
     errorCode=U_ZERO_ERROR;
-    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
+    name="<control->";
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
     if(U_SUCCESS(errorCode)) {
-        log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
     }
 
     errorCode=U_ZERO_ERROR;
-    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
+    name="<control-111111>";
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
     if(U_SUCCESS(errorCode)) {
-        log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
+    }
+
+    // ICU-20292: integer overflow
+    errorCode=U_ZERO_ERROR;
+    name="<noncharacter-10010FFFF>";
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+    if(U_SUCCESS(errorCode)) {
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
+    }
+
+    errorCode=U_ZERO_ERROR;
+    name="<noncharacter-00010FFFF>";  // too many digits even if only leading 0s
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+    if(U_SUCCESS(errorCode)) {
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
+    }
+
+    errorCode=U_ZERO_ERROR;
+    name="<noncharacter-fFFf>>";
+    c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
+    if(U_SUCCESS(errorCode)) {
+        log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
+                name, c, u_errorName(errorCode));
     }
 }
 
index 6f27a15ecc3783cf58ad9972489f3597352f2aed..76570584eb649088c5367351f27c59aa9405975a 100644 (file)
@@ -1350,6 +1350,12 @@ public final class UCharacterName
                     int startIndex = name.lastIndexOf('-');
                     if (startIndex >= 0) { // We've got a category.
                         startIndex ++;
+
+                        // There should be 1 to 8 hex digits.
+                        int hexLength = endIndex - startIndex;
+                        if (hexLength < 1 || 8 < hexLength) {
+                            return -1;
+                        }
                         int result = -1;
                         try {
                             result = Integer.parseInt(
@@ -1359,13 +1365,17 @@ public final class UCharacterName
                         catch (NumberFormatException e) {
                             return -1;
                         }
+                        if (result < 0 || 0x10ffff < result) {
+                            return -1;
+                        }
                         // Now validate the category name. We could use a
                         // binary search, or a trie, if we really wanted to.
+                        int charType = getType(result);
                         String type = name.substring(1, startIndex - 1);
                         int length = TYPE_NAMES_.length;
                         for (int i = 0; i < length; ++ i) {
                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
-                                if (getType(result) == i) {
+                                if (charType == i) {
                                     return result;
                                 }
                                 break;
index 02bd4536e9748a97c3b69044ae39bc0563a8ca81..890d9bebb7f39e04cefbe4fb4edcc0aabdf590a6 100644 (file)
@@ -1232,28 +1232,54 @@ public final class UCharacterTest extends TestFmwk
     @Test
     public void TestUCharFromNameUnderflow() {
         // Ticket #10889: Underflow crash when there is no dash.
-        int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>");
+        String name = "<NO BREAK SPACE>";
+        int c = UCharacter.getCharFromExtendedName(name);
         if(c >= 0) {
-            errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) +
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
                     " but should fail (-1)");
         }
 
         // Test related edge cases.
-        c = UCharacter.getCharFromExtendedName("<-00a0>");
+        name = "<-00a0>";
+        c = UCharacter.getCharFromExtendedName(name);
         if(c >= 0) {
-            errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) +
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
                     " but should fail (-1)");
         }
 
-        c = UCharacter.getCharFromExtendedName("<control->");
+        name = "<control->";
+        c = UCharacter.getCharFromExtendedName(name);
         if(c >= 0) {
-            errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) +
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
                     " but should fail (-1)");
         }
 
-        c = UCharacter.getCharFromExtendedName("<control-111111>");
+        name = "<control-111111>";
+        c = UCharacter.getCharFromExtendedName(name);
         if(c >= 0) {
-            errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) +
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+                    " but should fail (-1)");
+        }
+
+        // ICU-20292: integer overflow
+        name = "<noncharacter-10010FFFF>";
+        c = UCharacter.getCharFromExtendedName(name);
+        if(c >= 0) {
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+                    " but should fail (-1)");
+        }
+
+        name = "<noncharacter-00010FFFF>";  // too many digits even if only leading 0s
+        c = UCharacter.getCharFromExtendedName(name);
+        if(c >= 0) {
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
+                    " but should fail (-1)");
+        }
+
+        name = "<noncharacter-fFFf>>";
+        c = UCharacter.getCharFromExtendedName(name);
+        if(c >= 0) {
+            errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
                     " but should fail (-1)");
         }
     }