From: Markus Scherer
Date: Sat, 19 Jun 2021 02:01:58 +0000 (+0000)
Subject: ICU-21645 test & fix unescapeAt(2 supp escapes)
X-Git-Tag: cldr/2021-08-11~30
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d9aa1ecab0cc65e402ddc435db01cfce68940cda;p=icu

ICU-21645 test & fix unescapeAt(2 supp escapes)
---

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
index d74c2314360..cd906011552 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
@@ -863,8 +863,7 @@ public final class Utility {
         // if there is a trail surrogate after it, either as an
         // escape or as a literal. If so, join them up into a
         // supplementary.
-        if (offset < length &&
-                UTF16.isLeadSurrogate((char) result)) {
+        if (offset < length && result <= 0xffff && UTF16.isLeadSurrogate((char) result)) {
             int ahead = offset+1;
             c = s.charAt(offset); // [sic] get 16-bit code unit
             if (c == '\\' && ahead < length) {
@@ -872,7 +871,7 @@ public final class Utility {
                 c = unescapeAt(s, o);
                 ahead = o[0];
             }
-            if (UTF16.isTrailSurrogate((char) c)) {
+            if (c <= 0xffff && UTF16.isTrailSurrogate((char) c)) {
                 offset = ahead;
                 result = Character.toCodePoint((char) result, (char) c);
             }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/UtilityTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/UtilityTest.java
index 15aebdfc045..a7ee1d072d3 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/UtilityTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/UtilityTest.java
@@ -49,6 +49,22 @@ public class UtilityTest extends TestFmwk {
         if (!result.equals(expect)) {
             errln("FAIL: Utility.unescape() returned " + result + ", exp. " + expect);
         }
+
+        // Regression test for ICU-21645
+        String s = "\\U0001DA8B\\U0001DF00-\\U0001DF1E";
+        int[] offset16 = new int[] { 1 }; // after the backslash
+        // This returned U+B2F00 for the first _two_ escapes.
+        int c = Utility.unescapeAt(s, offset16);
+        assertEquals(s + " unescape at 1, code point", 0x1DA8B, c);
+        assertEquals(s + " unescape at 1, offset", 10, offset16[0]);
+        String pattern = "[" + s + "]";
+        // This threw an IllegalArgumentException because the parser called Utility.unescapeAt()
+        // and saw an invalid range of B2F00..1DF1E (start >= end).
+        UnicodeSet set = new UnicodeSet(pattern);
+        assertEquals(pattern + " size", 32, set.size());
+        assertTrue(pattern + " contains U+1DA8B", set.contains(0x1DA8B));
+        assertTrue(pattern + " contains U+1DF00..U+1DF1E", set.contains(0x1DF00, 0x1DF1E));
+        assertFalse(pattern + " contains U+1DF1F", set.contains(0x1DF1F));
     }
 
     @Test