From: Markus Scherer Date: Thu, 30 Jun 2011 22:07:22 +0000 (+0000) Subject: ICU-8606 add Normalizer2.getCombiningClass(c) X-Git-Tag: milestone-59-0-1~4697 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e95274d0a441c260b571c1a856cd1219194fd853;p=icu ICU-8606 add Normalizer2.getCombiningClass(c) X-SVN-Rev: 30261 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java index 920e515c80c..9b882f5b19e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2009-2010, International Business Machines +* Copyright (C) 2009-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* */ @@ -129,6 +129,11 @@ public final class Norm2AllModes { return impl.getDecomposition(c); } + @Override + public int getCombiningClass(int c) { + return impl.getCC(impl.getNorm16(c)); + } + // quick checks @Override public boolean isNormalized(CharSequence s) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java index 9c8e9aaa713..e2413a9e9e2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java @@ -492,8 +492,7 @@ public final class UCharacterProperty new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_), new CombiningClassIntProperty(SRC_NFC) { // CANONICAL_COMBINING_CLASS int getValue(int c) { - Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl; - return impl.getCC(impl.getNorm16(c)); + return Norm2AllModes.getNFCInstance().decomp.getCombiningClass(c); } }, new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0), diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java index 809b772eda1..800f15a84a2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UTS46.java @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* */ @@ -684,7 +684,7 @@ public final class UTS46 extends IDNA { int j=i; c=Character.codePointBefore(label, j); j-=Character.charCount(c); - if(UCharacter.getCombiningClass(c)==9) { + if(uts46Norm2.getCombiningClass(c)==9) { continue; } // check precontext (Joining_Type:{L,D})(Joining_Type:T)* diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java index c059b620d5f..ba424ae0089 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java @@ -16,7 +16,6 @@ import java.util.Map; import com.ibm.icu.impl.IllegalIcuArgumentException; import com.ibm.icu.impl.Norm2AllModes; -import com.ibm.icu.impl.Normalizer2Impl; import com.ibm.icu.impl.Trie2; import com.ibm.icu.impl.UBiDiProps; import com.ibm.icu.impl.UCaseProps; @@ -3890,8 +3889,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection if (ch < MIN_VALUE || ch > MAX_VALUE) { throw new IllegalArgumentException("Codepoint out of bounds"); } - Normalizer2Impl impl = Norm2AllModes.getNFCInstance().impl; - return impl.getCC(impl.getNorm16(ch)); + return Norm2AllModes.getNFCInstance().decomp.getCombiningClass(ch); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/FilteredNormalizer2.java b/icu4j/main/classes/core/src/com/ibm/icu/text/FilteredNormalizer2.java index f53487d70dd..c2f5f834b74 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/FilteredNormalizer2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/FilteredNormalizer2.java @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2009-2010, International Business Machines +* Copyright (C) 2009-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* */ @@ -89,6 +89,16 @@ public class FilteredNormalizer2 extends Normalizer2 { return set.contains(c) ? norm2.getDecomposition(c) : null; } + /** + * {@inheritDoc} + * @draft ICU 49 + * @provisional This API might change or be removed in a future release. + */ + @Override + public int getCombiningClass(int c) { + return set.contains(c) ? norm2.getCombiningClass(c) : 0; + } + /** * {@inheritDoc} * @stable ICU 4.4 diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java index c472edb2b54..c0e33541460 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Normalizer2.java @@ -208,6 +208,17 @@ public abstract class Normalizer2 { */ public abstract String getDecomposition(int c); + /** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param c code point + * @return c's combining class + * @draft ICU 49 + * @provisional This API might change or be removed in a future release. + */ + public int getCombiningClass(int c) { return 0; } + /** * Tests if the string is normalized. * Internally, in cases where the quickCheck() method would return "maybe" diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java index 5024473e278..12b4aa02057 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterTest.java @@ -684,6 +684,8 @@ public final class UCharacterTest extends TestFmwk type = 0, dir = 0; + Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE); + try { BufferedReader input = TestUtil.getDataReader( @@ -758,6 +760,12 @@ public final class UCharacterTest extends TestFmwk "class " + cc); break; } + if (nfkc.getCombiningClass(ch) != cc) + { + errln("FAIL \\u" + hex(ch) + " expected NFKC combining " + + "class " + cc); + break; + } // testing the direction if (d.length() == 1) diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java index 19338e0f431..2b615520a39 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/normalizer/BasicTest.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1996-2010, International Business Machines Corporation and + * Copyright (C) 1996-2011, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -2572,4 +2572,19 @@ public class BasicTest extends TestFmwk { } } } + + public void TestFilteredNormalizer2() { + Normalizer2 nfcNorm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE); + UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]"); + FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter); + int c; + for(c=0; c<=0x3ff; ++c) { + int expectedCC= filter.contains(c) ? nfcNorm2.getCombiningClass(c) : 0; + int cc=fn2.getCombiningClass(c); + assertEquals( + "FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+"+hex(c)+ + ")==filtered NFC.getCC()", + expectedCC, cc); + } + } }