From: Mark Davis Date: Wed, 13 Jul 2011 04:12:10 +0000 (+0000) Subject: ICU-8522 Now using the new special labels, tests passing. X-Git-Tag: milestone-59-0-1~4652 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d56fad92d043c070c27179285283e3a65277649d;p=icu ICU-8522 Now using the new special labels, tests passing. X-SVN-Rev: 30325 --- diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java index 7688b097ffe..8701026171d 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java @@ -9,6 +9,7 @@ package com.ibm.icu.text; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; @@ -25,6 +26,7 @@ import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UScript; import com.ibm.icu.text.AlphabeticIndex.Bucket; +import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType; import com.ibm.icu.util.LocaleData; import com.ibm.icu.util.ULocale; @@ -130,14 +132,24 @@ public final class AlphabeticIndex implements Iterable> { */ static final boolean HACK_CODED_FIRSTS = true; - private static UnicodeSet UNIHAN = new UnicodeSet("[:script=Hani:]"); + private static UnicodeSet UNIHAN = new UnicodeSet("[:script=Hani:]").freeze(); + + static final String BASE = "\uFDD0"; + // these are generated. Later, get from CLDR data. + + static final UnicodeSet PINYIN_LABELS = new UnicodeSet("[A-Z{\uFDD0A}{\uFDD0B}{\uFDD0C}{\uFDD0D}{\uFDD0E}{\uFDD0F}{\uFDD0G}{\uFDD0H}{\uFDD0I}{\uFDD0J}{\uFDD0K}{\uFDD0L}{\uFDD0M}{\uFDD0N}{\uFDD0O}{\uFDD0P}{\uFDD0Q}{\uFDD0R}{\uFDD0S}{\uFDD0T}{\uFDD0U}{\uFDD0V}{\uFDD0W}{\uFDD0X}{\uFDD0Y}{\uFDD0Z}]").freeze(); + static final UnicodeSet STROKE_LABELS = new UnicodeSet("[{\uFDD0\u2801}{\uFDD0\u2802}{\uFDD0\u2803}{\uFDD0\u2804}{\uFDD0\u2805}{\uFDD0\u2806}{\uFDD0\u2807}{\uFDD0\u2808}{\uFDD0\u2809}{\uFDD0\u280A}{\uFDD0\u280B}{\uFDD0\u280C}{\uFDD0\u280D}{\uFDD0\u280E}{\uFDD0\u280F}{\uFDD0\u2810}{\uFDD0\u2811}{\uFDD0\u2812}{\uFDD0\u2813}{\uFDD0\u2814}{\uFDD0\u2815}{\uFDD0\u2816}{\uFDD0\u2817}{\uFDD0\u2818}{\uFDD0\u2819}{\uFDD0\u281A}{\uFDD0\u281B}{\uFDD0\u281C}{\uFDD0\u281D}{\uFDD0\u281E}{\uFDD0\u281F}{\uFDD0\u2820}{\uFDD0\u2821}{\uFDD0\u2822}{\uFDD0\u2823}{\uFDD0\u2824}{\uFDD0\u2825}{\uFDD0\u2826}{\uFDD0\u2827}{\uFDD0\u2828}{\uFDD0\u2829}{\uFDD0\u282A}{\uFDD0\u282B}{\uFDD0\u282C}{\uFDD0\u282E}{\uFDD0\u2830}{\uFDD0\u2834}{\uFDD0\u2840}]").freeze(); + static final UnicodeSet RADICAL_LABELS = new UnicodeSet("[{\uFDD0\u2E80}{\uFDD0\u2E81}{\uFDD0\u2E84}{\uFDD0\u2E85}{\uFDD0\u2E86}{\uFDD0\u2E87}{\uFDD0\u2E88}{\uFDD0\u2E8A}{\uFDD0\u2E8B}{\uFDD0\u2E8C}{\uFDD0\u2E91}{\uFDD0\u2E92}{\uFDD0\u2E93}{\uFDD0\u2E95}{\uFDD0\u2E97}{\uFDD0\u2E98}{\uFDD0\u2E99}{\uFDD0\u2E9B}{\uFDD0\u2E9D}{\uFDD0\u2E9E}{\uFDD0\u2E9F}{\uFDD0\u2EA0}{\uFDD0\u2EA2}{\uFDD0\u2EA3}{\uFDD0\u2EA4}{\uFDD0\u2EA7}{\uFDD0\u2EA8}{\uFDD0\u2EA9}{\uFDD0\u2EAA}{\uFDD0\u2EAB}{\uFDD0\u2EAC}{\uFDD0\u2EAE}{\uFDD0\u2EAF}{\uFDD0\u2EB0}{\uFDD0\u2EB4}{\uFDD0\u2EB8}{\uFDD0\u2EB9}{\uFDD0\u2EBB}{\uFDD0\u2EBC}{\uFDD0\u2EBD}{\uFDD0\u2EC0}{\uFDD0\u2EC1}{\uFDD0\u2EC2}{\uFDD0\u2EC3}{\uFDD0\u2EC5}{\uFDD0\u2EC6}{\uFDD0\u2EC8}{\uFDD0\u2EC9}{\uFDD0\u2ECA}{\uFDD0\u2ECB}{\uFDD0\u2ECF}{\uFDD0\u2ED0}{\uFDD0\u2ED1}{\uFDD0\u2ED3}{\uFDD0\u2ED4}{\uFDD0\u2ED6}{\uFDD0\u2ED7}{\uFDD0\u2ED8}{\uFDD0\u2ED9}{\uFDD0\u2EDA}{\uFDD0\u2EDB}{\uFDD0\u2EDC}{\uFDD0\u2EDD}{\uFDD0\u2EE0}{\uFDD0\u2EE1}{\uFDD0\u2EE2}{\uFDD0\u2EE3}{\uFDD0\u2EE4}{\uFDD0\u2EE5}{\uFDD0\u2EE6}{\uFDD0\u2EE7}{\uFDD0\u2EE8}{\uFDD0\u2EEA}{\uFDD0\u2EEB}{\uFDD0\u2EED}{\uFDD0\u2EEE}{\uFDD0\u2EEF}{\uFDD0\u2EF0}{\uFDD0\u2EF2}{\uFDD0\u2EF3}{\uFDD0\u2F00}{\uFDD0\u2F01}{\uFDD0\u2F02}{\uFDD0\u2F03}{\uFDD0\u2F05}{\uFDD0\u2F06}{\uFDD0\u2F07}{\uFDD0\u2F09}{\uFDD0\u2F0A}{\uFDD0\u2F0B}{\uFDD0\u2F0D}{\uFDD0\u2F0E}{\uFDD0\u2F10}{\uFDD0\u2F12}{\uFDD0\u2F13}{\uFDD0\u2F14}{\uFDD0\u2F15}{\uFDD0\u2F16}{\uFDD0\u2F17}{\uFDD0\u2F1B}{\uFDD0\u2F1D}{\uFDD0\u2F1E}{\uFDD0\u2F1F}{\uFDD0\u2F20}{\uFDD0\u2F21}{\uFDD0\u2F22}{\uFDD0\u2F23}{\uFDD0\u2F24}{\uFDD0\u2F25}{\uFDD0\u2F26}{\uFDD0\u2F27}{\uFDD0\u2F28}{\uFDD0\u2F2B}{\uFDD0\u2F2C}{\uFDD0\u2F2D}{\uFDD0\u2F2E}{\uFDD0\u2F2F}{\uFDD0\u2F31}{\uFDD0\u2F32}{\uFDD0\u2F34}{\uFDD0\u2F35}{\uFDD0\u2F36}{\uFDD0\u2F37}{\uFDD0\u2F38}{\uFDD0\u2F3A}{\uFDD0\u2F3B}{\uFDD0\u2F3D}{\uFDD0\u2F3E}{\uFDD0\u2F40}{\uFDD0\u2F42}{\uFDD0\u2F43}{\uFDD0\u2F44}{\uFDD0\u2F45}{\uFDD0\u2F46}{\uFDD0\u2F48}{\uFDD0\u2F4A}{\uFDD0\u2F4B}{\uFDD0\u2F4C}{\uFDD0\u2F4E}{\uFDD0\u2F50}{\uFDD0\u2F51}{\uFDD0\u2F53}{\uFDD0\u2F57}{\uFDD0\u2F58}{\uFDD0\u2F59}{\uFDD0\u2F5A}{\uFDD0\u2F5B}{\uFDD0\u2F5E}{\uFDD0\u2F60}{\uFDD0\u2F61}{\uFDD0\u2F62}{\uFDD0\u2F63}{\uFDD0\u2F64}{\uFDD0\u2F65}{\uFDD0\u2F67}{\uFDD0\u2F68}{\uFDD0\u2F69}{\uFDD0\u2F6A}{\uFDD0\u2F6B}{\uFDD0\u2F6D}{\uFDD0\u2F6E}{\uFDD0\u2F6F}{\uFDD0\u2F71}{\uFDD0\u2F72}{\uFDD0\u2F73}{\uFDD0\u2F74}{\uFDD0\u2F76}{\uFDD0\u2F78}{\uFDD0\u2F7B}{\uFDD0\u2F7D}{\uFDD0\u2F7E}{\uFDD0\u2F7F}{\uFDD0\u2F82}{\uFDD0\u2F83}{\uFDD0\u2F84}{\uFDD0\u2F86}{\uFDD0\u2F87}{\uFDD0\u2F88}{\uFDD0\u2F89}{\uFDD0\u2F8A}{\uFDD0\u2F8D}{\uFDD0\u2F8E}{\uFDD0\u2F8F}{\uFDD0\u2F92}{\uFDD0\u2F94}{\uFDD0\u2F95}{\uFDD0\u2F96}{\uFDD0\u2F97}{\uFDD0\u2F98}{\uFDD0\u2F99}{\uFDD0\u2F9A}{\uFDD0\u2F9B}{\uFDD0\u2F9D}{\uFDD0\u2F9E}{\uFDD0\u2F9F}{\uFDD0\u2FA0}{\uFDD0\u2FA1}{\uFDD0\u2FA3}{\uFDD0\u2FA4}{\uFDD0\u2FA5}{\uFDD0\u2FA6}{\uFDD0\u2FA8}{\uFDD0\u2FAA}{\uFDD0\u2FAB}{\uFDD0\u2FAE}{\uFDD0\u2FAF}{\uFDD0\u2FB0}{\uFDD0\u2FB1}{\uFDD0\u2FB2}{\uFDD0\u2FB3}{\uFDD0\u2FB4}{\uFDD0\u2FB5}{\uFDD0\u2FB6}{\uFDD0\u2FB9}{\uFDD0\u2FBA}{\uFDD0\u2FBC}{\uFDD0\u2FBD}{\uFDD0\u2FBE}{\uFDD0\u2FBF}{\uFDD0\u2FC0}{\uFDD0\u2FC2}{\uFDD0\u2FC3}{\uFDD0\u2FC4}{\uFDD0\u2FC5}{\uFDD0\u2FC6}{\uFDD0\u2FC7}{\uFDD0\u2FC8}{\uFDD0\u2FC9}{\uFDD0\u2FCA}{\uFDD0\u2FCB}{\uFDD0\u2FCC}{\uFDD0\u2FCD}{\uFDD0\u2FCE}{\uFDD0\u2FCF}{\uFDD0\u2FD0}{\uFDD0\u2FD1}{\uFDD0\u2FD5}]").freeze(); + static final List PROBES = Arrays.asList("\u4E00", "\uFDD0A", "\uFDD0\u2801", "\uFDD0\u2E80"); + static final int PINYIN_PROBE_INDEX = 1; + static final UnicodeSet[] MATCHING = {null, PINYIN_LABELS, STROKE_LABELS, RADICAL_LABELS}; private static final char CGJ = '\u034F'; - private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]"); + private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]").add(BASE).freeze(); private static final UnicodeSet HANGUL = new UnicodeSet( - "[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]"); - private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"); - private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]"); + "[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]").freeze(); + private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]").freeze(); + private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]").freeze(); private final RuleBasedCollator collatorOriginal; private final RuleBasedCollator collatorPrimaryOnly; @@ -160,7 +172,7 @@ public final class AlphabeticIndex implements Iterable> { private String overflowLabel = "\u2026"; private String underflowLabel = "\u2026"; private String inflowLabel = "\u2026"; - private LangType langType; + private boolean hasPinyin; /** * Create the index object. @@ -171,7 +183,7 @@ public final class AlphabeticIndex implements Iterable> { * @provisional This API might change or be removed in a future release. */ public AlphabeticIndex(ULocale locale) { - this(locale, null, getIndexExemplars(locale)); + this(locale, null, null); } /** @@ -186,52 +198,53 @@ public final class AlphabeticIndex implements Iterable> { this(ULocale.forLocale(locale)); } - /** - * @internal - * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. - */ - public enum LangType { - /** - * @internal - * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. - */ - NORMAL, - /** - * @internal - * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. - */ - SIMPLIFIED, - /** - * @internal - * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. - */ - TRADITIONAL; - /** - * @internal - * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. - */ - public static LangType fromLocale(ULocale locale) { - String lang = locale.getLanguage(); - if (lang.equals("zh")) { - if ("Hant".equals(locale.getScript()) || "TW".equals(locale.getCountry())) { - return TRADITIONAL; - } - return SIMPLIFIED; - } - return NORMAL; - } - } + // /** + // * @internal + // * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. + // */ + // public enum LangType { + // /** + // * @internal + // * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. + // */ + // NORMAL, + // /** + // * @internal + // * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. + // */ + // SIMPLIFIED, + // /** + // * @internal + // * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. + // */ + // TRADITIONAL; + // /** + // * @internal + // * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. + // */ + // public static LangType fromLocale(ULocale locale) { + // String lang = locale.getLanguage(); + // if (lang.equals("zh")) { + // if ("Hant".equals(locale.getScript()) || "TW".equals(locale.getCountry())) { + // return TRADITIONAL; + // } + // return SIMPLIFIED; + // } + // return NORMAL; + // } + // } /** * @internal * @deprecated This API is ICU internal only, for testing purposes and use with CLDR. */ public AlphabeticIndex(ULocale locale, RuleBasedCollator collator, UnicodeSet exemplarChars) { - langType = LangType.fromLocale(locale); - // HACK because we have to know the type of the collation for Chinese - if (langType != LangType.NORMAL) { - locale = locale.setKeywordValue("collation", langType == LangType.TRADITIONAL ? "stroke" : "pinyin"); - } + // langType = LangType.fromLocale(locale); + // // HACK because we have to know the type of the collation for Chinese + // if (langType != LangType.NORMAL) { + // locale = locale.setKeywordValue("collation", langType == LangType.TRADITIONAL ? "stroke" : "pinyin"); + // } + hasPinyin = false; collatorOriginal = collator != null ? collator : (RuleBasedCollator) Collator.getInstance(locale); try { collatorPrimaryOnly = (RuleBasedCollator) (collatorOriginal.clone()); @@ -240,6 +253,9 @@ public final class AlphabeticIndex implements Iterable> { throw new IllegalStateException("Collator cannot be cloned", e); } collatorPrimaryOnly.setStrength(Collator.PRIMARY); + if (exemplarChars == null) { + exemplarChars = getIndexExemplars(locale); + } addLabels(exemplarChars); } @@ -455,10 +471,42 @@ public final class AlphabeticIndex implements Iterable> { * @param locale * @return */ - private static UnicodeSet getIndexExemplars(ULocale locale) { - UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX); + private UnicodeSet getIndexExemplars(ULocale locale) { + UnicodeSet exemplars; + exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX); if (exemplars != null) { + // HACK + final String language = locale.getLanguage(); + if (language.equals("zh") || language.equals("ja") || language.equals("ko")) { + // find out which one we are using + TreeSet probeSet = new TreeSet(collatorOriginal); + + // UnicodeSet tailored = collatorOriginal.getTailoredSet(); + // tailored.addAllTo(probeSet); + // System.out.println(probeSet); + // probeSet.clear(); + + probeSet.addAll(PROBES); + String first = probeSet.iterator().next(); + int location = PROBES.indexOf(first); + if (location > 0) { + if (location == PINYIN_PROBE_INDEX) { + hasPinyin = true; + } + exemplars.clear().addAll(MATCHING[location]); + } + } + // LangType langType2 = LangType.fromLocale(locale); + // if (langType2 == LangType.TRADITIONAL) { + // Collator collator = Collator.getInstance(locale); + // if (collator.getTailoredSet().contains(probeCharInLongStroke)) { + // exemplars = HACK_LONG_TRAD_EXEMPLARS; + // } else { + // exemplars = HACK_SHORT_TRAD_EXEMPLARS; + // } + // return exemplars; + // } return exemplars; } @@ -487,7 +535,7 @@ public final class AlphabeticIndex implements Iterable> { } } } - + UnicodeSet uppercased = new UnicodeSet(); for (String item : exemplars) { uppercased.add(UCharacter.toUpperCase(locale, item)); @@ -597,29 +645,52 @@ public final class AlphabeticIndex implements Iterable> { if (buckets == null) { initBuckets(); } - if (langType == LangType.SIMPLIFIED) { - String hackPrefix = hackName(name, collatorPrimaryOnly); - if (hackPrefix != null) { - name = hackPrefix + name; - } - } + // if (langType == LangType.SIMPLIFIED) { + // String hackPrefix = hackName(name, collatorPrimaryOnly); + // if (hackPrefix != null) { + // name = hackPrefix + name; + // } + // } return rawGetBucketIndex(name); } private int rawGetBucketIndex(CharSequence name) { // TODO use a binary search - int result = -1; - for (Bucket bucket : buckets) { + int result = 0; + Bucket lastBucket = null; + Bucket bucket = null; + for (Iterator> it = buckets.fullIterator(); it.hasNext();) { + bucket = it.next(); if (bucket.lowerBoundary == null) { // last bucket - return result; + bucket = lastBucket; // back up the bucket + --result; + break; } - int comp = collatorPrimaryOnly.compare(name, bucket.lowerBoundary); - if (comp < 0) { // the first boundary is always "", and so -1 will never be returned - return result; - } else if (comp == 0) { - return result + 1; + int bucketLower2name = collatorPrimaryOnly.compare(bucket.lowerBoundary, name); + if (bucketLower2name > 0) { // the first boundary is always "", and so -1 will never be returned + bucket = lastBucket; // back up the bucket + --result; + break; + } else if (bucketLower2name == 0) { + break; } result++; + lastBucket = bucket; + } + // we will always have at least one bucket + // see if we need to remap + if (buckets.rebucket != null) { + Bucket temp = buckets.rebucket.get(bucket); + if (temp != null) { + bucket = temp; + } + result = 0; + for (Bucket bucket2 : buckets) { + if (bucket2 == bucket) { + break; + } + ++result; + } } return result; } @@ -701,21 +772,21 @@ public final class AlphabeticIndex implements Iterable> { } }; - // If we have Pinyin, then we have a special hack to bucket items with ASCII. - if (langType == LangType.SIMPLIFIED) { - Map> rebucketMap = new HashMap>(); - for (Record name : inputList) { - String key = hackName(name.name, collatorOriginal); - if (key == null) continue; - Bucket bucket = rebucketMap.get(key); - if (bucket == null) { - int index = rawGetBucketIndex(key); - bucket = buckets.bucketList.get(index); - } - rebucketMap.put(key, bucket); - name.rebucket = bucket; - } - } + // // If we have Pinyin, then we have a special hack to bucket items with ASCII. + // if (hasPinyin) { + // Map> rebucketMap = new HashMap>(); + // for (Record name : inputList) { + // String key = hackName(name.name, collatorOriginal); + // if (key == null) continue; + // Bucket bucket = rebucketMap.get(key); + // if (bucket == null) { + // int index = rawGetBucketIndex(key); + // bucket = buckets.bucketList.get(index); + // } + // rebucketMap.put(key, bucket); + // name.rebucket = bucket; + // } + // } // Set up a sorted list of the input TreeSet> sortedInput = new TreeSet>(fullComparator); @@ -727,17 +798,17 @@ public final class AlphabeticIndex implements Iterable> { // However, if the user adds item at a time and then gets the buckets, this isn't efficient, so // we need to improve it for that case. - Iterator> bucketIterator = buckets.iterator(); + Iterator> bucketIterator = buckets.fullIterator(); Bucket currentBucket = bucketIterator.next(); Bucket nextBucket = bucketIterator.next(); String upperBoundary = nextBucket.lowerBoundary; // there is always at least one bucket, so this is safe boolean atEnd = false; for (Record s : sortedInput) { - // special hack for pinyin - if (s.rebucket != null) { - s.rebucket.records.add(s); - continue; - } +// // special hack for pinyin +// if (s.rebucket != null) { +// s.rebucket.records.add(s); +// continue; +// } // if the current bucket isn't the right one, find the one that is // We have a special flag for the last bucket so that we don't look any further while (!atEnd && collatorPrimaryOnly.compare(s.name, upperBoundary) >= 0) { @@ -754,7 +825,7 @@ public final class AlphabeticIndex implements Iterable> { } } // now put the record into the bucket. - currentBucket.records.add(s); + buckets.addTo(s, currentBucket); } } @@ -825,6 +896,9 @@ public final class AlphabeticIndex implements Iterable> { } private static UnicodeSet getScriptSet(String codePoint) { + if (codePoint.startsWith(BASE)) { + return new UnicodeSet(UNIHAN); + } return new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, UScript.getScript(codePoint.codePointAt(0))); } @@ -870,7 +944,7 @@ public final class AlphabeticIndex implements Iterable> { * @provisional This API might change or be removed in a future release. */ public static class Record { - private Bucket rebucket = null; // special hack for Pinyin + //private Bucket rebucket = null; // special hack for Pinyin private CharSequence name; private V data; private int counter; @@ -909,7 +983,9 @@ public final class AlphabeticIndex implements Iterable> { * @provisional This API might change or be removed in a future release. */ public String toString() { - return name + "=" + data + (rebucket == null ? "" : "{" + rebucket.label + "}"); + return name + "=" + data + //+ (rebucket == null ? "" : "{" + rebucket.label + "}") + ; } } @@ -951,11 +1027,19 @@ public final class AlphabeticIndex implements Iterable> { * @provisional This API might change or be removed in a future release. */ private Bucket(String label, String lowerBoundary, LabelType labelType) { + // String hackLabel = HACK_TRADITIONAL.get(label); + // if (hackLabel != null) { + // label = hackLabel; + // } this.label = label; this.lowerBoundary = lowerBoundary; this.labelType = labelType; } + String getLowerBoundary() { + return lowerBoundary; + } + /** * Get the label * @@ -1017,9 +1101,11 @@ public final class AlphabeticIndex implements Iterable> { } private class BucketList implements Iterable> { - private ArrayList> bucketList = new ArrayList>(); + private final ArrayList> bucketList = new ArrayList>(); + private final HashMap,Bucket> rebucket; + private final List> immutableVisibleList; - BucketList() { + private BucketList() { // initialize indexCharacters; List indexCharacters = initLabels(); @@ -1029,7 +1115,7 @@ public final class AlphabeticIndex implements Iterable> { // fix up the list, adding underflow, additions, overflow // insert infix labels as needed, using \uFFFF. String last = indexCharacters.get(0); - bucketList.add(new Bucket(last, last, Bucket.LabelType.NORMAL)); + bucketList.add(new Bucket(fixLabel(last), last, Bucket.LabelType.NORMAL)); UnicodeSet lastSet = getScriptSet(last).removeAll(IGNORE_SCRIPTS); for (int i = 1; i < indexCharacters.size(); ++i) { @@ -1041,121 +1127,285 @@ public final class AlphabeticIndex implements Iterable> { if (collatorPrimaryOnly.compare(overflowComparisonString, current) < 0) { bucketList.add(new Bucket(getInflowLabel(), overflowComparisonString, Bucket.LabelType.INFLOW)); - i++; + //i++; lastSet = set; } } - bucketList.add(new Bucket(current, current, Bucket.LabelType.NORMAL)); + bucketList.add(new Bucket(fixLabel(current), current, Bucket.LabelType.NORMAL)); last = current; lastSet = set; } // overflow bucket String limitString = getOverflowComparisonString(last); - bucketList.add(new Bucket(getOverflowLabel(), limitString, Bucket.LabelType.OVERFLOW)); // final, - + bucketList.add(new Bucket(getOverflowLabel(), limitString, Bucket.LabelType.OVERFLOW)); // final + + // add some redirects for Pinyin + + ArrayList> publicBucketList; + if (hasPinyin) { + rebucket = new HashMap,Bucket>(); + publicBucketList = new ArrayList>(); + HashMap> rebucketLabel = new HashMap>(); + Bucket flowBefore = null; // special handling for flow bucket before pinyin + boolean flowRedirect = false; + boolean havePinyin = false; + + for (Bucket bucket : bucketList) { + String label = bucket.getLabel(); + String lowerBound = bucket.getLowerBoundary(); + if (lowerBound != null && lowerBound.startsWith(BASE)) { // pinyin + rebucket.put(bucket, rebucketLabel.get(label)); + havePinyin = true; + } else { // not pinyin + if (bucket.labelType != LabelType.NORMAL) { // special handling for flows + if (flowRedirect == false) { + if (havePinyin) { + // do a redirect from the last before pinyin to the first before; + // we do it this way so that the buckets are joined, and any between stuff goes to the end + // eg a b c alpha chinese gorp + // we want to show as ... a b c ... with the alpha and gorp both in the final bucket. + rebucket.put(flowBefore, bucket); + publicBucketList.remove(flowBefore); + flowRedirect = true; + } else { + flowBefore = bucket; + } + } + } else { // is NORMAL + rebucketLabel.put(label, bucket); + } + publicBucketList.add(bucket); + } + } + } else { + rebucket = null; + publicBucketList = bucketList; + } + immutableVisibleList = Collections.unmodifiableList(publicBucketList); } - public Iterator> iterator() { - return bucketList.iterator(); - } - } - - /* - * HACKS - */ - - /** - * Only gets called for simplified Chinese. Uses further hack to distinguish long from short pinyin table. - */ - private String hackName(CharSequence name, RuleBasedCollator comparator) { - if (!UNIHAN.contains(Character.codePointAt(name, 0))) { - return null; - } - synchronized (PINYIN_LOWER_BOUNDS_LONG) { - if (PINYIN_LOWER_BOUNDS == null) { - if (comparator.getTailoredSet().contains(probeCharInLong)) { - PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG; - HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_LONG; - } else { - PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT; - HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_SHORT; + /** + * @param s + * @param currentBucket + */ + private void addTo(Record s, Bucket currentBucket) { + if (rebucket != null) { + Bucket newBucket = rebucket.get(currentBucket); + if (newBucket != null) { + currentBucket = newBucket; } } + currentBucket.records.add(s); } - int index = Arrays.binarySearch(HACK_PINYIN_LOOKUP, name, comparator); - if (index < 0) { - index = -index - 2; - } - return PINYIN_LOWER_BOUNDS.substring(index, index + 1); - } - private static String PINYIN_LOWER_BOUNDS; + /** + * @param current + * @return + */ + private String fixLabel(String current) { + if (!current.startsWith(BASE)) { + return current; + } + int rest = current.charAt(1); + if (0x2800 < rest && rest <= 0x28FF) { // stroke count + return (rest-0x2800) + "\u5283"; // HACK + } + return current.substring(1); + } - private static String[] HACK_PINYIN_LOOKUP; + /** + * Private iterator over all the buckets, visible and invisible + */ + private Iterator> fullIterator() { + return bucketList.iterator(); + } + /** + * Iterator over just the visible buckets. + */ + public Iterator> iterator() { + return immutableVisibleList.iterator(); // use immutable list to prevent remove(). + } + } - /** + /* * HACKS - * Generated with org.unicode.draft.GenerateUnihanCollator. */ - private int probeCharInLong = 0x28EAD; - - private static String PINYIN_LOWER_BOUNDS_LONG = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; - - private static String[] HACK_PINYIN_LOOKUP_LONG = { - "", // A - "\u516B", // b : \u516B [b\u0101] - "\uD863\uDEAD", // c : \U00028EAD [c\u0101] - "\uD844\uDE51", // d : \U00021251 [d\u0101] - "\u59B8", // e : \u59B8 [\u0113] - "\u53D1", // f : \u53D1 [f\u0101] - "\uD844\uDE45", // g : \U00021245 [g\u0101] - "\u54C8", // h : \u54C8 [h\u0101] - "\u4E0C", // j : \u4E0C [j\u012B] - "\u5494", // k : \u5494 [k\u0101] - "\u3547", // l : \u3547 [l\u0101] - "\u5452", // m : \u5452 [\u1E3F] - "\u5514", // n : \u5514 [\u0144] - "\u5594", // o : \u5594 [\u014D] - "\uD84F\uDC7A", // p : \U00023C7A [p\u0101] - "\u4E03", // q : \u4E03 [q\u012B] - "\u513F", // r : \u513F [r] - "\u4EE8", // s : \u4EE8 [s\u0101] - "\u4ED6", // t : \u4ED6 [t\u0101] - "\u7A75", // w : \u7A75 [w\u0101] - "\u5915", // x : \u5915 [x\u012B] - "\u4E2B", // y : \u4E2B [y\u0101] - "\u5E00", // z : \u5E00 [z\u0101] - }; - - private static String PINYIN_LOWER_BOUNDS_SHORT = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; - - private static String[] HACK_PINYIN_LOOKUP_SHORT = { - "", // A - "\u516B", // b : \u516B [b\u0101] - "\u5693", // c : \u5693 [c\u0101] - "\u5491", // d : \u5491 [d\u0101] - "\u59B8", // e : \u59B8 [\u0113] - "\u53D1", // f : \u53D1 [f\u0101] - "\u65EE", // g : \u65EE [g\u0101] - "\u54C8", // h : \u54C8 [h\u0101] - "\u4E0C", // j : \u4E0C [j\u012B] - "\u5494", // k : \u5494 [k\u0101] - "\u3547", // l : \u3547 [l\u0101] - "\u5452", // m : \u5452 [\u1E3F] - "\u5514", // n : \u5514 [\u0144] - "\u5594", // o : \u5594 [\u014D] - "\u5991", // p : \u5991 [p\u0101] - "\u4E03", // q : \u4E03 [q\u012B] - "\u513F", // r : \u513F [r] - "\u4EE8", // s : \u4EE8 [s\u0101] - "\u4ED6", // t : \u4ED6 [t\u0101] - "\u7A75", // w : \u7A75 [w\u0101] - "\u5915", // x : \u5915 [x\u012B] - "\u4E2B", // y : \u4E2B [y\u0101] - "\u5E00", // z : \u5E00 [z\u0101] - }; + // /** + // * Only gets called for simplified Chinese. Uses further hack to distinguish long from short pinyin table. + // */ + // private String hackName(CharSequence name, RuleBasedCollator comparator) { + // if (!UNIHAN.contains(Character.codePointAt(name, 0))) { + // return null; + // } + // synchronized (PINYIN_LOWER_BOUNDS_LONG) { + // if (PINYIN_LOWER_BOUNDS == null) { + // if (comparator.getTailoredSet().contains(probeCharInLong)) { + // PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG; + // HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_LONG; + // } else { + // PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT; + // HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_SHORT; + // } + // } + // } + // int index = Arrays.binarySearch(HACK_PINYIN_LOOKUP, name, comparator); + // if (index < 0) { + // index = -index - 2; + // } + // return PINYIN_LOWER_BOUNDS.substring(index, index + 1); + // } + // + // private static String PINYIN_LOWER_BOUNDS; + // + // private static String[] HACK_PINYIN_LOOKUP; + // + // + // /** + // * HACKS + // * Generated with org.unicode.draft.GenerateUnihanCollator. + // */ + // + // private static final int probeCharInLong = 0x28EAD; + // private static final int probeCharInLongStroke = 0x2A6A5; + // + // private static final String PINYIN_LOWER_BOUNDS_LONG = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; + // + // private static final String[] HACK_PINYIN_LOOKUP_LONG = { + // "", // A + // "\u516B", // b : \u516B [b\u0101] + // "\uD863\uDEAD", // c : \U00028EAD [c\u0101] + // "\uD844\uDE51", // d : \U00021251 [d\u0101] + // "\u59B8", // e : \u59B8 [\u0113] + // "\u53D1", // f : \u53D1 [f\u0101] + // "\uD844\uDE45", // g : \U00021245 [g\u0101] + // "\u54C8", // h : \u54C8 [h\u0101] + // "\u4E0C", // j : \u4E0C [j\u012B] + // "\u5494", // k : \u5494 [k\u0101] + // "\u3547", // l : \u3547 [l\u0101] + // "\u5452", // m : \u5452 [\u1E3F] + // "\u5514", // n : \u5514 [\u0144] + // "\u5594", // o : \u5594 [\u014D] + // "\uD84F\uDC7A", // p : \U00023C7A [p\u0101] + // "\u4E03", // q : \u4E03 [q\u012B] + // "\u513F", // r : \u513F [r] + // "\u4EE8", // s : \u4EE8 [s\u0101] + // "\u4ED6", // t : \u4ED6 [t\u0101] + // "\u7A75", // w : \u7A75 [w\u0101] + // "\u5915", // x : \u5915 [x\u012B] + // "\u4E2B", // y : \u4E2B [y\u0101] + // "\u5E00", // z : \u5E00 [z\u0101] + // }; + // + // private static String PINYIN_LOWER_BOUNDS_SHORT = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; + // + // private static String[] HACK_PINYIN_LOOKUP_SHORT = { + // "", // A + // "\u516B", // b : \u516B [b\u0101] + // "\u5693", // c : \u5693 [c\u0101] + // "\u5491", // d : \u5491 [d\u0101] + // "\u59B8", // e : \u59B8 [\u0113] + // "\u53D1", // f : \u53D1 [f\u0101] + // "\u65EE", // g : \u65EE [g\u0101] + // "\u54C8", // h : \u54C8 [h\u0101] + // "\u4E0C", // j : \u4E0C [j\u012B] + // "\u5494", // k : \u5494 [k\u0101] + // "\u3547", // l : \u3547 [l\u0101] + // "\u5452", // m : \u5452 [\u1E3F] + // "\u5514", // n : \u5514 [\u0144] + // "\u5594", // o : \u5594 [\u014D] + // "\u5991", // p : \u5991 [p\u0101] + // "\u4E03", // q : \u4E03 [q\u012B] + // "\u513F", // r : \u513F [r] + // "\u4EE8", // s : \u4EE8 [s\u0101] + // "\u4ED6", // t : \u4ED6 [t\u0101] + // "\u7A75", // w : \u7A75 [w\u0101] + // "\u5915", // x : \u5915 [x\u012B] + // "\u4E2B", // y : \u4E2B [y\u0101] + // "\u5E00", // z : \u5E00 [z\u0101] + // }; + // + // private static final Map HACK_TRADITIONAL; + // static { + // Map temp = new HashMap(); + // temp.put("\u4E00", "1\u5283"); + // temp.put("\u4E01", "2\u5283"); + // temp.put("\u4E07", "3\u5283"); + // temp.put("\u4E0D", "4\u5283"); + // temp.put("\u4E17", "5\u5283"); + // temp.put("\u3401", "6\u5283"); + // temp.put("\u4E23", "7\u5283"); + // temp.put("\u4E26", "8\u5283"); + // temp.put("\u4E34", "9\u5283"); + // temp.put("\uD840\uDC35", "9\u5283"); + // temp.put("\uD840\uDC3E", "10\u5283"); + // temp.put("\uD840\uDC3D", "10\u5283"); + // temp.put("\u3422", "11\u5283"); + // temp.put("\uD840\uDC41", "11\u5283"); + // temp.put("\uD840\uDC46", "12\u5283"); + // temp.put("\u4E82", "13\u5283"); + // temp.put("\uD840\uDC4C", "13\u5283"); + // temp.put("\uD840\uDC4E", "14\u5283"); + // temp.put("\u3493", "15\u5283"); + // temp.put("\uD840\uDC53", "15\u5283"); + // temp.put("\u4EB8", "16\u5283"); + // temp.put("\uD840\uDC55", "16\u5283"); + // temp.put("\u511F", "17\u5283"); + // temp.put("\uD840\uDC56", "17\u5283"); + // temp.put("\u512D", "18\u5283"); + // temp.put("\uD840\uDC5F", "18\u5283"); + // temp.put("\u3426", "19\u5283"); + // temp.put("\uD840\uDC7A", "19\u5283"); + // temp.put("\u34A5", "20\u5283"); + // temp.put("\uD840\uDC60", "20\u5283"); + // temp.put("\u34A7", "21\u5283"); + // temp.put("\uD840\uDD9E", "21\u5283"); + // temp.put("\u4EB9", "22\u5283"); + // temp.put("\uD840\uDC7B", "22\u5283"); + // temp.put("\u513D", "23\u5283"); + // temp.put("\uD840\uDCC8", "23\u5283"); + // temp.put("\u513E", "24\u5283"); + // temp.put("\uD840\uDD9F", "24\u5283"); + // temp.put("\u56D4", "25\u5283"); + // temp.put("\uD842\uDCCA", "25\u5283"); + // temp.put("\u3536", "26\u5283"); + // temp.put("\u34AA", "26\u5283"); + // temp.put("\u7065", "27\u5283"); + // temp.put("\uD842\uDE0B", "27\u5283"); + // temp.put("\u56D6", "28\u5283"); + // temp.put("\uD840\uDDA0", "28\u5283"); + // temp.put("\u7E9E", "29\u5283"); + // temp.put("\uD840\uDDA1", "29\u5283"); + // temp.put("\u53B5", "30\u5283"); + // temp.put("\uD842\uDD6C", "30\u5283"); + // temp.put("\u7069", "31\u5283"); + // temp.put("\uD844\uDD9F", "31\u5283"); + // temp.put("\u706A", "32\u5283"); + // temp.put("\uD842\uDED1", "32\u5283"); + // temp.put("\uD846\uDD3B", "33\u5283"); + // temp.put("\uD842\uDE0C", "33\u5283"); + // temp.put("\uD842\uDCCB", "34\u5283"); + // temp.put("\u9F7E", "35\u5283"); + // temp.put("\uD84C\uDF5C", "35\u5283"); + // temp.put("\u9F49", "36\u5283"); + // temp.put("\uD845\uDD19", "36\u5283"); + // temp.put("\uD86B\uDE9A", "37\u5283"); + // temp.put("\uD861\uDC04", "38\u5283"); + // temp.put("\u9750", "39\u5283"); + // temp.put("\uD845\uDD1A", "39\u5283"); + // temp.put("\uD864\uDDD3", "40\u5283"); + // temp.put("\uD869\uDCCA", "41\u5283"); + // temp.put("\uD85A\uDDC4", "42\u5283"); + // temp.put("\uD85C\uDD98", "43\u5283"); + // temp.put("\uD85E\uDCB1", "44\u5283"); + // temp.put("\uD865\uDE63", "46\u5283"); + // temp.put("\u9F98", "48\u5283"); + // temp.put("\uD85A\uDDC5", "48\u5283"); + // temp.put("\u4A3B", "52\u5283"); + // temp.put("\uD841\uDD3B", "64\u5283"); + // HACK_TRADITIONAL = Collections.unmodifiableMap(temp); + // } /** * HACKS @@ -1177,6 +1427,14 @@ public final class AlphabeticIndex implements Iterable> { "\uD802\uDF40", "\uD802\uDF60", "\uD800\uDF80", "\uD800\uDFA0", "\uD808\uDC00", "\uD80C\uDC00", "\u4E00" }); + // private static final UnicodeSet HACK_SHORT_TRAD_EXEMPLARS = new UnicodeSet( + // "[\u3401 \u3422 \u3426 \u3493 \u34A5 \u34A7 \u3536 \u4E00 \u4E01 \u4E07 \u4E0D \u4E17 \u4E23 \u4E26 \u4E34 \u4E82 \u4EB8 \u4EB9 \u511F \u512D \u513D" + + // " \u513E \u53B5 \u56D4 \u56D6 \u7065 \u7069 \u706A \u7E9E \u9750 \u9F49 \u9F7E \u9F98 \\U0002003E \\U00020046 \\U0002004E \\U0002193B]").freeze(); + // private static final UnicodeSet HACK_LONG_TRAD_EXEMPLARS = new UnicodeSet( + // "[\u3401\u34AA\u4A3B\u4E00\u4E01\u4E07\u4E0D\u4E17\u4E23\u4E26" + + // "\\U00020035\\U0002003D\\U00020041\\U00020046\\U0002004C\\U0002004E\\U00020053\\U00020055\\U00020056\\U0002005F\\U00020060\\U0002007A\\U0002007B\\U000200C8" + + // "\\U0002019E-\\U000201A1\\U0002053B\\U000208CA\\U000208CB\\U0002096C\\U00020A0B\\U00020A0C\\U00020AD1\\U0002119F\\U00021519\\U0002151A\\U0002335C\\U000269C4" + + // "\\U000269C5\\U00027198\\U000278B1\\U00028404\\U000291D3\\U00029663\\U0002A4CA\\U0002AE9A]").freeze(); /** * Only for testing... * @internal diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java index 40a31283799..68df9f49f97 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/AlphabeticIndexTest.java @@ -20,7 +20,9 @@ import java.util.TreeSet; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.dev.test.util.CollectionUtilities; import com.ibm.icu.impl.ICUDebug; +import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.Row; +import com.ibm.icu.impl.Utility; import com.ibm.icu.impl.Row.R4; import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UScript; @@ -35,6 +37,9 @@ import com.ibm.icu.text.RawCollationKey; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.ULocale.Type; +import com.ibm.icu.util.UResourceBundle; +import com.ibm.icu.util.UResourceBundleIterator; /** * @author markdavis @@ -92,7 +97,7 @@ public class AlphabeticIndexTest extends TestFmwk { /* Ukrainian*/ {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"}, /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"}, /* Chinese*/ {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, - /* Chinese (Traditional Han)*/ {"zh_Hant", "\u4E00:\u4E01:\u4E08:\u4E0D:\u4E14:\u4E1E:\u4E32:\u4E26:\u4EAD:\u4E58:\u4E7E:\u5080:\u4E82:\u50CE:\u50F5:\u5110:\u511F:\u53E2:\u5133:\u56B4:\u5137:\u513B:\u56CC:\u56D1:\u5EF3"}, + /* Chinese (Traditional Han)*/ {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"}, // Comment these out to make the test run faster. Later, make these run under extended @@ -167,6 +172,79 @@ public class AlphabeticIndexTest extends TestFmwk { public static void main(String[] args) throws Exception{ new AlphabeticIndexTest().run(args); } + +// public void TestAAKeyword() { +// ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance( +// ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh"); +// showBundle(rb, 0); +// String[] keywords = Collator.getKeywords(); +// System.out.println(Arrays.asList(keywords)); +// String locale = "zh"; +// ULocale ulocale = new ULocale(locale); +// for (String keyword : keywords) { +// List values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false)); +// List allValues = Arrays.asList(Collator.getKeywordValues(keyword)); +// for (String value : allValues) { +// System.out.println(keyword + "=" + value); +// checkKeyword(locale, value, values.contains(value)); +// } +// } +// } +// +// private void checkKeyword(String locale, String collationValue, boolean shouldExist) { +// final ULocale base = new ULocale(locale); +// final ULocale desired = new ULocale(locale + "@collation=" + collationValue); +// Collator foo = Collator.getInstance(desired); +// ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE); +// if (shouldExist) { +// assertEquals("actual should match desired", desired, actual); +// } else { +// assertEquals("actual should match base", base, actual); +// } +// int comp = foo.compare("a", "ā"); +// assertEquals("should fall back to default for zh", -1, comp); +// } + + /** + * @param rb + * @param i + */ + private static void showBundle(UResourceBundle rb, int i) { + for (String key : rb.keySet()) { + System.out.print("\n" + Utility.repeat(" ", i) + key); + UResourceBundle rb2 = rb.get(key); + showBundle(rb2, i+1); + } + } + + + public void TestA() { + String[][] tests = {{"zh_Hant", "渡辺", "12劃"}, + {"zh", "渡辺", "D"} + /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/ + }; + for (String[] test : tests) { + AlphabeticIndex alphabeticIndex = new AlphabeticIndex(new ULocale(test[0])); + final String probe = test[1]; + final String expectedLabel = test[2]; + alphabeticIndex.addRecord(probe, 1); + List labels = alphabeticIndex.getBucketLabels(); + logln(labels.toString()); + Bucket bucket = find(alphabeticIndex, probe); + assertEquals(probe + " found in right bucket", expectedLabel, bucket.getLabel()); + } + } + + private Bucket find(AlphabeticIndex alphabeticIndex, final String probe) { + for (Bucket bucket : alphabeticIndex) { + for (Record record : bucket) { + if (record.getName().equals(probe)) { + return bucket; + } + } + } + return null; + } public void TestFirstCharacters() { @@ -317,6 +395,7 @@ public class AlphabeticIndexTest extends TestFmwk { } public void showIndex(AlphabeticIndex index, boolean showEmpty) { + logln("Actual"); StringBuilder UI = new StringBuilder(); for (AlphabeticIndex.Bucket bucket : index) { if (showEmpty || bucket.size() != 0) { @@ -335,6 +414,7 @@ public class AlphabeticIndexTest extends TestFmwk { * @param b */ private void showIndex(List myBucketLabels, ArrayList>> myBucketContents, boolean showEmpty) { + logln("Alternative"); StringBuilder UI = new StringBuilder(); for (int i = 0; i < myBucketLabels.size(); ++i) { @@ -468,7 +548,7 @@ public class AlphabeticIndexTest extends TestFmwk { // } public void TestClientSupport() { - for (String localeString : KEY_LOCALES) { // KEY_LOCALES, new String[] {"zh"} + for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"} ULocale ulocale = new ULocale(localeString); AlphabeticIndex alphabeticIndex = new AlphabeticIndex(ulocale).addLabels(ULocale.ENGLISH); RuleBasedCollator collator = alphabeticIndex.getCollator(); @@ -498,6 +578,9 @@ public class AlphabeticIndexTest extends TestFmwk { } for (String name : shortTest) { int bucketIndex = alphabeticIndex.getBucketIndex(name); + if (bucketIndex > myBucketContents.size()) { + alphabeticIndex.getBucketIndex(name); // call again for debugging + } Set> myBucket = myBucketContents.get(bucketIndex); RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null); R4 row = Row.of(rawCollationKey, name, name.length(), testValue++); @@ -639,21 +722,25 @@ public class AlphabeticIndexTest extends TestFmwk { checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580"); } - static final String[] SimpleTests = { "$", "\u00a3", "12", "2", + static final String[] SimpleTests = { + "斎藤", + "\u1f2d\u03c1\u03b1", + "$", "\u00a3", "12", "2", "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul", "\u00deor", "\u00c5berg", "\u00d6stlund", - "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6", "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac", + "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6", + "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac", //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2", "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4", //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34" }; static final String[] hackPinyin = { - "\u0101", "\u5416", "\u58ba", // + "a", "\u5416", "\u58ba", // "b", "\u516b", "\u62d4", "\u8500", // "c", "\u5693", "\u7938", "\u9e7e", // "d", "\u5491", "\u8fcf", "\u964a", // - "\u0113","\u59b8", "\u92e8", "\u834b", // + "e","\u59b8", "\u92e8", "\u834b", // "f", "\u53d1", "\u9197", "\u99a5", // "g", "\u7324", "\u91d3", "\u8142", // "h", "\u598e", "\u927f", "\u593b", // @@ -662,7 +749,7 @@ public class AlphabeticIndexTest extends TestFmwk { "l", "\u5783", "\u62c9", "\u9ba5", // "m", "\u5638", "\u9ebb", "\u65c0", // "n", "\u62ff", "\u80ad", "\u685b", // - "\u014D", "\u5662", "\u6bee", "\u8bb4", // + "o", "\u5662", "\u6bee", "\u8bb4", // "p", "\u5991", "\u8019", "\u8c31", // "q", "\u4e03", "\u6053", "\u7f56", // "r", "\u5465", "\u72aa", "\u6e03", // @@ -702,33 +789,71 @@ public class AlphabeticIndexTest extends TestFmwk { "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f" }; - static final String[] traditionalNames = { - "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b", "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523", "\u6c88", - "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd", "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8b1d", "\u9112", - "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58", "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7", "\u9cf3", "\u82b1", "\u65b9", - "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2", "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5", "\u6bb7", "\u7f85", "\u7562", "\u90dd", - "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642", "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867", "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", - "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08", "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", - "\u9f90", "\u718a", "\u7d00", "\u8212", "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d", "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", - "\u6c5f", "\u7ae5", "\u984f", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", - "\u842c", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79", "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", - "\u676d", "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", - "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4", "\u5f13", - "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12", "\u66b4", "\u7518", - "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f", "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a", "\u8584", "\u5370", "\u5bbf", - "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670", "\u9b31", "\u80e5", "\u80fd", "\u84bc", - "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a", "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", - "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a", "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a", "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", - "\u9023", "\u8339", "\u7fd2", "\u5ba6", "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", - "\u6eff", "\u5f18", "\u5321", "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", - "\u8076", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", - "\u5de2", "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a", "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98", "\u6b50\u967d", - "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023", "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b", "\u4ef2\u5b6b", - "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b", "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac", - "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4", "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122", "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", - "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6", "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1", "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", - "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a", "\u5e74", "\u611b", "\u967d", "\u4f5f" - }; + static final String[] traditionalNames = { "丁", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b", + "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523", + "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd", + "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", + "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58", + "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7", + "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2", + "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5", + "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642", + "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867", + "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b", + "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08", + "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212", + "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d", + "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f", + "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8", + "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f", + "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79", + "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d", + "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b", + "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", + "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72", + "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4", + "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168", + "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12", + "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f", + "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a", + "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22", + "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670", + "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a", + "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148", + "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a", + "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a", + "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6", + "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", + "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321", + "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83", + "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641", + "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952", + "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2", + "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a", + "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98", + "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023", + "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", + "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b", + "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b", + "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", + "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac", + "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4", + "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122", + "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6", + "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1", + "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546", + "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a", + "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7", + "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26", + "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4", + "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35", + "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E", + "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A", + "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1", + "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C", + "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C", + "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3", + "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", }; }