]> granicus.if.org Git - icu/commitdiff
ICU-20467 replace the LocaleMatcher implementation, load data from new bundle
authorMarkus Scherer <markus.icu@gmail.com>
Sat, 16 Mar 2019 00:13:11 +0000 (17:13 -0700)
committerMarkus Scherer <markus.icu@gmail.com>
Tue, 26 Mar 2019 20:27:42 +0000 (13:27 -0700)
- remove the old LocaleMatcher implementation code
- move the XLocaleMatcher code into LocaleMatcher, same for test
- remove unused internal methods
- stop comparing old vs. new performance
- generate langInfo.txt resource bundle file with precomputed likely-subtags and matcher data
- make genrb handle multi-line binary values
- load likely-subtags & distance data from new langInfo.res bundle
- test that built data == loaded data
- move data builders to tools, no more runtime dependency on builder code

23 files changed:
docs/userguide/icu_data/buildtool.md
icu4c/source/data/misc/langInfo.txt [new file with mode: 0644]
icu4c/source/tools/genrb/genrb.cpp
icu4c/source/tools/genrb/parse.cpp
icu4j/build.xml
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java [deleted file]
icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
icu4j/main/shared/build/common-targets.xml
icu4j/main/shared/data/icudata.jar
icu4j/main/tests/core/.classpath
icu4j/main/tests/core/.project
icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableChecker.java [moved from icu4j/tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java with 90% similarity]
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDistanceTest.java [moved from icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java with 92% similarity]
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java [deleted file]
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java [deleted file]
icu4j/tools/misc/.classpath
icu4j/tools/misc/.project
icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java [moved from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java with 97% similarity]
icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java [moved from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java with 87% similarity]

index 839db06c4faa0ad394549b6ee4fa35365e4d51d8..f691c9a986f3df62f012cbff249d7aecab07bbf2 100644 (file)
@@ -191,7 +191,7 @@ summarizes the ICU data files and their corresponding features and categories:
 | Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
 | Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
 | Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
-| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 53 KiB <br/> 33 KiB |
+| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
 | Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
 | Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
 | Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |
diff --git a/icu4c/source/data/misc/langInfo.txt b/icu4c/source/data/misc/langInfo.txt
new file mode 100644 (file)
index 0000000..ca804f0
--- /dev/null
@@ -0,0 +1,2614 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+// Generated by ICU4J LocaleDistanceBuilder.
+langInfo:table(nofallback){
+    likely{
+        languageAliases{  // 164
+            "aam","aas",
+            "adp","dz",
+            "aju","jrb",
+            "alb","sq",
+            "als","sq",
+            "arb","ar",
+            "arm","hy",
+            "aue","ktz",
+            "ayr","ay",
+            "ayx","nun",
+            "azj","az",
+            "baq","eu",
+            "bcc","bal",
+            "bcl","bik",
+            "bgm","bcg",
+            "bh","bho",
+            "bjd","drl",
+            "bur","my",
+            "bxk","luy",
+            "bxr","bua",
+            "ccq","rki",
+            "chi","zh",
+            "cjr","mom",
+            "cka","cmr",
+            "cld","syr",
+            "cmk","xch",
+            "cmn","zh",
+            "coy","pij",
+            "cqu","quh",
+            "cwd","cr",
+            "cze","cs",
+            "dgo","doi",
+            "dhd","mwr",
+            "dik","din",
+            "diq","zza",
+            "drh","mn",
+            "dut","nl",
+            "ekk","et",
+            "emk","man",
+            "esk","ik",
+            "fat","ak",
+            "fre","fr",
+            "fuc","ff",
+            "gav","dev",
+            "gaz","om",
+            "gbo","grb",
+            "geo","ka",
+            "ger","de",
+            "gfx","vaj",
+            "ggn","gvr",
+            "gno","gon",
+            "gre","el",
+            "gti","nyc",
+            "gug","gn",
+            "guv","duz",
+            "gya","gba",
+            "hdn","hai",
+            "hea","hmn",
+            "him","srx",
+            "hrr","jal",
+            "ibi","opa",
+            "ice","is",
+            "ike","iu",
+            "ilw","gal",
+            "in","id",
+            "iw","he",
+            "jeg","oyb",
+            "ji","yi",
+            "jw","jv",
+            "kgc","tdf",
+            "kgh","kml",
+            "khk","mn",
+            "kmr","ku",
+            "knc","kr",
+            "kng","kg",
+            "knn","kok",
+            "koj","kwv",
+            "kpv","kv",
+            "krm","bmf",
+            "ktr","dtp",
+            "kvs","gdj",
+            "kwq","yam",
+            "kxe","tvd",
+            "kzj","dtp",
+            "kzt","dtp",
+            "lbk","bnc",
+            "lii","raq",
+            "lmm","rmx",
+            "lvs","lv",
+            "mac","mk",
+            "mao","mi",
+            "may","ms",
+            "meg","cir",
+            "mhr","chm",
+            "mnk","man",
+            "mo","ro",
+            "mst","mry",
+            "mup","raj",
+            "mwj","vaj",
+            "myt","mry",
+            "nad","xny",
+            "ncp","kdz",
+            "nnx","ngv",
+            "no","nb",
+            "npi","ne",
+            "nts","pij",
+            "ojg","oj",
+            "ory","or",
+            "oun","vaj",
+            "pbu","ps",
+            "pcr","adx",
+            "per","fa",
+            "pes","fa",
+            "plt","mg",
+            "pmc","huw",
+            "pmu","phr",
+            "pnb","lah",
+            "ppa","bfy",
+            "ppr","lcq",
+            "pry","prt",
+            "puz","pub",
+            "quz","qu",
+            "rmy","rom",
+            "rum","ro",
+            "sca","hle",
+            "scc","sr",
+            "scr","hr",
+            "skk","oyb",
+            "slo","sk",
+            "spy","kln",
+            "src","sc",
+            "swh","sw",
+            "tdu","dtp",
+            "thc","tpo",
+            "thx","oyb",
+            "tib","bo",
+            "tie","ras",
+            "tkk","twm",
+            "tl","fil",
+            "tlw","weo",
+            "tmp","tyj",
+            "tne","kak",
+            "tsf","taj",
+            "ttq","tmh",
+            "tw","ak",
+            "umu","del",
+            "uok","ema",
+            "uzn","uz",
+            "wel","cy",
+            "xba","cax",
+            "xia","acn",
+            "xkh","waw",
+            "xpe","kpe",
+            "xsj","suj",
+            "xsl","den",
+            "ybd","rki",
+            "ydd","yi",
+            "yma","lrr",
+            "ymt","mtm",
+            "yos","zom",
+            "yuu","yug",
+            "zai","zap",
+            "zsm","ms",
+            "zyb","za",
+        }  // languageAliases
+        regionAliases{  // 38
+            "062","034",
+            "172","RU",
+            "200","CZ",
+            "230","ET",
+            "280","DE",
+            "532","CW",
+            "582","FM",
+            "736","SD",
+            "830","JE",
+            "886","YE",
+            "890","RS",
+            "AN","CW",
+            "BU","MM",
+            "CS","RS",
+            "CT","KI",
+            "DD","DE",
+            "DY","BJ",
+            "FQ","AQ",
+            "FX","FR",
+            "HV","BF",
+            "JT","UM",
+            "MI","UM",
+            "NH","VU",
+            "NQ","AQ",
+            "NT","SA",
+            "PC","FM",
+            "PU","UM",
+            "PZ","PA",
+            "QU","EU",
+            "RH","ZW",
+            "SU","RU",
+            "TP","TL",
+            "UK","GB",
+            "VD","VN",
+            "WK","UM",
+            "YD","YE",
+            "YU","RS",
+            "ZR","CD",
+        }  // regionAliases
+        trie:bin{  // BytesTrie: 9782 bytes
+001a6dcc0b74c4e677c26077a2b378a4
+4e79a4bb7a0e6d7f7a5b7a30e1ad8ce8
+2ef5af2f10e1a537022a3c42cae54811
+616e01e2a3fcf4a3f70b4d24540e54ca
+8c552a5610cea3fa10d3a3f94dca9550
+2a5310d2a3f502c1a3f4c6a3f2c8a3f3
+47d4a2473248d81dfb4910c4a3ef01c2
+a3f8c6a3ed6d326e36733a7910e2ad8c
+10e9af2d10e5af2e10ed22022aa90643
+b4fd4910c4a908671767326836693a6c
+10edaf2c10e8adb910f8ada910e1af2b
+612e64326510e1af2a10e7af2810eaaf
+2900126d46741b743475387742e1addc
+efadf310edadf801f5adf9f6adfa10e1
+adfb6d3c6e406f50725a7301e7ad79eb
+adf710efadef02e3adf0e9adf1f5adf2
+01e2adf4f3adf510f3adf66726673668
+3a693e6a4e6c10f3adee10e9ade710e7
+ade802e2ade9f5adeaf6adeb01e1adec
+e9aded6138625463646501eca51ef2ad
+e604e5adddeaaddeecaddfeeade0f2ad
+e102f0ade2f1ade3f2ade410e9ade50c
+6e3d7221722e7332774ee8adff10e2af
+0504e1adb1e9af06eca52dedaf07f2af
+0810e5af096e346f3e7001e5a78af2ad
+ae01e1ada7f2af0201e7af03eeaf0465
+1d653a6c3e6d02e6af01eead9df2ada0
+10f3adfe02e1af00e3ad99e4ad9a612e
+62326310f2ad6910f6adfc10e9adfd0d
+6d49733073307534e9a731efaf1c10f3
+af2104e1af22e532eaaf25f4af26f7af
+27012a2e4812616ef3af24012aaf2343
+10ceaf246d3a6f3e7202e2af1ee5af1f
+ecaf2010ecaf1b10eeaf1d6520653e67
+426b4c6c02e5af18e7af19ecaf1a10f2
+af1401f2af15f7af1610efaf17612e62
+606410e4a73107f40cf4af0df6af0ef9
+af0ffaaf10eda7b2efaf0af0af0bf3af
+0c02e1af11e2af12f9af1374a25275a4
+d0760a6f1fe509e5adcfe9ad57efadd7
+6f3472387501eeaddaf4addb10f4add8
+10efadd9613c654c69566c606d01e6ad
+d5f7add602e7adcde9adbeeeadce01e3
+add0f0add101e3add2f6add310f3add4
+00267576e91ff20ef2abbdf3abc1f4ab
+c7f767f9abd7e9aba2ebaba9ecabadee
+abb3efabb57a247a36e1ab82e5ab93e7
+2ee8ab9e10edabda012a2e41127261e2
+ab9a012aab995010cbab9a753e765477
+6478ccca7901e1abd8f6abd903e8abce
+ecabcfedabd0f1abd102e4a7b5ecabd2
+f5abd301e8abd4f1abd56b7d7049705e
+716e727273827406f10cf1abb1f2abcb
+f3abccf4abcde4abc8e5abc9eaabca02
+e9abb9edabbafaabbb10efabbc02f5ab
+bef6abbff7abc004e4abc2e6abc3e7ab
+c4eaabc5f7abc66b426c526d626e6c6f
+02e6abb6e7abb7f1abb802ecabaaf2ab
+abf4abac02e6abaef8abaff9abb001e8
+abb1f9abb210e8abb46546655e667467
+7868886906eb0cebaba5edaba6efaba7
+f6aba8e2a3b0e6aba3e7aba403e4ab94
+edab95efab96f4ab9710e9ab9802e3ab
+9befab9cf5ab9d02ecab9ff1aba0f2ab
+a16144625a63866403e4ab90e7ab91e8
+ab92f5a54303eaab83ecab84eeab85f1
+ab8606e70ce7ab8aefab8bf7ab8cfaab
+8de3ab87e4ab88e6ab8901e9ab8ef9ab
+8f0f735fe72ce742ebabe1f2ad3ffa01
+2a8641127261e2adcb012a2e43127972
+ecabde022aabdd4b2a4d10ceabdf10da
+abde7360746476687a10ee012a2e4112
+7261e2adcb022aabec412a4310ceadcc
+10c6adcb10e1adc710f2adc801e8adc9
+ecadca6d326d3e6e426f707202e9adc4
+f4adc5f7adc610e2abe301f226f8adc3
+012a2e44126576e1adc2012aadc14e10
+d0adc210eba55a62326436673a6c10e9
+abe210f5abdb10edabdc10e1abe070c3
+9570a4f971a6d772a6e47300287884ec
+39f229f509f5ab71f6ab77f7ab78f22c
+f3ab6af4ab6e22032aab244da4bf522a
+5410d2ab2801cfab27d5ab25ecab4ced
+ab51eeab58efab5ef16de429e438e5ab
+34e7ab39e9ab42ebab48032aab2e4432
+4b3a5312696ee4ab31126576e1ab2f12
+686feaab307834793e7a48e1ab18e3ab
+2301eeab7ef7ab7f01ecab80f2a50a10
+ecab816cad724f725873747484758e77
+05e809e8ab78f0ab7cf6ab7de2ab79e3
+ab7ae7ab7b04e2ab67e3ab23eeab68f2
+ab69f8a5ef02e4ab6be7ab6cf9ab6d01
+ebab6ff1ab7004e1ab72e5ab73ebab74
+f2ab75f3ab766c4a6d666e8c6fa24370
+03e4ab64ecab65f3ab66f9a77204e4ab
+4de9ab4eecab4fefab48f9ab5005f009
+f0ab55f1ab56f3ab57e1ab52eaab53ee
+ab5404e3ab59ebab5af0ab5bf8ab5cf9
+ab5d04e7ab5febab60f1ab61f5ab62f9
+ab63673e67426858696e6a846b02e3ab
+49f2ab4af3ab4b03e1ab3af3ab3bf7ab
+3cfaab3d03e9ab3eebab3feeab40f5ab
+4103e4ab43e7ab44ecab45edab4610f2
+ab47614a6276638664a26b6503e6ab35
+e8ab36e9ab37f3ab3806f30cf3ab1cf4
+ab1df6ab1efaab1fe6ab19e8ab1af1ab
+1b02e1ab20e5ab21f0ab2206ee0ceeab
+2befab2cf2a5fdf3ab2de32cebab29ec
+ab2a22055211522e53325410d2ab2810
+cfab2710d5ab252aab24312a4d10c5ab
+261137b2ab2501e3ab32e8ab3300166e
+63752bec09eca9e4f3a9d5f4a9f0754c
+7750e1012a2e41127261e2a9cd012aa9
+cc5010cba9cd10f5a9f210e1a9f37217
+722e733e7410f0a9f102e1a9ece4a9ed
+e7a9ee10f3a9ef6e346f4a7001e1a386
+efa9eb03e2a7c4e7a9e7eea9e8f4a9e9
+10eea9ea66306b176b2e6c386d10f3a9
+e601e1a9e2efa9e301e1a9e5f4a92766
+3468386901eca9e0f0a9e110eca9dd01
+eca9deeea9df614e627e638864926504
+e4a9daefa9dbf2a56ef3a56ef8a9dc04
+e7a9ceec32eda9d1f0a9d2f5a9d3012a
+a9cf5012686cf0a9d001e9a9d4f5a9d5
+01e4a9d6eda9d701e3a9d8f4a9d90175
+26f5a9f402e3a9f5e7a9f6faa9f40014
+6f457917ef09efa944f5ab11f7ab1479
+2cedab04eeab0910f5ab176f3c724c74
+5075547701ebab15efab1602e2ab0ce6
+ab0defab0e10efab0f10edab1002e5ab
+12e7ab13eda9446934693c6a566b5a6d
+5e6e01e1ab0ae7ab0b01e1a9ffe62201
+2aab004e10ccab0110f3ab0210f4ab03
+03e6ab05efab06f4ab07f5ab08613663
+46654a675a6810e7a9fe02e9a9f7eaa9
+59efa9f810e6a9f902eaa9faeca9fbf3
+a9fc10eea9fd6da2576ea6446f0b741d
+ed09eda59ff2a9c4f3a9c7742a7a34e3
+a9bc01e1a9c9eba9ca10eda9cb701770
+2e72327310e1a9c810eda9c302efa9c5
+f5a9c6f9a9c4673a6b3e6e02e7a9c0ee
+a9c1f3a9c210e3a9bd01f2a9bef6a9bf
+0022749fe926ef0fefa944f2a94ff3a4
+92f4a953f9a3cde9a903eba7fdeca939
+ee012abaab4d126f6ee7a53e783d7834
+793e7a64e7a927e8a92d01e3a962eda9
+6305f709f7a967f8a968faa969eba964
+eda965f6a96605f009f0a96df7a96efa
+a96feba96aeda96beea96c7444755a76
+707703eba95ff2a535f6a960f7a96103
+e3a954e6a955e9a956f2a95703e1a958
+f0a959f2a95af3a95b02e1a95ceea95d
+f9a95e69856e446e426f5e707a719072
+02e4a950eaa951efa95204e1a940e6a9
+41e9a942ebb603f7a94304e1a945e5a9
+46e8a947f3a948f8a94903f0a94af3a9
+4bf4a94cf8a94d10eca94e693e6b686c
+7e6d02efa93df5a93ef8a93f03e6a930
+eea931f326f7a934012aa9324d126564
+e6a93303e9a935eca936f0a937f7a938
+02e5a93af0a93bf3a93c6549653e666a
+67866802e9a92eeca92ff2a3fe06ee0c
+eea91ef2a91ff4a920f5a921e4a91be5
+a91ceba91d04e1a922e5a923eea924ef
+a925f1a92604e8a928eca929efa92af0
+a92bf9a92c615e62a25663a2636406e8
+0ce8a917eaa918f2a919f8a91ae1a914
+e5a915e6a9160bee23f718f7a905f926
+faa90922022aa906432a4910c4a90810
+c3a907eeb414efa903f3a904e709e7a9
+00e9a901eba902e3a7fde4a7fee6a7ff
+04e8a90aefa90bf1a90cf5a90df7a90e
+04e9a90ff0a910f1a911f2a912f5a913
+0021745fe41dee0feea99aefa9a0f2a9
+a7f6a9b4f9a9b8e4a97fe5a982e7a987
+eca54a791579307a3ae1a970e2a97801
+eda9b9eea9ba10e9a9bb74387542775e
+7801f1a9b6f2a9b701eda9adf2a9ae04
+e9a9aff0a9b0f3a9b1f6a9b2f8a9b310
+e2a9b56a5d6f326f4870647168726c73
+03eba9a9eea9aaefa9abf3a9ac04e4a9
+a1e5a9a2eea9a3f0a9a4f5a9a510e9a9
+8210efa9a610e2a9a86a4a6b4e6d586e
+04e6a99be8a99ceba99deda99ef0a99f
+10efa99501e7a996efa99701e7a998fa
+a9996637665a675e686e6906ee0ceea9
+91f5a992f9a993faa994e6a98ee9a98f
+eaa99010f2a98602e1a988e2a989eca9
+8a02e2a98be5a98cf7a98d613e636a64
+906502e2a983f7a984f8a98506ee0cee
+a974f0a975f1a976f3a977e3a971e6a9
+72eba97305e809e8a97cefa97df5a97e
+e1a979e5a97ae6a97b01e3a980f3a981
+66c73c69c48269a4166aa4c46ba60e6c
+001b744ae717ef0cefa7ebf4a7f3f5a7
+f5f6a7f9e7a7d5e9a7d7eea7e8771577
+307a34e1a7c0e2a7c710eca7fa01e8a7
+fbfaa7fc742e75327610f3a7f910e7a7
+f403e1a7f6efa7f7f9a3d4faa7f86a4a
+6d316d326e426f4c7210e3a7f202eea7
+e5efa7e6f0a7e701f3a7e9f5a7ea05f2
+09f2a7eff3a7f0faa7f1eaa7eceba7ed
+eca7ee6a346b386c01e5a7e3eea7e410
+f0a7e001e9a7e1f4a7e26448646c6570
+679c6906e70ce7a7dce8a7ddeaa7def3
+a7dfe1a7d8e4a7d9e6012aa7da4c1269
+6de2a7db10e2a7cd06f00cf0a7d1f1a7
+d2f5a7d3faa7d4e4a7cee5a7cfeda7d0
+10e7a7d66134625a6301eda7cbf0a7cc
+05e809e8a7c4eaa7c5f3a7c6e2a7c1e4
+a7c2e7a7c302e5a7c8f5a7c9f7a7ca00
+197739eb17f30cf3a70bf4a723f5a717
+f7a724eba566eea71eefa720e409e4a7
+0de7a712e9a715772a7a34e1a70401ed
+a725f3a72601e8a727e9a7286a366d17
+6d326e366f3a7210e9a72210efa71d10
+e8a71f10f5a7216a2e6b326c10efa71c
+10eaa71604e5a717eba718f4a719f7a7
+1af8a71b641a643466446701e2a713e5
+a71402e4a70ee9a70ff5a71010e5a711
+613a62446302e1a70ae5a70be8a70c01
+eea705f2a70602e1a707e2a708f9a709
+0b721ae909e9a731f6a737f7a738722a
+752ee1a72910e1a73510f4a736671767
+34693e6d01e3a733eca73401eba72fef
+a73010e2a732612e62386510eea72e01
+e2a72aeda72b01efa72cf5a72d002875
+c0e4eb71f23cf628f6a78ef7a7aff902
+2a32414c4c126174eea7bc022aa7ba43
+2a5410d2a7bc10cea7bb127261e2a7bb
+f2a781f3a79af5012aa45941127261e2
+a77ceb38eca771eda776eea780efa785
+012a2e41127261e2a76c042aa76a4132
+4336493a4d10cea76e10c6a76b10cea7
+6c10d2a76d7a187a38e1a5aee7a755e9
+a761eaa76502eaa543f2a7bff4a54375
+3c766e777e788e7901e5a7bdf8a7be07
+ed0ceda7a8eea7a9f0a7aaf3a7abe2a7
+a4e4a7a5e5a7a6eaa7a702e7a7acf2a7
+adf8a7ae02eaa7b0efa7b1f1a7b206ed
+0ceda7b6f0a7b7f7a7b8faa7b9e1a7b3
+e3a7b4e5a7b56bc0cd7063704a717072
+8673a2457403e2a7a1eda7a2efa7a3f2
+a54305f209f2a78df6a78ef8a78fe5a7
+8ae6a78befa78c03e2a790e6a791f3a7
+92f9a79305ec09eca797f3a798f5a799
+e3a794e9a795eaa79605e809e8a79eea
+a79ff2a7a0e2a79be4a79ce6a79d6b50
+6c5a6d706ea24f6f04e9a786eba783ec
+a787f3a788faa78901e3a76feaa77003
+eea772f1a773f4a774f8a77506f21ef2
+32f3a77df5a77ef7a77f012a2e411272
+61e2a77c012aa77a4c10c2a77be2a777
+e8a778efa77904e3a781e6a782e7a755
+eea783f0a784665266486758686869a0
+6a03e4a766e7a767f3a768f9a76902ef
+a752f2a753f9a75402e5a756e6a757f0
+a75808f10ff1a75cf3a75df4a75ef7a7
+5ffaa760e1a759e2a75aebac79eea75b
+02eaa762f5a763f7a76461446276639c
+64a2486502e1a74feea750faa75107e9
+0ce9a73deaa73eeda73fefa740e1a739
+e2a73ae3a73be4a73c05f109f1a744f8
+a745f9a746e4a741eda742f0a74303e7
+a747eba748eca749f4a74a03e5a74be8
+a74ceca74df4a74e66a2b567a46a6800
+137539f20ff2a5fdf4a700f5a701f9a3
+41faa7037536e13ae5a5e7e9a5e9efa5
+f910e9a70222032aa5dd372e43345310
+c4a5de1133b6a5de10cda5df6c386c3c
+6d466e506f6c7301e2a5feeea5ff01e1
+a5f0f5a5f101e4a5f2f4a5f304e4a5f4
+e5a5f5eaa5f6eea5f7efa5f802e3a5fa
+eaa5fbf4a5fc615862746478687c6905
+e809e8a5edeca5eeeda5efe1a5eae6a5
+ebe7a5ec04e7a5e0eba5e1eda5e2f7a5
+e3faa5e410e2a5e510f9a5e610f9a5e8
+00117553e90fe9a578eaa57cefa57ff2
+a585f9a59475427694e1a56ee6012aa5
+744112646ceda57509e80fe8a58ef1a5
+8ff2a590f6a591f9a592e2a58ae332e4
+a58be5a58ce6a58d012aa5744112646c
+eda57510f2a5936d2f6d4e6f52706271
+667204e3a586e5a585f0a587f2a588f3
+a58910f0a57e02e4a580eea581f2a582
+10e5a58310f3a5846132665669606c10
+f2a57d05e908e9a572eea573f467e1a5
+6fe2a570e7a57101e9a576eda57702e1
+a579eca57af4a57b001a6e9f791dec0c
+eca5baeea5bef5a5cef6a5d6792ce1a5
+95e4a5a701e1a5a0e9a5dc733a733e75
+4276747702e3a5d9e9a5daf4a5db10f7
+a5cd07f20cf2a5d2f7a5d3f8a5d4faa5
+d5e2a5cfe3a5d0e4a5d1e7a5be02e6a5
+d7f2a5b1f3a5d86e5a6f6a7204e2a5a3
+e332e5a559f4a5cbf7a5cc012aa5c94c
+12696ee2a5ca02e4a5bfe7a5c0efa5c1
+07ee0ceea5c1f2a5c6f3a5c7f4a5c8e4
+a5c2e6a5c3e9a5c4eda5c567396a236a
+386b486c526d01eda5bcf6a5bd02eba5
+b5eea5b6f5a5b701eea5b8f0a5b910eb
+a5bb673468386901eca5b3eda5b410ee
+a5b110f3a5b26428642e653e6610eba5
+b002e5a5a8eea5a9f2a5aa05ef09efa5
+aef2a52bfaa5afe2a5abeaa5aceca5ad
+612e626c6310f2a5a609ed0feda59bee
+a59cf7a59df9a59efaa59fe1a596e6a5
+97e7a598e8a599eaa59a05ef09efa5a3
+f9a5a4faa5a5e1a5a0e6a5a1eda5a263
+c2e863a29d64a4f2650011772dee19ee
+38efa562f3a564f4a557f5a364012aa5
+5e53126861f7a55f77307834e5a552ec
+a55910efa56c10f4a56d6d466d486e7a
+728473887403f2a568f4a569f5a56af8
+a56b02e1a55ae9a55beb012a2e4e126b
+6fefa55d012aa55c4710cea55d01eea5
+60f1a56110e9a56302e7a565eba566f5
+a567623e664267466b02e1a556eba557
+f9a55810f5a55110e9a55301eca554f9
+a555001a725ce821f316f3a517f52cf6
+a51df9a51e012aa51b47126c61e7a51c
+e8a3e9efa50df2a510771277307a34e1
+a3e0e5a3e510e4a51010e5a517722e73
+547410e4a51a05ec09eca514eda515f3
+a516e8a511eaa512eba51301e2a518f7
+a51968c0ce6c306c326d3c6f6c7010f3
+a50f01e1a509e4a50a02e5a50be7a50c
+ee022aa24a42a29b4811616e01e2a3fc
+f4a3f710f0a50e68426aa28d6b03e2a5
+05eca506efa507f9a50805ef09efa3ff
+f0a500f2a501e92ceba3fdeda3fe022a
+3c42a25f4811616e01e2a3fcf4a3f70b
+4d335417542e55385610c4a3fa01c8a3
+f6d7a3f701cba3f8d3a3f94d2e503853
+10d2a3f501cfa3f0d9a3f102c6a3f2c8
+a3f3daa3f4470e473048b4684910c4a3
+ef10c6a3ed2aa3ea412a4210cea3ec10
+d5a3eb126f70efa3fb02e1a502eda503
+f6a5046511652e66326710e7a3e810e2
+a3e610e1a3e7613462386301e8a3e3f0
+a3e410eea3e110eaa3e20016726f791c
+e508e5a52bf6a54bfa45792a7a34e1a5
+1f01efa54ef5a54f10e7a550751d752e
+764a7710f7a54d04e1a546e3a547e4a5
+48e7a549f4a54a10e1a54c7240737474
+03eda542f0a543f3a544f9a54502e82c
+e9a53ff3a540012a2e4d126f6ee7a53e
+012aa53d4310cea53e10e2a54167446a
+1a6a406e446f03e2a53ae9a532f0a53b
+f7a53c10e5a53810eaa5396734686069
+01e1a536f1a53706ec0ceca531efa532
+f2a533faa534e1a52ee8a52fe9a53010
+e4a535613c6268637264766501e4a52c
+eea52d06e80ce8a523eba524f2a525f6
+a526e4a520e6a521e7a52201e4a527f1
+a52810e3a52910eea52a2aa4fa61be83
+62002373a5e51ded0feda3a7eea3acef
+a3b0f2a3b9f3a3bee5a37ae7a387e8a3
+8be9a39277417732783c794c7a72e1a3
+6001e4a3d1f2a3d202e8a3d3eba3d4f2
+a3c605f309f3a3d8f6a3d9f8a3dae5a3
+d5eea3d6f2a3d704e1a3dbe5a3dce6a3
+dde8a3def7a3df7332744875587610e2
+a3d003eaa3bff1a3c0f3a3c1f4a3c202
+efa3c3f4a3c4f6a3c509ed0feda3cbef
+a3ccf2a3cdf3a3cef5a3cfe1a3c6e3a3
+c7e4a3c8e7a3c9eba3ca6a7d6e3b6e48
+6f587068716c7203e1a3bae8a3bbf8a3
+bcfaa3bd02e7a3adeda3aef0a3af02ea
+a3b1eda3b2eea3b310f9a3b403e3a3b5
+e9a3b6f0a3b7f6a3b86a466b786ca4a6
+6d03e8a3a8eba3a9f1a3aaf5a3ab07ef
+0cefa39df2a39ef4a39ffaa3a0e8a399
+e9a39aeaa39beea39c04e3a3a1eda3a2
+f1a3a3f5a3a4f6a3a5656d6562669467
+a24468a24b6906ed0ceda395eea396ef
+a397f1a398e2a393e7a394eba37307f4
+0cf4a37ff7a380f8a381faa382e6a37b
+e8a37ceaa37deda37e03e4a383f1a384
+f4a385f9a38602e3a388eea389f8a38a
+06eb0ceba38feca390efa38bf9a391e2
+a38ce7a38de9a38e6132626463906410
+e4a37907f20cf2a365f3a366f6a367f8
+a368eca361eea362f0a363f1a36406e4
+0ce4a36ceaa36df0a36ef2a36fe1a369
+e2a36ae3a36b09ed0feda374eea375ef
+a376f1a377f5a378e3a361e6a370e8a3
+71e9a372eca37300184dc25d54c0c257
+2e5736584e59645a12616ee2adc00161
+2c631168efa99f1172e1adbf01702a73
+1175f8691165efa9db126969e9a71554
+3455a28756126169e9adbe056746674a
+68506901622a7210e8adbd10f4012aa3
+b04210d445116ce7adba106101e158e9
+042aab9e43324b364c3a5410c8ab9e10
+cea7cc10c8adbb10c1adbc012aa54b4d
+10d6a54b6130657c66116ee7adb9056d
+116d2e6e327610f4a3a610ecab8210e7
+abd667346b386c01e5ab90f5a75a10e2
+adb710f2adb8116cf5ab93126761f2ab
+e050c0c350a27252a2a8530669436942
+6f6a759079016c2a7210e3a50a10efad
+b601643e6e01e4ab31e8012aab424c10
+cbab4210e4adb302672e72387910efa5
+0c01e4ab5fefadb410e1ab67116ee4ad
+b5613e675e6801612a7210e4adb210f7
+a55f026d2e72327510f2ab1f10f2ab55
+10e2adb1116ef7a349046138654a6850
+6c76721174e9adae016c2a7510e3a51a
+10ed9f1172edadac02612e6c326e10f8
+a9df10e7adad01e9a9cff0a9d01172e4
+a5f2026a306f3675116ef2a9a3116ee7
+adaf1168e7adb04d7c4ea2a44f036742
+6c48724e7301672a6d10e1adab10e5a9
+c81161edab3a1163ebadaa016b2a7910
+e1a9c410e8a9ca07722e725a74607566
+79116df2032aa3cd492e4d325410c8ad
+a510cea75e10cda3cd116fefa9521165
+e9ada3116cf4ada4614265746c9a6f01
+642a6e10e7a53e10e9ada20368326b36
+6e3a7210e3ad9e10eaad9b10e1ad9c01
+e4a969e9ad9d0264346e387201e3ada0
+efada110e6a93310e4ad9f1179eda939
+046138624c65506b56731168f5ada901
+6e2a7210e2ada710e4ada61161f49d11
+77e1ada8116fefa55d47c40d4ac3214a
+a6ba4ba6d34c03614665a69669a69879
+01632a6410e9ad9a10e9ad99026ea673
+6fa6747410ee001a4bc139545b571457
+ac5458ac5859305a01c1abe8d7ab5810
+d4ad5b544055825603c1ad56c5ad58ce
+ad57d5a3920bce14d609d6abd2d7abbf
+daab78cead97cfabb5d2abbdcbc5b8cb
+abaaccad52cdaba904c1ad98c7ad54d3
+a55ed9ad55daabec4e824e8a50a24a52
+a26d530cce17d40cd4ad4cd5a797d6ad
+4dd9ad96cead4acfab5ed2ad4bca09ca
+ad48cbab48cdad49c3ad47c5ab77c9ab
+4c06c70cc7abe4c9ad3acca54acfa978
+c1ad37c3ad38c5ad390acc14d409d4ad
+43d7a9d3d9a5becca9e4cdad40d2ad41
+c1ad3cc5ad3dc6ad3ec7abb9c8a57a03
+c5ad46cfa944d5a797d7ab144b9e4ca2
+434d0010cd1dd40fd4a953d5a923d8ab
+e6d9a906daad36cda73bcfad94d1ad34
+d2ad95c60cc6ad33c7a927cbad93cca3
+a7c1ad92c3ad31c4ad32c5ab2601c5ad
+26cdad9104c9ad2cd3ad2dd4a7f3d5ad
+2ed6a7f943bd465f464c475c48a24849
+04c3ad23c4a70dd2ad90d3a70bd4a723
+02c9a578cfa57fd2a5850cce17d30cd3
+ad1ed4ad1fd5abebd7ad20cead1bd0ad
+1cd1ad1dc608c6ad1ac867cca771c1ad
+19c2abeec5ad8f04cdad21cead22d2a5
+fdd4a700d5a701435244a2424505d309
+d3a564d4ad89daad18c1ad14c3ad15c5
+a5570fcf1ad60cd6ad0fd7abf0d9ad8d
+daa517cfad0cd0ad0dd2ad0ed5abe9c9
+c676c9ad09ccad0acdad0bcead8c05cb
+09cba51fcfad12daad8ec5a52bc7ad45
+caad11345e34ae714178420bce14d209
+d2a9f0d5a73bd6ad05cead02cfad03d1
+ad04c909c9ab09caad00ccad01c1a3be
+c5abfdc6abfe0cd217d50cd5abe5d7ab
+fbd8abfcdaa35cd2abf8d3abf9d4abfa
+cd09cdad8bcfabf6d1abf7c4abf3c6ad
+8acc6d2aa55e312c321133b0ad891137
+b2a79710e1a9a110ef012aa7eb4c10c1
+a7eb1170e3a7d1026d2e6e327310f5a7
+df10e2a7db01e1a7c1e2a5ca01613a70
+1161ee012aa7294a10d0a729016d2a76
+10e1ad8610efad85046138684c6e7a6f
+80741168e9ad88016c2a6e10e1ad8710
+e9a55802612e6d326f10eaab3010f2a9
+ec10f2012aa7764b10c8a7761164e1a7
+801172e5012aa7854b01d0ad28d2a785
+473448a25c49127461eca5690465466c
+5a6f60727a75016a2a7210f5a9cc10f2
+a5ce116ff2012aa5ae4710c5a5ae1161
+e7a51c016e2a7410e8a5c801e7ad79ed
+a5650161506510eb032aa559432e4732
+5410d2a38a10d9ad1010d2a55910eead
+7a056c186c306d3675116ee7ad841175
+f7a5f1106e01e7a5f6f0a9616130659c
+691172e1ad83016e2a7410f2a93205ef
+24efad7df34cf4032aa3f7482e4d3254
+10d7a3f710cba3ee10cfa3f0012aa3ea
+4310cea3eae2a3fce7ad7be9ad7c1162
+f2054917493a533e5502c1ad81cbad7f
+d3ad8210cca5e710c5ad802aa5e7432a
+4710c2ad7f10c1ad7e43c0f9437044a2
+b9450267406c46741168e9012a754501
+d2ad17d4751179f0a55501622a7910ed
+ad7810e1ad770461a26868a2776fa280
+70a282791172ec0a4d335414542e5538
+5810cbad7101caab99d2ad7010c1abe1
+4d2e523e5310cbad6f02c4ad6dcba7fd
+cea53d02cfad6ed3ab24d5ab112aab11
+4138423c474c4b01c7a7badaad2a10cc
+ad6a02c1ad6bc7a387d9a37a01c52dd2
+ad6c026b2e6e327210e9ad6910eda3e4
+10f3a51001612a6510f2a50110eda503
+1170f4a50e1172f4a5c90265306f8275
+1170ecad761176e106491749324d364e
+3a5010cba3c510cea5e910d5ad7410d0
+a9822aa5e9422a4610caad7310d4ad72
+1167f2ad752aa25441aa3142056f1f6f
+3e72447501672a6810e4ad6810e9ad67
+1170efa3fb106101e8a9e2e9ad666130
+655c68116bf3ad65036c326d36733a74
+10ebad6410e9ad6310f5a36810f3a3c0
+116ee7012aa3ac4210c4a3ac00214ac1
+c052c0cc5638563c5758586259665a01
+d2abe7d7ab5804c1ad56c4ad57c5ad58
+cead57d5a39201c6ad59d3ab5110cbad
+5a03c4abf2c5abf2d4ad5bd5ab245246
+536c54a2575503c1abe1c7ad54d9ad55
+daabec05d309d3ab24d5ab11d7ab14c5
+ad46c8ab58cfa9440ecd1ad40cd4ad4c
+d5ab11d6ad4dd9ad4ecdad49cead4acf
+ab5ed2ad4bc50cc5ab77c9ab4ccaad48
+cbab48c1abedc3ad47c4abf10ecd1ad2
+0cd2abbdd6abd2d7a3f7daab78cdaba9
+cead53cfabb5d0ad52c80cc8ab9ecaab
+99cbabaaccad52c4ad4fc6ad50c7ad51
+4e644e404fa89650785102c1ad44cfad
+45d5abee09cc0fcca54acfa978d0a982
+d1abf7d4abedc1ad37c3ad38c5ad39c8
+a392c9ad3a0dcd17d40cd4ad43d7a9d3
+d9a5bedaad3ccdad40d2ad41d3ad42c7
+0cc7abb9c8a57acbad3fcca9e4c1ad3c
+c5ad3dc6ad3e4aa2484ba24c4ca2644d
+0012ce20d50fd5a923d6a54bd8abe6d9
+a906daad36cea53dcfa3f0d1ad34d2ad
+35d4a953c60fc6ad33c7a927cba7fdcc
+a3a7cda3cdc1ad30c3ad31c4ad32c5ab
+2601cfad25d0a72907d00cd0ad28d2a7
+85d7ad29daad2ac5ad26c7a7bac8a776
+cdad2708d30fd3ad2dd4a7f3d5ad2ed6
+a7f9d9ad2fc1a7ebc2ad2bc9ad2ccbab
+4241c1604598456a46a24147a24e48a2
+754907d10cd1ad24d2a56ed3a70bd4a7
+23c3ad23c4a70dcca5e7cea5e909d20e
+d2ad17d3a564d475d5abeedaad18c1ad
+14c3ad15c5a557c799c8ad1604c9a578
+cfa57fd1abf7d2a585d8a5850bd014d3
+09d3ad1ed4ad1fd7ad20d0ad1cd1ad1d
+d2a559c808c867cca771cead1bc1ad19
+c5a5aec6ad1a06d20cd2a5fdd4a700d5
+a701d6abfecba3eecdad21cead22415e
+42a24c43a2834406cb0ccba51fcfad12
+d9ad00daad13c4a52bc5a52bcaad110d
+d117d40cd4abfad7abfbd8abfcdaa35c
+d1abf7d2abf8d3abf9cc0bcc6dcda341
+ceabf0cfabf6c4abf3c5abf4c6abf500
+10cc1cd20ed2a9f0d445d5a3cdd6ad05
+d9a37accad01cead02cfad03d1ad04c7
+0cc7a387c8abffc9ab09caad00c1a3be
+c4a3acc5abfdc6abfe0010cf1dd50fd5
+abe9d6ad0fd7abf0d9ad10daa517cfad
+0cd0ad0dd2ad0ed3ab24c90cc9ad09cc
+ad0acdad0bcea3eac4abe7c6ad06c7ad
+07c8ad08342234423548374e3801382a
+3910b0ab2410b6abf21131b9abef1133
+b2abf01133b6abf12aa55e305031a26b
+3202302e33383810b0a52b01b0a517b2
+abe410b0750533233334354a3601b1ab
+51b2a5e903b0a3eab4a5e9b5a70db9a7
+2302b3abe5b4abeab7abeb3034314a32
+01b1a55eb9abe903b2abe4b3a55eb5a9
+f0b9abe506b50bb599b7abe7b8abe8b9
+a55eb1abe4b3abe6b4ab7802342e353e
+3710b2ab1102b2a3eab3abecb5abed03
+b0ab11b1ab11b4abeeb5a52b04643667
+3c68427246761173f44b116ceda57511
+68e2ad5c116fed6301613c6d01e99bee
+012aa3414110cda34110e200134c6251
+31513c53405450556a5901c5abf2d4ab
+7910c1ad4402c1abedc4abf1d9ad4e04
+c78fc8a922caad61cead53d2ad6210cb
+ad5d4c3c4d464e624f665001cbad3fd3
+ad4201c2ad2bd9ad2f04c1ad30cda9fe
+cea76ed2ad35d5ad5f10c7ad6010cdad
+3b452e4542474a494e4a644b02c8a502
+cdad27d7ad2901c799c8ad1610c2ad5d
+03c4a908cead5ed1ad24d2a56e10cfad
+252a994132423c43464410daad1301c5
+abf4c6abf501c8abffd5a9fe01c3a907
+ceabdd00207492e52bf221f299f3a347
+f6a351f9a35afa012a2e41127261e2a3
+5e022aa35c49985210d5a35fe54be651
+eb67ed757934792c7a36e125e22d01e2
+a35bf2a35a10ea012a2e41127261e2a3
+5e032aa35c312e49345310d5a35f1137
+b2a35f01d1a35dd2a35e744475547658
+7703e1a356e2a357efa358f8a35902e1
+a34de7a34eeaa34f10f9a35003eca352
+eea353f4a354f5a3556b856f586f4a70
+54726e7304e1a348e5a349e7a34aefa3
+4bf4a34c02ea87ed89fa8b05f206f293
+f395fa97e38de48fe59108ee0feea342
+efa343f1a344f9a345faa346e299e32a
+e8a1eda341022a9b4e2c5012616ced9f
+126261f49d6b3c6c3e6d586e03e37feb
+81ee83f98510eb6905ee06ee71f36df4
+73e16be26de96f03ed77ee79ef7bf07d
+65256530673668506a10e76501e24df9
+4f05ed06ed59ef5bf15de353e455e757
+02e15fec61ef636148625263646405f0
+06f045f947fa49e13fe541ea4302e927
+eb29f52b04e92ff131f233f435f93702
+e439e53be83d
+        }  // trie
+        lsrs{  // 1584
+            "","","",
+            "skip","script","",
+            "aa","Latn","ET",
+            "aai","Latn","ZZ",
+            "aak","Latn","ZZ",
+            "aau","Latn","ZZ",
+            "ab","Cyrl","GE",
+            "abi","Latn","ZZ",
+            "abq","Cyrl","ZZ",
+            "abr","Latn","GH",
+            "abt","Latn","ZZ",
+            "aby","Latn","ZZ",
+            "acd","Latn","ZZ",
+            "ace","Latn","ID",
+            "ach","Latn","UG",
+            "ada","Latn","GH",
+            "ade","Latn","ZZ",
+            "adj","Latn","ZZ",
+            "dz","Tibt","BT",
+            "ady","Cyrl","RU",
+            "adz","Latn","ZZ",
+            "ae","Avst","IR",
+            "aeb","Arab","TN",
+            "aey","Latn","ZZ",
+            "af","Latn","ZA",
+            "agc","Latn","ZZ",
+            "agd","Latn","ZZ",
+            "agg","Latn","ZZ",
+            "agm","Latn","ZZ",
+            "ago","Latn","ZZ",
+            "agq","Latn","CM",
+            "aha","Latn","ZZ",
+            "ahl","Latn","ZZ",
+            "aho","Ahom","IN",
+            "ajg","Latn","ZZ",
+            "ak","Latn","GH",
+            "akk","Xsux","IQ",
+            "ala","Latn","ZZ",
+            "sq","Latn","AL",
+            "ali","Latn","ZZ",
+            "aln","Latn","XK",
+            "alt","Cyrl","RU",
+            "am","Ethi","ET",
+            "amm","Latn","ZZ",
+            "amn","Latn","ZZ",
+            "amo","Latn","NG",
+            "amp","Latn","ZZ",
+            "anc","Latn","ZZ",
+            "ank","Latn","ZZ",
+            "ann","Latn","ZZ",
+            "any","Latn","ZZ",
+            "aoj","Latn","ZZ",
+            "aom","Latn","ZZ",
+            "aoz","Latn","ID",
+            "apc","Arab","ZZ",
+            "apd","Arab","TG",
+            "ape","Latn","ZZ",
+            "apr","Latn","ZZ",
+            "aps","Latn","ZZ",
+            "apz","Latn","ZZ",
+            "ar","Arab","EG",
+            "arc","Armi","IR",
+            "arc","Nbat","JO",
+            "arc","Palm","SY",
+            "arh","Latn","ZZ",
+            "hy","Armn","AM",
+            "arn","Latn","CL",
+            "aro","Latn","BO",
+            "arq","Arab","DZ",
+            "ary","Arab","MA",
+            "arz","Arab","EG",
+            "as","Beng","IN",
+            "asa","Latn","TZ",
+            "ase","Sgnw","US",
+            "asg","Latn","ZZ",
+            "aso","Latn","ZZ",
+            "ast","Latn","ES",
+            "ata","Latn","ZZ",
+            "atg","Latn","ZZ",
+            "atj","Latn","CA",
+            "auy","Latn","ZZ",
+            "av","Cyrl","RU",
+            "avl","Arab","ZZ",
+            "avn","Latn","ZZ",
+            "avt","Latn","ZZ",
+            "avu","Latn","ZZ",
+            "awa","Deva","IN",
+            "awb","Latn","ZZ",
+            "awo","Latn","ZZ",
+            "awx","Latn","ZZ",
+            "ay","Latn","BO",
+            "ayb","Latn","ZZ",
+            "az","Latn","AZ",
+            "az","Arab","IQ",
+            "az","Arab","IR",
+            "az","Cyrl","RU",
+            "ba","Cyrl","RU",
+            "bal","Arab","PK",
+            "ban","Latn","ID",
+            "bap","Deva","NP",
+            "eu","Latn","ES",
+            "bar","Latn","AT",
+            "bas","Latn","CM",
+            "bav","Latn","ZZ",
+            "bax","Bamu","CM",
+            "bba","Latn","ZZ",
+            "bbb","Latn","ZZ",
+            "bbc","Latn","ID",
+            "bbd","Latn","ZZ",
+            "bbj","Latn","CM",
+            "bbp","Latn","ZZ",
+            "bbr","Latn","ZZ",
+            "bcf","Latn","ZZ",
+            "bch","Latn","ZZ",
+            "bci","Latn","CI",
+            "bik","Latn","PH",
+            "bcm","Latn","ZZ",
+            "bcn","Latn","ZZ",
+            "bco","Latn","ZZ",
+            "bcq","Ethi","ZZ",
+            "bcu","Latn","ZZ",
+            "bdd","Latn","ZZ",
+            "be","Cyrl","BY",
+            "bef","Latn","ZZ",
+            "beh","Latn","ZZ",
+            "bej","Arab","SD",
+            "bem","Latn","ZM",
+            "bet","Latn","ZZ",
+            "bew","Latn","ID",
+            "bex","Latn","ZZ",
+            "bez","Latn","TZ",
+            "bfd","Latn","CM",
+            "bfq","Taml","IN",
+            "bft","Arab","PK",
+            "bfy","Deva","IN",
+            "bg","Cyrl","BG",
+            "bgc","Deva","IN",
+            "bgn","Arab","PK",
+            "bgx","Grek","TR",
+            "bho","Deva","IN",
+            "bhb","Deva","IN",
+            "bhg","Latn","ZZ",
+            "bhi","Deva","IN",
+            "bhk","Latn","PH",
+            "bhl","Latn","ZZ",
+            "bhy","Latn","ZZ",
+            "bi","Latn","VU",
+            "bib","Latn","ZZ",
+            "big","Latn","ZZ",
+            "bim","Latn","ZZ",
+            "bin","Latn","NG",
+            "bio","Latn","ZZ",
+            "biq","Latn","ZZ",
+            "bjh","Latn","ZZ",
+            "bji","Ethi","ZZ",
+            "bjj","Deva","IN",
+            "bjn","Latn","ID",
+            "bjo","Latn","ZZ",
+            "bjr","Latn","ZZ",
+            "bjt","Latn","SN",
+            "bjz","Latn","ZZ",
+            "bkc","Latn","ZZ",
+            "bkm","Latn","CM",
+            "bkq","Latn","ZZ",
+            "bku","Latn","PH",
+            "bkv","Latn","ZZ",
+            "blt","Tavt","VN",
+            "bm","Latn","ML",
+            "bmh","Latn","ZZ",
+            "bmk","Latn","ZZ",
+            "bmq","Latn","ML",
+            "bmu","Latn","ZZ",
+            "bn","Beng","BD",
+            "bng","Latn","ZZ",
+            "bnm","Latn","ZZ",
+            "bnp","Latn","ZZ",
+            "bo","Tibt","CN",
+            "boj","Latn","ZZ",
+            "bom","Latn","ZZ",
+            "bon","Latn","ZZ",
+            "bpy","Beng","IN",
+            "bqc","Latn","ZZ",
+            "bqi","Arab","IR",
+            "bqp","Latn","ZZ",
+            "bqv","Latn","CI",
+            "br","Latn","FR",
+            "bra","Deva","IN",
+            "brh","Arab","PK",
+            "brx","Deva","IN",
+            "brz","Latn","ZZ",
+            "bs","Latn","BA",
+            "bsj","Latn","ZZ",
+            "bsq","Bass","LR",
+            "bss","Latn","CM",
+            "bst","Ethi","ZZ",
+            "bto","Latn","PH",
+            "btt","Latn","ZZ",
+            "btv","Deva","PK",
+            "bua","Cyrl","RU",
+            "buc","Latn","YT",
+            "bud","Latn","ZZ",
+            "bug","Latn","ID",
+            "buk","Latn","ZZ",
+            "bum","Latn","CM",
+            "buo","Latn","ZZ",
+            "my","Mymr","MM",
+            "bus","Latn","ZZ",
+            "buu","Latn","ZZ",
+            "bvb","Latn","GQ",
+            "bwd","Latn","ZZ",
+            "bwr","Latn","ZZ",
+            "bxh","Latn","ZZ",
+            "luy","Latn","KE",
+            "bye","Latn","ZZ",
+            "byn","Ethi","ER",
+            "byr","Latn","ZZ",
+            "bys","Latn","ZZ",
+            "byv","Latn","CM",
+            "byx","Latn","ZZ",
+            "bza","Latn","ZZ",
+            "bze","Latn","ML",
+            "bzf","Latn","ZZ",
+            "bzh","Latn","ZZ",
+            "bzw","Latn","ZZ",
+            "ca","Latn","ES",
+            "can","Latn","ZZ",
+            "cbj","Latn","ZZ",
+            "cch","Latn","NG",
+            "ccp","Cakm","BD",
+            "ce","Cyrl","RU",
+            "ceb","Latn","PH",
+            "cfa","Latn","ZZ",
+            "cgg","Latn","UG",
+            "ch","Latn","GU",
+            "zh","Hans","CN",
+            "zh","Hant","AU",
+            "zh","Hant","BN",
+            "zh","Hant","GF",
+            "zh","Hant","HK",
+            "zh","Hant","ID",
+            "zh","Hant","MO",
+            "zh","Hant","MY",
+            "zh","Hant","PF",
+            "zh","Hant","PH",
+            "zh","Hant","PA",
+            "zh","Hant","SR",
+            "zh","Hant","TH",
+            "zh","Hant","TW",
+            "zh","Hant","GB",
+            "zh","Hant","US",
+            "zh","Hant","VN",
+            "zh","Bopo","TW",
+            "zh","Hanb","TW",
+            "chk","Latn","FM",
+            "chm","Cyrl","RU",
+            "cho","Latn","US",
+            "chp","Latn","CA",
+            "chr","Cher","US",
+            "cja","Arab","KH",
+            "cjm","Cham","VN",
+            "cjv","Latn","ZZ",
+            "ckb","Arab","IQ",
+            "ckl","Latn","ZZ",
+            "cko","Latn","ZZ",
+            "cky","Latn","ZZ",
+            "cla","Latn","ZZ",
+            "syr","Syrc","IQ",
+            "cme","Latn","ZZ",
+            "cmg","Soyo","MN",
+            "co","Latn","FR",
+            "cop","Copt","EG",
+            "cps","Latn","PH",
+            "cr","Cans","CA",
+            "crh","Cyrl","UA",
+            "crj","Cans","CA",
+            "crk","Cans","CA",
+            "crl","Cans","CA",
+            "crm","Cans","CA",
+            "crs","Latn","SC",
+            "cs","Latn","CZ",
+            "csb","Latn","PL",
+            "csw","Cans","CA",
+            "ctd","Pauc","MM",
+            "cu","Cyrl","RU",
+            "cu","Glag","BG",
+            "cv","Cyrl","RU",
+            "cy","Latn","GB",
+            "da","Latn","DK",
+            "dad","Latn","ZZ",
+            "daf","Latn","ZZ",
+            "dag","Latn","ZZ",
+            "dah","Latn","ZZ",
+            "dak","Latn","US",
+            "dar","Cyrl","RU",
+            "dav","Latn","KE",
+            "dbd","Latn","ZZ",
+            "dbq","Latn","ZZ",
+            "dcc","Arab","IN",
+            "ddn","Latn","ZZ",
+            "de","Latn","DE",
+            "ded","Latn","ZZ",
+            "den","Latn","CA",
+            "dga","Latn","ZZ",
+            "dgh","Latn","ZZ",
+            "dgi","Latn","ZZ",
+            "dgl","Arab","ZZ",
+            "doi","Arab","IN",
+            "dgr","Latn","CA",
+            "dgz","Latn","ZZ",
+            "mwr","Deva","IN",
+            "dia","Latn","ZZ",
+            "zza","Latn","TR",
+            "dje","Latn","NE",
+            "dnj","Latn","CI",
+            "dob","Latn","ZZ",
+            "dop","Latn","ZZ",
+            "dow","Latn","ZZ",
+            "mn","Cyrl","MN",
+            "mn","Mong","CN",
+            "dri","Latn","ZZ",
+            "drs","Ethi","ZZ",
+            "dsb","Latn","DE",
+            "dtm","Latn","ML",
+            "dtp","Latn","MY",
+            "dts","Latn","ZZ",
+            "dty","Deva","NP",
+            "dua","Latn","CM",
+            "duc","Latn","ZZ",
+            "dud","Latn","ZZ",
+            "dug","Latn","ZZ",
+            "nl","Latn","NL",
+            "dv","Thaa","MV",
+            "dva","Latn","ZZ",
+            "dww","Latn","ZZ",
+            "dyo","Latn","SN",
+            "dyu","Latn","BF",
+            "dzg","Latn","ZZ",
+            "ebu","Latn","KE",
+            "ee","Latn","GH",
+            "efi","Latn","NG",
+            "egl","Latn","IT",
+            "egy","Egyp","EG",
+            "eka","Latn","ZZ",
+            "et","Latn","EE",
+            "eky","Kali","MM",
+            "el","Grek","GR",
+            "ema","Latn","ZZ",
+            "emi","Latn","ZZ",
+            "man","Latn","GM",
+            "man","Nkoo","GN",
+            "en","Latn","US",
+            "en","Shaw","GB",
+            "enn","Latn","ZZ",
+            "enq","Latn","ZZ",
+            "eo","Latn","001",
+            "eri","Latn","ZZ",
+            "es","Latn","ES",
+            "esg","Gonm","IN",
+            "ik","Latn","US",
+            "esu","Latn","US",
+            "etr","Latn","ZZ",
+            "ett","Ital","IT",
+            "etu","Latn","ZZ",
+            "etx","Latn","ZZ",
+            "ewo","Latn","CM",
+            "ext","Latn","ES",
+            "fa","Arab","IR",
+            "faa","Latn","ZZ",
+            "fab","Latn","ZZ",
+            "fag","Latn","ZZ",
+            "fai","Latn","ZZ",
+            "fan","Latn","GQ",
+            "ff","Latn","SN",
+            "ff","Adlm","GN",
+            "ffi","Latn","ZZ",
+            "ffm","Latn","ML",
+            "fi","Latn","FI",
+            "fia","Arab","SD",
+            "fil","Latn","PH",
+            "fit","Latn","SE",
+            "fj","Latn","FJ",
+            "flr","Latn","ZZ",
+            "fmp","Latn","ZZ",
+            "fo","Latn","FO",
+            "fod","Latn","ZZ",
+            "fon","Latn","BJ",
+            "for","Latn","ZZ",
+            "fpe","Latn","ZZ",
+            "fqs","Latn","ZZ",
+            "fr","Latn","FR",
+            "frc","Latn","US",
+            "frp","Latn","FR",
+            "frr","Latn","DE",
+            "frs","Latn","DE",
+            "fub","Arab","CM",
+            "fud","Latn","WF",
+            "fue","Latn","ZZ",
+            "fuf","Latn","GN",
+            "fuh","Latn","ZZ",
+            "fuq","Latn","NE",
+            "fur","Latn","IT",
+            "fuv","Latn","NG",
+            "fuy","Latn","ZZ",
+            "fvr","Latn","SD",
+            "fy","Latn","NL",
+            "ga","Latn","IE",
+            "gaa","Latn","GH",
+            "gaf","Latn","ZZ",
+            "gag","Latn","MD",
+            "gah","Latn","ZZ",
+            "gaj","Latn","ZZ",
+            "gam","Latn","ZZ",
+            "gan","Hans","CN",
+            "gaw","Latn","ZZ",
+            "gay","Latn","ID",
+            "om","Latn","ET",
+            "gba","Latn","ZZ",
+            "gbf","Latn","ZZ",
+            "gbm","Deva","IN",
+            "grb","Latn","ZZ",
+            "gby","Latn","ZZ",
+            "gbz","Arab","IR",
+            "gcr","Latn","GF",
+            "gd","Latn","GB",
+            "gde","Latn","ZZ",
+            "gdn","Latn","ZZ",
+            "gdr","Latn","ZZ",
+            "geb","Latn","ZZ",
+            "gej","Latn","ZZ",
+            "gel","Latn","ZZ",
+            "ka","Geor","GE",
+            "gez","Ethi","ET",
+            "gfk","Latn","ZZ",
+            "gvr","Deva","NP",
+            "ghs","Latn","ZZ",
+            "gil","Latn","KI",
+            "gim","Latn","ZZ",
+            "gjk","Arab","PK",
+            "gjn","Latn","ZZ",
+            "gju","Arab","PK",
+            "gkn","Latn","ZZ",
+            "gkp","Latn","ZZ",
+            "gl","Latn","ES",
+            "glk","Arab","IR",
+            "gmm","Latn","ZZ",
+            "gmv","Ethi","ZZ",
+            "gn","Latn","PY",
+            "gnd","Latn","ZZ",
+            "gng","Latn","ZZ",
+            "gon","Telu","IN",
+            "god","Latn","ZZ",
+            "gof","Ethi","ZZ",
+            "goi","Latn","ZZ",
+            "gom","Deva","IN",
+            "gor","Latn","ID",
+            "gos","Latn","NL",
+            "got","Goth","UA",
+            "grc","Cprt","CY",
+            "grc","Linb","GR",
+            "grt","Beng","IN",
+            "grw","Latn","ZZ",
+            "gsw","Latn","CH",
+            "gu","Gujr","IN",
+            "gub","Latn","BR",
+            "guc","Latn","CO",
+            "gud","Latn","ZZ",
+            "gur","Latn","GH",
+            "guw","Latn","ZZ",
+            "gux","Latn","ZZ",
+            "guz","Latn","KE",
+            "gv","Latn","IM",
+            "gvf","Latn","ZZ",
+            "gvs","Latn","ZZ",
+            "gwc","Arab","ZZ",
+            "gwi","Latn","CA",
+            "gwt","Arab","ZZ",
+            "gyi","Latn","ZZ",
+            "ha","Latn","NG",
+            "ha","Arab","SD",
+            "ha","Arab","CM",
+            "hag","Latn","ZZ",
+            "hak","Hans","CN",
+            "ham","Latn","ZZ",
+            "haw","Latn","US",
+            "haz","Arab","AF",
+            "hbb","Latn","ZZ",
+            "hdy","Ethi","ZZ",
+            "he","Hebr","IL",
+            "hhy","Latn","ZZ",
+            "hi","Deva","IN",
+            "hia","Latn","ZZ",
+            "hif","Latn","FJ",
+            "hig","Latn","ZZ",
+            "hih","Latn","ZZ",
+            "hil","Latn","PH",
+            "srx","Deva","IN",
+            "hla","Latn","ZZ",
+            "hlu","Hluw","TR",
+            "hmd","Plrd","CN",
+            "hmt","Latn","ZZ",
+            "hnd","Arab","PK",
+            "hne","Deva","IN",
+            "hnj","Hmng","LA",
+            "hnn","Latn","PH",
+            "hno","Arab","PK",
+            "ho","Latn","PG",
+            "hoc","Deva","IN",
+            "hoj","Deva","IN",
+            "hot","Latn","ZZ",
+            "hr","Latn","HR",
+            "hsb","Latn","DE",
+            "hsn","Hans","CN",
+            "ht","Latn","HT",
+            "hu","Latn","HU",
+            "hui","Latn","ZZ",
+            "hz","Latn","NA",
+            "ia","Latn","001",
+            "ian","Latn","ZZ",
+            "iar","Latn","ZZ",
+            "iba","Latn","MY",
+            "ibb","Latn","NG",
+            "iby","Latn","ZZ",
+            "ica","Latn","ZZ",
+            "is","Latn","IS",
+            "ich","Latn","ZZ",
+            "id","Latn","ID",
+            "idd","Latn","ZZ",
+            "idi","Latn","ZZ",
+            "idu","Latn","ZZ",
+            "ife","Latn","TG",
+            "ig","Latn","NG",
+            "igb","Latn","ZZ",
+            "ige","Latn","ZZ",
+            "ii","Yiii","CN",
+            "ijj","Latn","ZZ",
+            "iu","Cans","CA",
+            "ikk","Latn","ZZ",
+            "ikt","Latn","CA",
+            "ikw","Latn","ZZ",
+            "ikx","Latn","ZZ",
+            "ilo","Latn","PH",
+            "imo","Latn","ZZ",
+            "in","Latn","ID",
+            "inh","Cyrl","RU",
+            "io","Latn","001",
+            "iou","Latn","ZZ",
+            "iri","Latn","ZZ",
+            "it","Latn","IT",
+            "iw","Hebr","IL",
+            "iwm","Latn","ZZ",
+            "iws","Latn","ZZ",
+            "izh","Latn","RU",
+            "izi","Latn","ZZ",
+            "ja","Jpan","JP",
+            "jab","Latn","ZZ",
+            "jam","Latn","JM",
+            "jbo","Latn","001",
+            "jbu","Latn","ZZ",
+            "jen","Latn","ZZ",
+            "jgk","Latn","ZZ",
+            "jgo","Latn","CM",
+            "yi","Hebr","001",
+            "jib","Latn","ZZ",
+            "jmc","Latn","TZ",
+            "jml","Deva","NP",
+            "jra","Latn","ZZ",
+            "jut","Latn","DK",
+            "jv","Latn","ID",
+            "jw","Latn","ID",
+            "kaa","Cyrl","UZ",
+            "kab","Latn","DZ",
+            "kac","Latn","MM",
+            "kad","Latn","ZZ",
+            "kai","Latn","ZZ",
+            "kaj","Latn","NG",
+            "kam","Latn","KE",
+            "kao","Latn","ML",
+            "kbd","Cyrl","RU",
+            "kbm","Latn","ZZ",
+            "kbp","Latn","ZZ",
+            "kbq","Latn","ZZ",
+            "kbx","Latn","ZZ",
+            "kby","Arab","NE",
+            "kcg","Latn","NG",
+            "kck","Latn","ZW",
+            "kcl","Latn","ZZ",
+            "kct","Latn","ZZ",
+            "kde","Latn","TZ",
+            "kdh","Arab","TG",
+            "kdl","Latn","ZZ",
+            "kdt","Thai","TH",
+            "kea","Latn","CV",
+            "ken","Latn","CM",
+            "kez","Latn","ZZ",
+            "kfo","Latn","CI",
+            "kfr","Deva","IN",
+            "kfy","Deva","IN",
+            "kg","Latn","CD",
+            "kge","Latn","ID",
+            "kgf","Latn","ZZ",
+            "kgp","Latn","BR",
+            "kha","Latn","IN",
+            "khb","Talu","CN",
+            "khn","Deva","IN",
+            "khq","Latn","ML",
+            "khs","Latn","ZZ",
+            "kht","Mymr","IN",
+            "khw","Arab","PK",
+            "khz","Latn","ZZ",
+            "ki","Latn","KE",
+            "kij","Latn","ZZ",
+            "kiu","Latn","TR",
+            "kiw","Latn","ZZ",
+            "kj","Latn","NA",
+            "kjd","Latn","ZZ",
+            "kjg","Laoo","LA",
+            "kjs","Latn","ZZ",
+            "kjy","Latn","ZZ",
+            "kk","Cyrl","KZ",
+            "kk","Arab","AF",
+            "kk","Arab","CN",
+            "kk","Arab","IR",
+            "kk","Arab","MN",
+            "kkc","Latn","ZZ",
+            "kkj","Latn","CM",
+            "kl","Latn","GL",
+            "kln","Latn","KE",
+            "klq","Latn","ZZ",
+            "klt","Latn","ZZ",
+            "klx","Latn","ZZ",
+            "km","Khmr","KH",
+            "kmb","Latn","AO",
+            "kmh","Latn","ZZ",
+            "kmo","Latn","ZZ",
+            "ku","Latn","TR",
+            "ku","Arab","LB",
+            "ku","Arab","IQ",
+            "kms","Latn","ZZ",
+            "kmu","Latn","ZZ",
+            "kmw","Latn","ZZ",
+            "kn","Knda","IN",
+            "kr","Latn","ZZ",
+            "knf","Latn","GW",
+            "kok","Deva","IN",
+            "knp","Latn","ZZ",
+            "ko","Kore","KR",
+            "koi","Cyrl","RU",
+            "kol","Latn","ZZ",
+            "kos","Latn","FM",
+            "koz","Latn","ZZ",
+            "kpe","Latn","LR",
+            "kpf","Latn","ZZ",
+            "kpo","Latn","ZZ",
+            "kpr","Latn","ZZ",
+            "kv","Cyrl","RU",
+            "kpx","Latn","ZZ",
+            "kqb","Latn","ZZ",
+            "kqf","Latn","ZZ",
+            "kqs","Latn","ZZ",
+            "kqy","Ethi","ZZ",
+            "krc","Cyrl","RU",
+            "kri","Latn","SL",
+            "krj","Latn","PH",
+            "krl","Latn","RU",
+            "krs","Latn","ZZ",
+            "kru","Deva","IN",
+            "ks","Arab","IN",
+            "ksb","Latn","TZ",
+            "ksd","Latn","ZZ",
+            "ksf","Latn","CM",
+            "ksh","Latn","DE",
+            "ksj","Latn","ZZ",
+            "ksr","Latn","ZZ",
+            "ktb","Ethi","ZZ",
+            "ktm","Latn","ZZ",
+            "kto","Latn","ZZ",
+            "kub","Latn","ZZ",
+            "kud","Latn","ZZ",
+            "kue","Latn","ZZ",
+            "kuj","Latn","ZZ",
+            "kum","Cyrl","RU",
+            "kun","Latn","ZZ",
+            "kup","Latn","ZZ",
+            "kus","Latn","ZZ",
+            "kvg","Latn","ZZ",
+            "kvr","Latn","ID",
+            "kvx","Arab","PK",
+            "kw","Latn","GB",
+            "kwj","Latn","ZZ",
+            "kwo","Latn","ZZ",
+            "yam","Latn","ZZ",
+            "kxa","Latn","ZZ",
+            "kxc","Ethi","ZZ",
+            "tvd","Latn","ZZ",
+            "kxm","Thai","TH",
+            "kxp","Arab","PK",
+            "kxw","Latn","ZZ",
+            "kxz","Latn","ZZ",
+            "ky","Cyrl","KG",
+            "ky","Arab","CN",
+            "ky","Latn","TR",
+            "kye","Latn","ZZ",
+            "kyx","Latn","ZZ",
+            "kzr","Latn","ZZ",
+            "la","Latn","VA",
+            "lab","Lina","GR",
+            "lad","Hebr","IL",
+            "lag","Latn","TZ",
+            "lah","Arab","PK",
+            "laj","Latn","UG",
+            "las","Latn","ZZ",
+            "lb","Latn","LU",
+            "lbe","Cyrl","RU",
+            "lbu","Latn","ZZ",
+            "lbw","Latn","ID",
+            "lcm","Latn","ZZ",
+            "lcp","Thai","CN",
+            "ldb","Latn","ZZ",
+            "led","Latn","ZZ",
+            "lee","Latn","ZZ",
+            "lem","Latn","ZZ",
+            "lep","Lepc","IN",
+            "leq","Latn","ZZ",
+            "leu","Latn","ZZ",
+            "lez","Cyrl","RU",
+            "lg","Latn","UG",
+            "lgg","Latn","ZZ",
+            "li","Latn","NL",
+            "lia","Latn","ZZ",
+            "lid","Latn","ZZ",
+            "lif","Deva","NP",
+            "lif","Limb","IN",
+            "lig","Latn","ZZ",
+            "lih","Latn","ZZ",
+            "lij","Latn","IT",
+            "lis","Lisu","CN",
+            "ljp","Latn","ID",
+            "lki","Arab","IR",
+            "lkt","Latn","US",
+            "lle","Latn","ZZ",
+            "lln","Latn","ZZ",
+            "lmn","Telu","IN",
+            "lmo","Latn","IT",
+            "lmp","Latn","ZZ",
+            "ln","Latn","CD",
+            "lns","Latn","ZZ",
+            "lnu","Latn","ZZ",
+            "lo","Laoo","LA",
+            "loj","Latn","ZZ",
+            "lok","Latn","ZZ",
+            "lol","Latn","CD",
+            "lor","Latn","ZZ",
+            "los","Latn","ZZ",
+            "loz","Latn","ZM",
+            "lrc","Arab","IR",
+            "lt","Latn","LT",
+            "ltg","Latn","LV",
+            "lu","Latn","CD",
+            "lua","Latn","CD",
+            "luo","Latn","KE",
+            "luz","Arab","IR",
+            "lv","Latn","LV",
+            "lwl","Thai","TH",
+            "lzh","Hans","CN",
+            "lzz","Latn","TR",
+            "mk","Cyrl","MK",
+            "mad","Latn","ID",
+            "maf","Latn","CM",
+            "mag","Deva","IN",
+            "mai","Deva","IN",
+            "mak","Latn","ID",
+            "mi","Latn","NZ",
+            "mas","Latn","KE",
+            "maw","Latn","ZZ",
+            "ms","Latn","MY",
+            "ms","Arab","CC",
+            "ms","Arab","ID",
+            "maz","Latn","MX",
+            "mbh","Latn","ZZ",
+            "mbo","Latn","ZZ",
+            "mbq","Latn","ZZ",
+            "mbu","Latn","ZZ",
+            "mbw","Latn","ZZ",
+            "mci","Latn","ZZ",
+            "mcp","Latn","ZZ",
+            "mcq","Latn","ZZ",
+            "mcr","Latn","ZZ",
+            "mcu","Latn","ZZ",
+            "mda","Latn","ZZ",
+            "mde","Arab","ZZ",
+            "mdf","Cyrl","RU",
+            "mdh","Latn","PH",
+            "mdj","Latn","ZZ",
+            "mdr","Latn","ID",
+            "mdx","Ethi","ZZ",
+            "med","Latn","ZZ",
+            "mee","Latn","ZZ",
+            "mek","Latn","ZZ",
+            "men","Latn","SL",
+            "mer","Latn","KE",
+            "met","Latn","ZZ",
+            "meu","Latn","ZZ",
+            "mfa","Arab","TH",
+            "mfe","Latn","MU",
+            "mfn","Latn","ZZ",
+            "mfo","Latn","ZZ",
+            "mfq","Latn","ZZ",
+            "mg","Latn","MG",
+            "mgh","Latn","MZ",
+            "mgl","Latn","ZZ",
+            "mgo","Latn","CM",
+            "mgp","Deva","NP",
+            "mgy","Latn","TZ",
+            "mh","Latn","MH",
+            "mhi","Latn","ZZ",
+            "mhl","Latn","ZZ",
+            "mif","Latn","ZZ",
+            "min","Latn","ID",
+            "mis","Hatr","IQ",
+            "mis","Medf","NG",
+            "miw","Latn","ZZ",
+            "mki","Arab","ZZ",
+            "mkl","Latn","ZZ",
+            "mkp","Latn","ZZ",
+            "mkw","Latn","ZZ",
+            "ml","Mlym","IN",
+            "mle","Latn","ZZ",
+            "mlp","Latn","ZZ",
+            "mls","Latn","SD",
+            "mmo","Latn","ZZ",
+            "mmu","Latn","ZZ",
+            "mmx","Latn","ZZ",
+            "mna","Latn","ZZ",
+            "mnf","Latn","ZZ",
+            "mni","Beng","IN",
+            "mnw","Mymr","MM",
+            "ro","Latn","RO",
+            "moa","Latn","ZZ",
+            "moe","Latn","CA",
+            "moh","Latn","CA",
+            "mos","Latn","BF",
+            "mox","Latn","ZZ",
+            "mpp","Latn","ZZ",
+            "mps","Latn","ZZ",
+            "mpt","Latn","ZZ",
+            "mpx","Latn","ZZ",
+            "mql","Latn","ZZ",
+            "mr","Deva","IN",
+            "mrd","Deva","NP",
+            "mrj","Cyrl","RU",
+            "mro","Mroo","BD",
+            "mt","Latn","MT",
+            "mtc","Latn","ZZ",
+            "mtf","Latn","ZZ",
+            "mti","Latn","ZZ",
+            "mtr","Deva","IN",
+            "mua","Latn","CM",
+            "raj","Deva","IN",
+            "mur","Latn","ZZ",
+            "mus","Latn","US",
+            "mva","Latn","ZZ",
+            "mvn","Latn","ZZ",
+            "mvy","Arab","PK",
+            "mwk","Latn","ML",
+            "mwv","Latn","ID",
+            "mww","Hmnp","US",
+            "mxc","Latn","ZW",
+            "mxm","Latn","ZZ",
+            "myk","Latn","ZZ",
+            "mym","Ethi","ZZ",
+            "myv","Cyrl","RU",
+            "myw","Latn","ZZ",
+            "myx","Latn","UG",
+            "myz","Mand","IR",
+            "mzk","Latn","ZZ",
+            "mzm","Latn","ZZ",
+            "mzn","Arab","IR",
+            "mzp","Latn","ZZ",
+            "mzw","Latn","ZZ",
+            "mzz","Latn","ZZ",
+            "na","Latn","NR",
+            "nac","Latn","ZZ",
+            "naf","Latn","ZZ",
+            "nak","Latn","ZZ",
+            "nan","Hans","CN",
+            "nap","Latn","IT",
+            "naq","Latn","NA",
+            "nas","Latn","ZZ",
+            "nb","Latn","NO",
+            "nca","Latn","ZZ",
+            "nce","Latn","ZZ",
+            "ncf","Latn","ZZ",
+            "nch","Latn","MX",
+            "nco","Latn","ZZ",
+            "ncu","Latn","ZZ",
+            "nd","Latn","ZW",
+            "ndc","Latn","MZ",
+            "nds","Latn","DE",
+            "ne","Deva","NP",
+            "neb","Latn","ZZ",
+            "new","Deva","NP",
+            "nex","Latn","ZZ",
+            "nfr","Latn","ZZ",
+            "ng","Latn","NA",
+            "nga","Latn","ZZ",
+            "ngb","Latn","ZZ",
+            "ngl","Latn","MZ",
+            "nhb","Latn","ZZ",
+            "nhe","Latn","MX",
+            "nhw","Latn","MX",
+            "nif","Latn","ZZ",
+            "nii","Latn","ZZ",
+            "nij","Latn","ID",
+            "nin","Latn","ZZ",
+            "niu","Latn","NU",
+            "niy","Latn","ZZ",
+            "niz","Latn","ZZ",
+            "njo","Latn","IN",
+            "nkg","Latn","ZZ",
+            "nko","Latn","ZZ",
+            "nmg","Latn","CM",
+            "nmz","Latn","ZZ",
+            "nn","Latn","NO",
+            "nnf","Latn","ZZ",
+            "nnh","Latn","CM",
+            "nnk","Latn","ZZ",
+            "nnm","Latn","ZZ",
+            "nnp","Wcho","IN",
+            "no","Latn","NO",
+            "nod","Lana","TH",
+            "noe","Deva","IN",
+            "non","Runr","SE",
+            "nop","Latn","ZZ",
+            "nou","Latn","ZZ",
+            "nqo","Nkoo","GN",
+            "nr","Latn","ZA",
+            "nrb","Latn","ZZ",
+            "nsk","Cans","CA",
+            "nsn","Latn","ZZ",
+            "nso","Latn","ZA",
+            "nss","Latn","ZZ",
+            "ntm","Latn","ZZ",
+            "ntr","Latn","ZZ",
+            "nui","Latn","ZZ",
+            "nup","Latn","ZZ",
+            "nus","Latn","SS",
+            "nuv","Latn","ZZ",
+            "nux","Latn","ZZ",
+            "nv","Latn","US",
+            "nwb","Latn","ZZ",
+            "nxq","Latn","CN",
+            "nxr","Latn","ZZ",
+            "ny","Latn","MW",
+            "nym","Latn","TZ",
+            "nyn","Latn","UG",
+            "nzi","Latn","GH",
+            "oc","Latn","FR",
+            "ogc","Latn","ZZ",
+            "okr","Latn","ZZ",
+            "okv","Latn","ZZ",
+            "ong","Latn","ZZ",
+            "onn","Latn","ZZ",
+            "ons","Latn","ZZ",
+            "opm","Latn","ZZ",
+            "or","Orya","IN",
+            "oro","Latn","ZZ",
+            "oru","Arab","ZZ",
+            "os","Cyrl","GE",
+            "osa","Osge","US",
+            "ota","Arab","ZZ",
+            "otk","Orkh","MN",
+            "ozm","Latn","ZZ",
+            "pa","Guru","IN",
+            "pa","Arab","PK",
+            "pag","Latn","PH",
+            "pal","Phli","IR",
+            "pal","Phlp","CN",
+            "pam","Latn","PH",
+            "pap","Latn","AW",
+            "pau","Latn","PW",
+            "pbi","Latn","ZZ",
+            "ps","Arab","AF",
+            "pcd","Latn","FR",
+            "pcm","Latn","NG",
+            "pdc","Latn","US",
+            "pdt","Latn","CA",
+            "ped","Latn","ZZ",
+            "peo","Xpeo","IR",
+            "pex","Latn","ZZ",
+            "pfl","Latn","DE",
+            "phl","Arab","ZZ",
+            "phn","Phnx","LB",
+            "pil","Latn","ZZ",
+            "pip","Latn","ZZ",
+            "pka","Brah","IN",
+            "pko","Latn","KE",
+            "pl","Latn","PL",
+            "pla","Latn","ZZ",
+            "pms","Latn","IT",
+            "png","Latn","ZZ",
+            "pnn","Latn","ZZ",
+            "pnt","Grek","GR",
+            "pon","Latn","FM",
+            "ppo","Latn","ZZ",
+            "pra","Khar","PK",
+            "prd","Arab","IR",
+            "prg","Latn","001",
+            "pss","Latn","ZZ",
+            "pt","Latn","BR",
+            "ptp","Latn","ZZ",
+            "puu","Latn","GA",
+            "pwa","Latn","ZZ",
+            "qu","Latn","PE",
+            "quc","Latn","GT",
+            "qug","Latn","EC",
+            "rai","Latn","ZZ",
+            "rao","Latn","ZZ",
+            "rcf","Latn","RE",
+            "rej","Latn","ID",
+            "rel","Latn","ZZ",
+            "res","Latn","ZZ",
+            "rgn","Latn","IT",
+            "rhg","Arab","MM",
+            "ria","Latn","IN",
+            "rif","Tfng","MA",
+            "rif","Latn","NL",
+            "rjs","Deva","NP",
+            "rkt","Beng","BD",
+            "rm","Latn","CH",
+            "rmf","Latn","FI",
+            "rmo","Latn","CH",
+            "rmt","Arab","IR",
+            "rmu","Latn","SE",
+            "rn","Latn","BI",
+            "rna","Latn","ZZ",
+            "rng","Latn","MZ",
+            "rob","Latn","ID",
+            "rof","Latn","TZ",
+            "roo","Latn","ZZ",
+            "rro","Latn","ZZ",
+            "rtm","Latn","FJ",
+            "ru","Cyrl","RU",
+            "rue","Cyrl","UA",
+            "rug","Latn","SB",
+            "rw","Latn","RW",
+            "rwk","Latn","TZ",
+            "rwo","Latn","ZZ",
+            "ryu","Kana","JP",
+            "sa","Deva","IN",
+            "saf","Latn","GH",
+            "sah","Cyrl","RU",
+            "saq","Latn","KE",
+            "sas","Latn","ID",
+            "sat","Latn","IN",
+            "sav","Latn","SN",
+            "saz","Saur","IN",
+            "sba","Latn","ZZ",
+            "sbe","Latn","ZZ",
+            "sbp","Latn","TZ",
+            "sc","Latn","IT",
+            "sr","Cyrl","RS",
+            "sr","Latn","RU",
+            "sr","Latn","ME",
+            "sr","Latn","RO",
+            "sr","Latn","TR",
+            "sck","Deva","IN",
+            "scl","Arab","ZZ",
+            "scn","Latn","IT",
+            "sco","Latn","GB",
+            "scs","Latn","CA",
+            "sd","Arab","PK",
+            "sd","Deva","IN",
+            "sd","Khoj","IN",
+            "sd","Sind","IN",
+            "sdc","Latn","IT",
+            "sdh","Arab","IR",
+            "se","Latn","NO",
+            "sef","Latn","CI",
+            "seh","Latn","MZ",
+            "sei","Latn","MX",
+            "ses","Latn","ML",
+            "sg","Latn","CF",
+            "sga","Ogam","IE",
+            "sgs","Latn","LT",
+            "sgw","Ethi","ZZ",
+            "sgz","Latn","ZZ",
+            "shi","Tfng","MA",
+            "shk","Latn","ZZ",
+            "shn","Mymr","MM",
+            "shu","Arab","ZZ",
+            "si","Sinh","LK",
+            "sid","Latn","ET",
+            "sig","Latn","ZZ",
+            "sil","Latn","ZZ",
+            "sim","Latn","ZZ",
+            "sjr","Latn","ZZ",
+            "sk","Latn","SK",
+            "skc","Latn","ZZ",
+            "skr","Arab","PK",
+            "sks","Latn","ZZ",
+            "sl","Latn","SI",
+            "sld","Latn","ZZ",
+            "sli","Latn","PL",
+            "sll","Latn","ZZ",
+            "sly","Latn","ID",
+            "sm","Latn","WS",
+            "sma","Latn","SE",
+            "smj","Latn","SE",
+            "smn","Latn","FI",
+            "smp","Samr","IL",
+            "smq","Latn","ZZ",
+            "sms","Latn","FI",
+            "sn","Latn","ZW",
+            "snc","Latn","ZZ",
+            "snk","Latn","ML",
+            "snp","Latn","ZZ",
+            "snx","Latn","ZZ",
+            "sny","Latn","ZZ",
+            "so","Latn","SO",
+            "sog","Sogd","UZ",
+            "sok","Latn","ZZ",
+            "soq","Latn","ZZ",
+            "sou","Thai","TH",
+            "soy","Latn","ZZ",
+            "spd","Latn","ZZ",
+            "spl","Latn","ZZ",
+            "sps","Latn","ZZ",
+            "srb","Sora","IN",
+            "srn","Latn","SR",
+            "srr","Latn","SN",
+            "ss","Latn","ZA",
+            "ssd","Latn","ZZ",
+            "ssg","Latn","ZZ",
+            "ssy","Latn","ER",
+            "st","Latn","ZA",
+            "stk","Latn","ZZ",
+            "stq","Latn","DE",
+            "su","Latn","ID",
+            "sua","Latn","ZZ",
+            "sue","Latn","ZZ",
+            "suk","Latn","TZ",
+            "sur","Latn","ZZ",
+            "sus","Latn","GN",
+            "sv","Latn","SE",
+            "sw","Latn","TZ",
+            "swb","Arab","YT",
+            "swc","Latn","CD",
+            "swg","Latn","DE",
+            "swp","Latn","ZZ",
+            "swv","Deva","IN",
+            "sxn","Latn","ID",
+            "sxw","Latn","ZZ",
+            "syl","Beng","BD",
+            "szl","Latn","PL",
+            "ta","Taml","IN",
+            "taj","Deva","NP",
+            "tal","Latn","ZZ",
+            "tan","Latn","ZZ",
+            "taq","Latn","ZZ",
+            "tbc","Latn","ZZ",
+            "tbd","Latn","ZZ",
+            "tbf","Latn","ZZ",
+            "tbg","Latn","ZZ",
+            "tbo","Latn","ZZ",
+            "tbw","Latn","PH",
+            "tbz","Latn","ZZ",
+            "tci","Latn","ZZ",
+            "tcy","Knda","IN",
+            "tdd","Tale","CN",
+            "tdg","Deva","NP",
+            "tdh","Deva","NP",
+            "te","Telu","IN",
+            "ted","Latn","ZZ",
+            "tem","Latn","SL",
+            "teo","Latn","UG",
+            "tet","Latn","TL",
+            "tfi","Latn","ZZ",
+            "tg","Cyrl","TJ",
+            "tg","Arab","PK",
+            "tgc","Latn","ZZ",
+            "tgo","Latn","ZZ",
+            "tgu","Latn","ZZ",
+            "th","Thai","TH",
+            "thl","Deva","NP",
+            "thq","Deva","NP",
+            "thr","Deva","NP",
+            "ti","Ethi","ET",
+            "tif","Latn","ZZ",
+            "tig","Ethi","ER",
+            "tik","Latn","ZZ",
+            "tim","Latn","ZZ",
+            "tio","Latn","ZZ",
+            "tiv","Latn","NG",
+            "tk","Latn","TM",
+            "tkl","Latn","TK",
+            "tkr","Latn","AZ",
+            "tkt","Deva","NP",
+            "tl","Latn","PH",
+            "tlf","Latn","ZZ",
+            "tlx","Latn","ZZ",
+            "tly","Latn","AZ",
+            "tmh","Latn","NE",
+            "tmy","Latn","ZZ",
+            "tn","Latn","ZA",
+            "tnh","Latn","ZZ",
+            "to","Latn","TO",
+            "tof","Latn","ZZ",
+            "tog","Latn","MW",
+            "toq","Latn","ZZ",
+            "tpi","Latn","PG",
+            "tpm","Latn","ZZ",
+            "tpz","Latn","ZZ",
+            "tqo","Latn","ZZ",
+            "tr","Latn","TR",
+            "tru","Latn","TR",
+            "trv","Latn","TW",
+            "trw","Arab","ZZ",
+            "ts","Latn","ZA",
+            "tsd","Grek","GR",
+            "tsf","Deva","NP",
+            "tsg","Latn","PH",
+            "tsj","Tibt","BT",
+            "tsw","Latn","ZZ",
+            "tt","Cyrl","RU",
+            "ttd","Latn","ZZ",
+            "tte","Latn","ZZ",
+            "ttj","Latn","UG",
+            "ttr","Latn","ZZ",
+            "tts","Thai","TH",
+            "ttt","Latn","AZ",
+            "tuh","Latn","ZZ",
+            "tul","Latn","ZZ",
+            "tum","Latn","MW",
+            "tuq","Latn","ZZ",
+            "tvl","Latn","TV",
+            "tvu","Latn","ZZ",
+            "twh","Latn","ZZ",
+            "twq","Latn","NE",
+            "txg","Tang","CN",
+            "ty","Latn","PF",
+            "tya","Latn","ZZ",
+            "tyv","Cyrl","RU",
+            "tzm","Latn","MA",
+            "ubu","Latn","ZZ",
+            "udm","Cyrl","RU",
+            "ug","Arab","CN",
+            "ug","Cyrl","KZ",
+            "ug","Cyrl","MN",
+            "uga","Ugar","SY",
+            "uk","Cyrl","UA",
+            "uli","Latn","FM",
+            "umb","Latn","AO",
+            "en","Latn","NG",
+            "en","Latn","AU",
+            "es","Latn","MX",
+            "sw","Latn","CD",
+            "en","Latn","ZA",
+            "es","Latn","CU",
+            "en","Latn","PG",
+            "en","Latn","GU",
+            "uz","Latn","UZ",
+            "ar","Arab","SA",
+            "en","Latn","GB",
+            "es","Latn","419",
+            "pap","Latn","CW",
+            "ar","Arab","SD",
+            "ar","Arab","YE",
+            "ca","Latn","AD",
+            "ar","Arab","AE",
+            "fa","Arab","AF",
+            "pt","Latn","AO",
+            "und","Latn","AQ",
+            "es","Latn","AR",
+            "sm","Latn","AS",
+            "de","Latn","AT",
+            "nl","Latn","AW",
+            "sv","Latn","AX",
+            "nl","Latn","BE",
+            "fr","Latn","BF",
+            "ar","Arab","BH",
+            "fr","Latn","BJ",
+            "fr","Latn","BL",
+            "ms","Latn","BN",
+            "es","Latn","BO",
+            "pap","Latn","BQ",
+            "und","Latn","BV",
+            "fr","Latn","CF",
+            "fr","Latn","CG",
+            "de","Latn","CH",
+            "fr","Latn","CI",
+            "es","Latn","CL",
+            "fr","Latn","CM",
+            "es","Latn","CO",
+            "und","Latn","CP",
+            "es","Latn","CR",
+            "pt","Latn","CV",
+            "el","Grek","CY",
+            "aa","Latn","DJ",
+            "es","Latn","DO",
+            "ar","Arab","DZ",
+            "es","Latn","EA",
+            "es","Latn","EC",
+            "ar","Arab","EH",
+            "ti","Ethi","ER",
+            "de","Latn","EZ",
+            "fr","Latn","GA",
+            "fr","Latn","GF",
+            "fr","Latn","GN",
+            "fr","Latn","GP",
+            "es","Latn","GQ",
+            "und","Latn","GS",
+            "es","Latn","GT",
+            "pt","Latn","GW",
+            "und","Latn","HM",
+            "es","Latn","HN",
+            "es","Latn","IC",
+            "ar","Arab","IQ",
+            "ar","Arab","JO",
+            "sw","Latn","KE",
+            "ar","Arab","KM",
+            "ko","Kore","KP",
+            "ar","Arab","KW",
+            "ru","Cyrl","KZ",
+            "ar","Arab","LB",
+            "de","Latn","LI",
+            "st","Latn","LS",
+            "fr","Latn","LU",
+            "ar","Arab","LY",
+            "ar","Arab","MA",
+            "fr","Latn","MC",
+            "ro","Latn","MD",
+            "fr","Latn","MF",
+            "fr","Latn","MQ",
+            "ar","Arab","MR",
+            "pt","Latn","MZ",
+            "af","Latn","NA",
+            "fr","Latn","NC",
+            "ha","Latn","NE",
+            "es","Latn","NI",
+            "ar","Arab","OM",
+            "es","Latn","PA",
+            "es","Latn","PE",
+            "fr","Latn","PF",
+            "ur","Arab","PK",
+            "fr","Latn","PM",
+            "es","Latn","PR",
+            "ar","Arab","PS",
+            "pt","Latn","PT",
+            "ar","Arab","QA",
+            "en","Latn","DG",
+            "fr","Latn","RE",
+            "fr","Latn","SC",
+            "nb","Latn","SJ",
+            "it","Latn","SM",
+            "fr","Latn","SN",
+            "nl","Latn","SR",
+            "pt","Latn","ST",
+            "es","Latn","SV",
+            "ar","Arab","SY",
+            "fr","Latn","TD",
+            "fr","Latn","TF",
+            "fr","Latn","TG",
+            "pt","Latn","TL",
+            "ar","Arab","TN",
+            "sw","Latn","UG",
+            "es","Latn","UY",
+            "it","Latn","VA",
+            "vi","Latn","VN",
+            "es","Latn","VE",
+            "fr","Latn","WF",
+            "sq","Latn","XK",
+            "fr","Latn","YT",
+            "lez","Aghb","RU",
+            "ks","Arab","GB",
+            "ur","Arab","IN",
+            "ur","Arab","MU",
+            "ha","Arab","NG",
+            "fa","Arab","TJ",
+            "az","Arab","TR",
+            "ban","Bali","ID",
+            "bbc","Batk","ID",
+            "sa","Bhks","IN",
+            "fr","Brai","FR",
+            "bug","Bugi","ID",
+            "bku","Buhd","PH",
+            "xcr","Cari","TR",
+            "mk","Cyrl","AL",
+            "sr","Cyrl","BA",
+            "mk","Cyrl","GR",
+            "uk","Cyrl","MD",
+            "bg","Cyrl","RO",
+            "uk","Cyrl","SK",
+            "kbd","Cyrl","TR",
+            "sr","Cyrl","XK",
+            "ne","Deva","BT",
+            "hif","Deva","FJ",
+            "bho","Deva","MU",
+            "doi","Dogr","IN",
+            "fr","Dupl","FR",
+            "sq","Elba","AL",
+            "arc","Elym","IR",
+            "wsg","Gong","IN",
+            "sa","Gran","IN",
+            "ko","Hang","KR",
+            "zh","Hani","CN",
+            "hnn","Hano","PH",
+            "yi","Hebr","CA",
+            "yi","Hebr","GB",
+            "yi","Hebr","SE",
+            "yi","Hebr","UA",
+            "yi","Hebr","US",
+            "ja","Hira","JP",
+            "hu","Hung","HU",
+            "ko","Jamo","KR",
+            "jv","Java","ID",
+            "ja","Kana","JP",
+            "bho","Kthi","IN",
+            "en","Latn","ET",
+            "tk","Latn","AF",
+            "ku","Latn","AM",
+            "za","Latn","CN",
+            "tr","Latn","CY",
+            "fr","Latn","DZ",
+            "ku","Latn","GE",
+            "tk","Latn","IR",
+            "fr","Latn","KM",
+            "fr","Latn","MA",
+            "sq","Latn","MK",
+            "pt","Latn","MO",
+            "fr","Latn","MR",
+            "fr","Latn","SY",
+            "fr","Latn","TN",
+            "pl","Latn","UA",
+            "xlc","Lyci","TR",
+            "xld","Lydi","TR",
+            "hi","Mahj","IN",
+            "mak","Maka","ID",
+            "xmn","Mani","CN",
+            "bo","Marc","CN",
+            "men","Mend","SL",
+            "xmr","Merc","SD",
+            "xmr","Mero","SD",
+            "mr","Modi","IN",
+            "mni","Mtei","IN",
+            "skr","Mult","PK",
+            "mnw","Mymr","TH",
+            "sa","Nand","IN",
+            "xna","Narb","SA",
+            "new","Newa","NP",
+            "zhx","Nshu","CN",
+            "sat","Olck","IN",
+            "so","Osma","SO",
+            "kv","Perm","RU",
+            "lzh","Phag","CN",
+            "xpr","Prti","IR",
+            "rej","Rjng","ID",
+            "rhg","Rohg","MM",
+            "xsa","Sarb","YE",
+            "sa","Shrd","IN",
+            "sa","Sidd","IN",
+            "sog","Sogo","UZ",
+            "su","Sund","ID",
+            "syl","Sylo","BD",
+            "tbw","Tagb","PH",
+            "doi","Takr","IN",
+            "zgh","Tfng","MA",
+            "fil","Tglg","PH",
+            "kdt","Thai","KH",
+            "kdt","Thai","LA",
+            "mai","Tirh","IN",
+            "vai","Vaii","LR",
+            "hoc","Wara","IN",
+            "cmg","Zanb","MN",
+            "unr","Beng","IN",
+            "unr","Deva","NP",
+            "unx","Beng","IN",
+            "uri","Latn","ZZ",
+            "urt","Latn","ZZ",
+            "urw","Latn","ZZ",
+            "usa","Latn","ZZ",
+            "utr","Latn","ZZ",
+            "uvh","Latn","ZZ",
+            "uvl","Latn","ZZ",
+            "uz","Arab","AF",
+            "uz","Cyrl","CN",
+            "vag","Latn","ZZ",
+            "van","Latn","ZZ",
+            "ve","Latn","ZA",
+            "vec","Latn","IT",
+            "vep","Latn","RU",
+            "vic","Latn","SX",
+            "viv","Latn","ZZ",
+            "vls","Latn","BE",
+            "vmf","Latn","DE",
+            "vmw","Latn","MZ",
+            "vo","Latn","001",
+            "vot","Latn","RU",
+            "vro","Latn","EE",
+            "vun","Latn","TZ",
+            "vut","Latn","ZZ",
+            "wa","Latn","BE",
+            "wae","Latn","CH",
+            "waj","Latn","ZZ",
+            "wal","Ethi","ET",
+            "wan","Latn","ZZ",
+            "war","Latn","PH",
+            "wbp","Latn","AU",
+            "wbq","Telu","IN",
+            "wbr","Deva","IN",
+            "wci","Latn","ZZ",
+            "wer","Latn","ZZ",
+            "wgi","Latn","ZZ",
+            "whg","Latn","ZZ",
+            "wib","Latn","ZZ",
+            "wiu","Latn","ZZ",
+            "wiv","Latn","ZZ",
+            "wja","Latn","ZZ",
+            "wji","Latn","ZZ",
+            "wls","Latn","WF",
+            "wmo","Latn","ZZ",
+            "wnc","Latn","ZZ",
+            "wni","Arab","KM",
+            "wnu","Latn","ZZ",
+            "wo","Latn","SN",
+            "wob","Latn","ZZ",
+            "wos","Latn","ZZ",
+            "wrs","Latn","ZZ",
+            "wsk","Latn","ZZ",
+            "wtm","Deva","IN",
+            "wuu","Hans","CN",
+            "wuv","Latn","ZZ",
+            "wwa","Latn","ZZ",
+            "xav","Latn","BR",
+            "xbi","Latn","ZZ",
+            "xes","Latn","ZZ",
+            "xh","Latn","ZA",
+            "xla","Latn","ZZ",
+            "xmf","Geor","GE",
+            "xnr","Deva","IN",
+            "xog","Latn","UG",
+            "xon","Latn","ZZ",
+            "xrb","Latn","ZZ",
+            "xsi","Latn","ZZ",
+            "xsm","Latn","ZZ",
+            "xsr","Deva","NP",
+            "xwe","Latn","ZZ",
+            "yao","Latn","MZ",
+            "yap","Latn","FM",
+            "yas","Latn","ZZ",
+            "yat","Latn","ZZ",
+            "yav","Latn","CM",
+            "yay","Latn","ZZ",
+            "yaz","Latn","ZZ",
+            "yba","Latn","ZZ",
+            "ybb","Latn","CM",
+            "yby","Latn","ZZ",
+            "yer","Latn","ZZ",
+            "ygr","Latn","ZZ",
+            "ygw","Latn","ZZ",
+            "yko","Latn","ZZ",
+            "yle","Latn","ZZ",
+            "ylg","Latn","ZZ",
+            "yll","Latn","ZZ",
+            "yml","Latn","ZZ",
+            "yo","Latn","NG",
+            "yon","Latn","ZZ",
+            "yrb","Latn","ZZ",
+            "yre","Latn","ZZ",
+            "yrl","Latn","BR",
+            "yss","Latn","ZZ",
+            "yua","Latn","MX",
+            "yue","Hant","HK",
+            "yue","Hans","CN",
+            "yuj","Latn","ZZ",
+            "yut","Latn","ZZ",
+            "yuw","Latn","ZZ",
+            "zag","Latn","SD",
+            "zdj","Arab","KM",
+            "zea","Latn","NL",
+            "zia","Latn","ZZ",
+            "zlm","Latn","TG",
+            "zmi","Latn","MY",
+            "zne","Latn","ZZ",
+            "zu","Latn","ZA",
+        }  // lsrs
+    }  // likely
+    match{
+        trie:bin{  // BytesTrie: 1269 bytes
+00186dc27f73c16e778077a25f78aaa2
+79a25e7a01e82af51165ee35117ae820
+012a854811616e01f398f41248616e01
+f347f42007b314b32ab426b536b605b3
+c45ab329b429b62910b5292a2bb026b1
+22b205b3c446b329b429b62905b3c43c
+b329b429b6291348616ef43f12ef66f2
+3501e92aef1165ee351165ee34012a85
+48166562f24c6174ee35736074a29275
+02e74af2a275fa1172f534012a854c16
+6174ee437972ec35117ae8350bef30f4
+0af428f5a45ef71165ee35ef72f16ef2
+107301e829f220022a8543344c166174
+ee437972ec2b167972ec4c6174ee2be8
+24e82ee93eee1165ee350262a68a6822
+7310f2291165ee34012a855316696ee8
+4c6174ee35734ae1a62ee41165ee3401
+2a8541167261e24c6174ee3512f961e1
+2909e921e93ceba82aeea41aefa417f4
+1172f5351165ee34012a8545167468e9
+4c6174ee356ca86a75a8a1e12ee54ce7
+1172f5351165ee34012a855416616dec
+4c6174ee351165ee34012a855416656c
+f54c6174ee35705d704671a2537202ed
+30eea2c2f71166f2351164e5350363a8
+5be156f374f41170f4a28007b3c217b3
+a61bb4a618b5a615b601b129b6291165
+ee34012a8547167572f54c6174ee3511
+65ee34012a8541167261e24c6174ee35
+12f565f3356d5a6ea2626f02e3a800ed
+a264f21165ee34012a854f167279e14c
+6174ee3509ee21eea2d7f2a43bf34af4
+a244f91165ee34012a854d16796df24c
+6174ee351169e4356652e7a6c2e96eeb
+4eec1165ee34012a854d166c79ed4c61
+74ee3512e565ee351162e73506e52ae5
+32ee50ef5af91165ee351165ee34012a
+8544166576e14c6174ee35106e01e235
+ef350164446e10e223733e7944e20164
+2c6e01ee35ef2310e13112ef65ee3512
+ee65ee3567c0f46a946a8e6b946c06e2
+1ee250e7a292eea646ef1165ee34012a
+854c16616fef4c6174ee351164e5296f
+2e7534e11169f43512fa65ee3512e166
+f23512f669e43507ed31ed32ee50f56e
+f91172f5351165ee34012a854b16686d
+f24c6174ee351165ee34012a854b166e
+64e14c6174ee351174f2357234e13ae7
+a4ddeb1172f53512e965ee351165ee34
+012a854716656ff24c6174ee35673a68
+7c6902e126e722f31165ee3506e40de4
+40eca26aeea267f51168e935612e7334
+e11165ee3512e165ee3512f764e52904
+6154e1a28cf254f4a485f91172f53401
+2a854116726dee437972ec3512f765ee
+350162287310e82910f329649e643e65
+4a6601ef2af9116eec351164e13511e1
+6e01e231ef3105f343f32ef496f51165
+f3351165f3a28007b30cb342b43eb53a
+b601b129b6292a2bb026b14eb204b029
+b229b329b429b52904b029b229b329b4
+29b52901b129b6291166e935e526ee2a
+ef1165ee351165eea28007b317b362b4
+30b55ab601b229b62904b027b127b327
+b427b5272a2bb030b12cb201b229b629
+04b029b129b329b427b529617662a29c
+6305720f7230efa2b6f91165ee3512f3
+66f235653068386b12e261f23513e266
+69ec3512f265ee3507eb5beb4ef252f9
+a250fa1172f534012a854c166174ee43
+7972ec351165ee351161f2a28007b313
+b33cb426b522b605b422b429b529b629
+10b3292a2bb026b122b205b40fb429b5
+29b62905b406b429b529b629b029b129
+b2291165f3356334e13ae2a244e6116e
+ec3512e865ee35127373f92905ee24ee
+3af258f30168287310e82910f2291165
+ee34012a854216656ee74c6174ee3511
+66f235652ee534e81168e93512ed65ee
+351172f535
+        }  // trie
+        regionToPartitions:bin{  // 1677 bytes
+000008090a00020000000b0009000201
+090001010a000a000000000000000a0c
+00000001010000000100000000000000
+000000000000010100000b0000000b00
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+0000000000000000000000000000000c
+010001000000000d0100000d01000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000900000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+000000000a0000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000101010102
+00020000010102010001020301010002
+01000101020001010101010101000202
+01020002020201010201000102020001
+01000101010100010201010201000201
+00020102010101000000010100010000
+01010002000200000000000000000000
+04010002000100010400000000000000
+00000101010d00000000010000000000
+00000001010200010001000001000000
+00000100000105000201020101010000
+02010100020101020203000100020000
+00000000000000000006000102000000
+01000201000000000000000101010000
+00000000010101010001010101000000
+00000000000000010000000000000002
+00010100000000000000000000000000
+00010001010100000001020001000100
+00000001000201010102000000000001
+00010000000000000101010101000004
+00040001010102010300000101010106
+03020402010101010201010100010001
+01010002000001000001010001000101
+00000000010000000000000000000000
+00010000000000000000000000000002
+00000002010101000001010201000000
+07010100000100020001000000000000
+00000000000000010000000000010000
+00000000000000010000000000000000
+00010000000100010001000000010101
+01010001010101010101010100000201
+01010200020101010002010001010100
+01010101040101000100020001010000
+01010000000000010000000000030e00
+00000007000000000002010100020002
+00020007000000000100000000000001
+00000000000000000000010000000000
+00000000000000010000000000000000
+00000000000000000001000000000000
+00000000000000000000000001010000
+00000000000000000000000001010000
+00000001000000000000000000000001
+00000000010000000001000000
+        }  // regionToPartitions
+        partitions{".","0","1","2","3","4","5","6","0123456","03","16","02","05","04","012346"}
+        paradigms{
+            "pt","Latn","BR",
+            "pt","Latn","PT",
+            "es","Latn","ES",
+            "es","Latn","419",
+            "en","Latn","US",
+            "en","Latn","GB",
+        }
+        distances:intvector{80,50,4,3}
+    }  // match
+}
index 885f3039bf6d7b8ee127ac0360a6578cf4fb1eaa..120db6844f25b4faeef6422f5f32d81fdfb93aa7 100644 (file)
@@ -205,10 +205,10 @@ main(int argc,
                 "\t-c or --copyright        include copyright notice\n");
         fprintf(stderr,
                 "\t-e or --encoding         encoding of source files\n"
-                "\t-d of --destdir          destination directory, followed by the path, defaults to %s\n"
-                "\t-s or --sourcedir        source directory for files followed by path, defaults to %s\n"
+                "\t-d or --destdir          destination directory, followed by the path, defaults to '%s'\n"
+                "\t-s or --sourcedir        source directory for files followed by path, defaults to '%s'\n"
                 "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
-                "\t                         followed by path, defaults to %s\n",
+                "\t                         followed by path, defaults to '%s'\n",
                 u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
         fprintf(stderr,
                 "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
index 884d5d5666081ac476c633e8119e88789ff81180..18a8c76dbc5df15560ece84ce1e665207f83b140 100644 (file)
@@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV
     }
 }
 
-static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
+static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
+                                int32_t &stringLength, UErrorCode *status)
 {
     struct UString *tokenValue;
     char           *result;
-    uint32_t        count;
 
     expect(state, TOK_STRING, &tokenValue, comment, line, status);
 
@@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
         return NULL;
     }
 
-    count = u_strlen(tokenValue->fChars);
-    if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
+    if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
         *status = U_INVALID_FORMAT_ERROR;
         error(*line, "invariant characters required for table keys, binary data, etc.");
         return NULL;
     }
 
-    result = static_cast<char *>(uprv_malloc(count+1));
+    result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
 
     if (result == NULL)
     {
@@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
         return NULL;
     }
 
-    u_UCharsToChars(tokenValue->fChars, result, count+1);
+    u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
+    stringLength = tokenValue->fLength;
     return result;
 }
 
@@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
     int32_t            value;
     UBool              readToken = FALSE;
     char              *stopstring;
-    uint32_t           len;
     struct UString     memberComments;
 
     IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
@@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
             return result;
         }
 
-        string = getInvariantString(state, NULL, NULL, status);
+        int32_t stringLength;
+        string = getInvariantString(state, NULL, NULL, stringLength, status);
 
         if (U_FAILURE(*status))
         {
@@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
 
         /* For handling illegal char in the Intvector */
         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
-        len=(uint32_t)(stopstring-string);
+        int32_t len = (int32_t)(stopstring-string);
 
-        if(len==uprv_strlen(string))
+        if(len==stringLength)
         {
             result->add(value, *status);
             uprv_free(string);
@@ -1454,7 +1454,8 @@ static struct SResource *
 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
 {
     uint32_t line;
-    LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
+    int32_t stringLength;
+    LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
     if (string.isNull() || U_FAILURE(*status))
     {
         return NULL;
@@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri
         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
     }
 
-    uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
-    if (count > 0){
-        if((count % 2)==0){
-            LocalMemory<uint8_t> value;
-            if (value.allocateInsteadAndCopy(count) == NULL)
-            {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                return NULL;
-            }
-
-            char toConv[3] = {'\0', '\0', '\0'};
-            for (uint32_t i = 0; i < count; i += 2)
-            {
-                toConv[0] = string[i];
-                toConv[1] = string[i + 1];
+    LocalMemory<uint8_t> value;
+    int32_t count = 0;
+    if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
+    {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
 
-                char *stopstring;
-                value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
-                uint32_t len=(uint32_t)(stopstring-toConv);
+    char toConv[3] = {'\0', '\0', '\0'};
+    for (int32_t i = 0; i < stringLength;)
+    {
+        // Skip spaces (which may have been line endings).
+        char c0 = string[i++];
+        if (c0 == ' ') { continue; }
+        if (i == stringLength) {
+            *status=U_INVALID_CHAR_FOUND;
+            error(line, "Encountered invalid binary value (odd number of hex digits)");
+            return NULL;
+        }
+        toConv[0] = c0;
+        toConv[1] = string[i++];
 
-                if(len!=2)
-                {
-                    *status=U_INVALID_CHAR_FOUND;
-                    return NULL;
-                }
-            }
+        char *stopstring;
+        value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
+        uint32_t len=(uint32_t)(stopstring-toConv);
 
-            return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
-        }
-        else
+        if(len!=2)
         {
-            *status = U_INVALID_CHAR_FOUND;
-            error(line, "Encountered invalid binary value (length is odd)");
+            *status=U_INVALID_CHAR_FOUND;
+            error(line, "Encountered invalid binary value (not all pairs of hex digits)");
             return NULL;
         }
     }
-    else
-    {
+
+    if (count == 0) {
         warning(startline, "Encountered empty binary value");
         return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
+    } else {
+        return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
     }
 }
 
@@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
     int32_t           value;
     char             *string;
     char             *stopstring;
-    uint32_t          len;
 
-    string = getInvariantString(state, NULL, NULL, status);
+    int32_t stringLength;
+    string = getInvariantString(state, NULL, NULL, stringLength, status);
 
     if (string == NULL || U_FAILURE(*status))
     {
@@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
     }
 
-    if (uprv_strlen(string) <= 0)
+    if (stringLength == 0)
     {
         warning(startline, "Encountered empty integer. Default value is 0.");
     }
@@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
     /* Allow integer support for hexdecimal, octal digit and decimal*/
     /* and handle illegal char in the integer*/
     value = uprv_strtoul(string, &stopstring, 0);
-    len=(uint32_t)(stopstring-string);
-    if(len==uprv_strlen(string))
+    int32_t len = (int32_t)(stopstring-string);
+    if(len==stringLength)
     {
         result = int_open(state->bundle, tag, value, comment, status);
     }
@@ -1567,7 +1567,8 @@ static struct SResource *
 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
 {
     uint32_t          line;
-    LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
+    int32_t stringLength;
+    LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
     if (U_FAILURE(*status))
     {
         return NULL;
@@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
 
     UCHARBUF *ucbuf;
     char     *fullname = NULL;
-    int32_t  count     = 0;
     const char* cp = NULL;
     const UChar* uBuffer = NULL;
 
-    filename = getInvariantString(state, &line, NULL, status);
-    count     = (int32_t)uprv_strlen(filename);
+    int32_t stringLength;
+    filename = getInvariantString(state, &line, NULL, stringLength, status);
 
     if (U_FAILURE(*status))
     {
@@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
     }
 
-    fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
+    fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
     /* test for NULL */
     if(fullname == NULL)
     {
index 781a6d0229ee345d86982ef58a45fac53e50d133..a8dfaa5c409910742852ca741b77397e9e60a4b5 100644 (file)
         <pathelement location="${icu4j.regiondata.jar}"/>
         <pathelement location="${icu4j.translit.jar}"/>
         <pathelement location="${icu4j.test-framework.jar}"/>
+        <pathelement location="${icu4j.tools.jar}"/>
         <pathelement location="${icu4j.core-tests.jar}"/>
         <pathelement location="${icu4j.collate-tests.jar}"/>
         <pathelement location="${icu4j.charset-tests.jar}"/>
         </ant>
     </target>
 
-    <target name="core-tests" depends="core, test-framework" description="Build core tests">
+    <target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
         <ant dir="${icu4j.core-tests.dir}" inheritAll="false">
             <reference refid="junit.jars"/>
         </ant>
         <ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
     </target>
 
-    <target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
+    <target name="tools" depends="core, collate, translit" description="Build tool classes">
         <ant dir="${icu4j.tools.dir}" inheritAll="false"/>
     </target>
 
index 317f5444ebc9967860bd9de6e7277ed54625d73f..d1dc775d1832b583771b872d3708130c3dc82f15 100644 (file)
@@ -4,18 +4,18 @@ package com.ibm.icu.impl.locale;
 
 import java.util.Objects;
 
-final class LSR {
-    static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
+public final class LSR {
+    public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
 
-    static final boolean DEBUG_OUTPUT = false;
+    public static final boolean DEBUG_OUTPUT = false;
 
-    final String language;
-    final String script;
-    final String region;
+    public final String language;
+    public final String script;
+    public final String region;
     /** Index for region, negative if ill-formed. @see indexForRegion */
     final int regionIndex;
 
-    LSR(String language, String script, String region) {
+    public LSR(String language, String script, String region) {
         this.language = language;
         this.script = script;
         this.region = region;
@@ -27,7 +27,7 @@ final class LSR {
      * Do not rely on a particular region->index mapping; it may change.
      * Returns 0 for ill-formed strings.
      */
-    static final int indexForRegion(String region) {
+    public static final int indexForRegion(String region) {
         if (region.length() == 2) {
             int a = region.charAt(0) - 'A';
             if (a < 0 || 25 < a) { return 0; }
index 56735a8b5cda433187e1d534c492b220d6c4e61b..8fe0fe8042abff4f382b7846a5896c028e1fb95b 100644 (file)
@@ -2,12 +2,20 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.locale;
 
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.MissingResourceException;
 import java.util.Set;
 import java.util.TreeMap;
 
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
 import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.ULocale;
 
 /**
@@ -16,17 +24,17 @@ import com.ibm.icu.util.ULocale;
  */
 public class LocaleDistance {
     /** Distance value bit flag, set by the builder. */
-    static final int DISTANCE_SKIP_SCRIPT = 0x80;
+    public static final int DISTANCE_SKIP_SCRIPT = 0x80;
     /** Distance value bit flag, set by trieNext(). */
     private static final int DISTANCE_IS_FINAL = 0x100;
     private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
             DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
     // Indexes into array of distances.
-    static final int IX_DEF_LANG_DISTANCE = 0;
-    static final int IX_DEF_SCRIPT_DISTANCE = 1;
-    static final int IX_DEF_REGION_DISTANCE = 2;
-    static final int IX_MIN_REGION_DISTANCE = 3;
-    static final int IX_LIMIT = 4;
+    public static final int IX_DEF_LANG_DISTANCE = 0;
+    public static final int IX_DEF_SCRIPT_DISTANCE = 1;
+    public static final int IX_DEF_REGION_DISTANCE = 2;
+    public static final int IX_MIN_REGION_DISTANCE = 3;
+    public static final int IX_LIMIT = 4;
     private static final int ABOVE_THRESHOLD = 100;
 
     private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
@@ -54,22 +62,100 @@ public class LocaleDistance {
     private final int minRegionDistance;
     private final int defaultDemotionPerDesiredLocale;
 
-    // TODO: Load prebuilt data from a resource bundle
-    // to avoid the dependency on the builder code.
     // VisibleForTesting
-    public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
-
-    LocaleDistance(BytesTrie trie,
-            byte[] regionToPartitionsIndex, String[] partitionArrays,
-            Set<LSR> paradigmLSRs, int[] distances) {
-        this.trie = trie;
-        this.regionToPartitionsIndex = regionToPartitionsIndex;
-        this.partitionArrays = partitionArrays;
-        this.paradigmLSRs = paradigmLSRs;
-        defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
-        defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
-        defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
-        this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
+    public static final class Data {
+        public byte[] trie;
+        public byte[] regionToPartitionsIndex;
+        public String[] partitionArrays;
+        public Set<LSR> paradigmLSRs;
+        public int[] distances;
+
+        public Data(byte[] trie,
+                byte[] regionToPartitionsIndex, String[] partitionArrays,
+                Set<LSR> paradigmLSRs, int[] distances) {
+            this.trie = trie;
+            this.regionToPartitionsIndex = regionToPartitionsIndex;
+            this.partitionArrays = partitionArrays;
+            this.paradigmLSRs = paradigmLSRs;
+            this.distances = distances;
+        }
+
+        private static UResource.Value getValue(UResource.Table table,
+                String key, UResource.Value value) {
+            if (!table.findValue(key, value)) {
+                throw new MissingResourceException(
+                        "langInfo.res missing data", "", "match/" + key);
+            }
+            return value;
+        }
+
+        // VisibleForTesting
+        public static Data load() throws MissingResourceException {
+            ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
+                    ICUData.ICU_BASE_NAME, "langInfo",
+                    ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+            UResource.Value value = langInfo.getValueWithFallback("match");
+            UResource.Table matchTable = value.getTable();
+
+            ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
+            byte[] trie = new byte[buffer.remaining()];
+            buffer.get(trie);
+
+            buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
+            byte[] regionToPartitions = new byte[buffer.remaining()];
+            buffer.get(regionToPartitions);
+            if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
+                throw new MissingResourceException(
+                        "langInfo.res binary data too short", "", "match/regionToPartitions");
+            }
+
+            String[] partitions = getValue(matchTable, "partitions", value).getStringArray();
+
+            Set<LSR> paradigmLSRs;
+            if (matchTable.findValue("paradigms", value)) {
+                String[] paradigms = value.getStringArray();
+                paradigmLSRs = new HashSet<>(paradigms.length / 3);
+                for (int i = 0; i < paradigms.length; i += 3) {
+                    paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
+                }
+            } else {
+                paradigmLSRs = Collections.emptySet();
+            }
+
+            int[] distances = getValue(matchTable, "distances", value).getIntVector();
+            if (distances.length < IX_LIMIT) {
+                throw new MissingResourceException(
+                        "langInfo.res intvector too short", "", "match/distances");
+            }
+
+            return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (this == other) { return true; }
+            if (!getClass().equals(other.getClass())) { return false; }
+            Data od = (Data)other;
+            return Arrays.equals(trie, od.trie) &&
+                    Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
+                    Arrays.equals(partitionArrays, od.partitionArrays) &&
+                    paradigmLSRs.equals(od.paradigmLSRs) &&
+                    Arrays.equals(distances, od.distances);
+        }
+    }
+
+    // VisibleForTesting
+    public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
+
+    private LocaleDistance(Data data) {
+        this.trie = new BytesTrie(data.trie, 0);
+        this.regionToPartitionsIndex = data.regionToPartitionsIndex;
+        this.partitionArrays = data.partitionArrays;
+        this.paradigmLSRs = data.paradigmLSRs;
+        defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
+        defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
+        defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
+        this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
 
         LSR en = new LSR("en", "Latn", "US");
         LSR enGB = new LSR("en", "Latn", "GB");
@@ -102,7 +188,7 @@ public class LocaleDistance {
      * (negative if none has a distance below the threshold),
      * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
      */
-    int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
+    public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
             int threshold, FavorSubtag favorSubtag) {
         BytesTrie iter = new BytesTrie(trie);
         // Look up the desired language only once for all supported LSRs.
@@ -335,7 +421,7 @@ public class LocaleDistance {
         return partitionArrays[pIndex];
     }
 
-    boolean isParadigmLSR(LSR lsr) {
+    public boolean isParadigmLSR(LSR lsr) {
         return paradigmLSRs.contains(lsr);
     }
 
@@ -348,7 +434,7 @@ public class LocaleDistance {
         return defaultRegionDistance;
     }
 
-    int getDefaultDemotionPerDesiredLocale() {
+    public int getDefaultDemotionPerDesiredLocale() {
         return defaultDemotionPerDesiredLocale;
     }
 
index 0873b6d3241aad8f68c2789022ff9fde5ac90f90..3b63705e6a3c6063a8696f557f650fa875d4ed0c 100644 (file)
@@ -2,10 +2,18 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.locale;
 
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.MissingResourceException;
 import java.util.TreeMap;
 
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
 import com.ibm.icu.util.BytesTrie;
 import com.ibm.icu.util.ULocale;
 
@@ -14,30 +22,93 @@ public final class XLikelySubtags {
     private static final String PSEUDO_BIDI_PREFIX = "+";  // -XB, -PSBIDI
     private static final String PSEUDO_CRACKED_PREFIX = ",";  // -XC, -PSCRACK
 
-    static final int SKIP_SCRIPT = 1;
+    public static final int SKIP_SCRIPT = 1;
 
     private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
 
-    // TODO: Load prebuilt data from a resource bundle
-    // to avoid the dependency on the builder code.
     // VisibleForTesting
-    public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
-
-    static final class Data {
-        private final Map<String, String> languageAliases;
-        private final Map<String, String> regionAliases;
-        private final BytesTrie trie;
-        private final LSR[] lsrs;
-
-        Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
-                BytesTrie trie, LSR[] lsrs) {
+    public static final class Data {
+        public final Map<String, String> languageAliases;
+        public final Map<String, String> regionAliases;
+        public final byte[] trie;
+        public final LSR[] lsrs;
+
+        public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
+                byte[] trie, LSR[] lsrs) {
             this.languageAliases = languageAliases;
             this.regionAliases = regionAliases;
             this.trie = trie;
             this.lsrs = lsrs;
         }
+
+        private static UResource.Value getValue(UResource.Table table,
+                String key, UResource.Value value) {
+            if (!table.findValue(key, value)) {
+                throw new MissingResourceException(
+                        "langInfo.res missing data", "", "likely/" + key);
+            }
+            return value;
+        }
+
+        // VisibleForTesting
+        public static Data load() throws MissingResourceException {
+            ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
+                    ICUData.ICU_BASE_NAME, "langInfo",
+                    ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+            UResource.Value value = langInfo.getValueWithFallback("likely");
+            UResource.Table likelyTable = value.getTable();
+
+            Map<String, String> languageAliases;
+            if (likelyTable.findValue("languageAliases", value)) {
+                String[] pairs = value.getStringArray();
+                languageAliases = new HashMap<>(pairs.length / 2);
+                for (int i = 0; i < pairs.length; i += 2) {
+                    languageAliases.put(pairs[i], pairs[i + 1]);
+                }
+            } else {
+                languageAliases = Collections.emptyMap();
+            }
+
+            Map<String, String> regionAliases;
+            if (likelyTable.findValue("regionAliases", value)) {
+                String[] pairs = value.getStringArray();
+                regionAliases = new HashMap<>(pairs.length / 2);
+                for (int i = 0; i < pairs.length; i += 2) {
+                    regionAliases.put(pairs[i], pairs[i + 1]);
+                }
+            } else {
+                regionAliases = Collections.emptyMap();
+            }
+
+            ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary();
+            byte[] trie = new byte[buffer.remaining()];
+            buffer.get(trie);
+
+            String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
+            LSR[] lsrs = new LSR[lsrSubtags.length / 3];
+            for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
+                lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
+            }
+
+            return new Data(languageAliases, regionAliases, trie, lsrs);
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (this == other) { return true; }
+            if (!getClass().equals(other.getClass())) { return false; }
+            Data od = (Data)other;
+            return
+                    languageAliases.equals(od.languageAliases) &&
+                    regionAliases.equals(od.regionAliases) &&
+                    Arrays.equals(trie, od.trie) &&
+                    Arrays.equals(lsrs, od.lsrs);
+        }
     }
 
+    // VisibleForTesting
+    public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
+
     private final Map<String, String> languageAliases;
     private final Map<String, String> regionAliases;
 
@@ -54,7 +125,7 @@ public final class XLikelySubtags {
     private XLikelySubtags(XLikelySubtags.Data data) {
         languageAliases = data.languageAliases;
         regionAliases = data.regionAliases;
-        trie = data.trie;
+        trie = new BytesTrie(data.trie, 0);
         lsrs = data.lsrs;
 
         // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
@@ -85,6 +156,23 @@ public final class XLikelySubtags {
         }
     }
 
+    /**
+     * Implementation of LocaleMatcher.canonicalize(ULocale).
+     */
+    public ULocale canonicalize(ULocale locale) {
+        String lang = locale.getLanguage();
+        String lang2 = languageAliases.get(lang);
+        String region = locale.getCountry();
+        String region2 = regionAliases.get(region);
+        if (lang2 != null || region2 != null) {
+            return new ULocale(
+                lang2 == null ? lang : lang2,
+                locale.getScript(),
+                region2 == null ? region : region2);
+        }
+        return locale;
+    }
+
     private static String getCanonical(Map<String, String> aliases, String alias) {
         String canonical = aliases.get(alias);
         return canonical == null ? alias : canonical;
@@ -101,7 +189,7 @@ public final class XLikelySubtags {
                 locale.getVariant());
     }
 
-    LSR makeMaximizedLsrFrom(Locale locale) {
+    public LSR makeMaximizedLsrFrom(Locale locale) {
         String tag = locale.toLanguageTag();
         if (tag.startsWith("x-")) {
             // Private use language tag x-subtag-subtag...
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java
deleted file mode 100644 (file)
index f7ffeb2..0000000
+++ /dev/null
@@ -1,900 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Objects;
-
-import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Immutable class that picks the best match between a user's desired locales and
- * and application's supported locales.
- *
- * <p>If there are multiple supported locales with the same (language, script, region)
- * likely subtags, then the current implementation returns the first of those locales.
- * It ignores variant subtags (except for pseudolocale variants) and extensions.
- * This may change in future versions.
- *
- * <p>For example, the current implementation does not distinguish between
- * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
- *
- * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
- * or place it earlier in the list of supported locales.
- *
- * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
- * The current implementation compares each desired locale with supported locales
- * in the following order:
- * 1. Default locale, if supported;
- * 2. CLDR "paradigm locales" like en-GB and es-419;
- * 3. other supported locales.
- * This may change in future versions.
- *
- * <p>TODO: Migration notes.
- *
- * @author markdavis
- */
-public final class XLocaleMatcher {
-    private static final LSR UND_LSR = new LSR("und","","");
-    private static final ULocale UND_ULOCALE = new ULocale("und");
-    private static final Locale UND_LOCALE = new Locale("und");
-
-    // Activates debugging output to stderr with details of GetBestMatch.
-    private static final boolean TRACE_MATCHER = false;
-
-    private static abstract class LsrIterator implements Iterator<LSR> {
-        int bestDesiredIndex = -1;
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException();
-        }
-
-        public abstract void rememberCurrent(int desiredIndex);
-    }
-
-    /**
-     * Builder option for whether the language subtag or the script subtag is most important.
-     *
-     * @see Builder#setFavorSubtag(FavorSubtag)
-     * @draft ICU 65
-     * @provisional This API might change or be removed in a future release.
-     */
-    public enum FavorSubtag {
-        /**
-         * Language differences are most important, then script differences, then region differences.
-         * (This is the default behavior.)
-         *
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        LANGUAGE,
-        /**
-         * Makes script differences matter relatively more than language differences.
-         *
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        SCRIPT
-    }
-
-    /**
-     * Builder option for whether all desired locales are treated equally or
-     * earlier ones are preferred.
-     *
-     * @see Builder#setDemotionPerDesiredLocale(Demotion)
-     * @draft ICU 65
-     * @provisional This API might change or be removed in a future release.
-     */
-    public enum Demotion {
-        /**
-         * All desired locales are treated equally.
-         *
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        NONE,
-        /**
-         * Earlier desired locales are preferred.
-         *
-         * <p>From each desired locale to the next,
-         * the distance to any supported locale is increased by an additional amount
-         * which is at least as large as most region mismatches.
-         * A later desired locale has to have a better match with some supported locale
-         * due to more than merely having the same region subtag.
-         *
-         * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
-         * yields <code>Result(en-GB, en)</code> because
-         * with the demotion of sv its perfect match is no better than
-         * the region distance between the earlier desired locale en-GB and en=en-US.
-         *
-         * <p>Notes:
-         * <ul>
-         *   <li>In some cases, language and/or script differences can be as small as
-         *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
-         *   <li>It is possible for certain region differences to be larger than usual,
-         *       and larger than the demotion.
-         *       (As of CLDR 35 there is no such case, but
-         *        this is possible in future versions of the data.)
-         * </ul>
-         *
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        REGION
-    }
-
-    /**
-     * Data for the best-matching pair of a desired and a supported locale.
-     *
-     * @draft ICU 65
-     * @provisional This API might change or be removed in a future release.
-     */
-    public static final class Result {
-        private final ULocale desiredULocale;
-        private final ULocale supportedULocale;
-        private final Locale desiredLocale;
-        private final Locale supportedLocale;
-        private final int desiredIndex;
-        private final int supportedIndex;
-
-        private Result(ULocale udesired, ULocale usupported,
-                Locale desired, Locale supported,
-                int desIndex, int suppIndex) {
-            desiredULocale = udesired;
-            supportedULocale = usupported;
-            desiredLocale = desired;
-            supportedLocale = supported;
-            desiredIndex = desIndex;
-            supportedIndex = suppIndex;
-        }
-
-        /**
-         * Returns the best-matching desired locale.
-         * null if the list of desired locales is empty or if none matched well enough.
-         *
-         * @return the best-matching desired locale, or null.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public ULocale getDesiredULocale() {
-            return desiredULocale == null && desiredLocale != null ?
-                    ULocale.forLocale(desiredLocale) : desiredULocale;
-        }
-        /**
-         * Returns the best-matching desired locale.
-         * null if the list of desired locales is empty or if none matched well enough.
-         *
-         * @return the best-matching desired locale, or null.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Locale getDesiredLocale() {
-            return desiredLocale == null && desiredULocale != null ?
-                    desiredULocale.toLocale() : desiredLocale;
-        }
-
-        /**
-         * Returns the best-matching supported locale.
-         * If none matched well enough, this is the default locale.
-         * The default locale is null if the list of supported locales is empty and
-         * no explicit default locale is set.
-         *
-         * @return the best-matching supported locale, or null.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public ULocale getSupportedULocale() { return supportedULocale; }
-        /**
-         * Returns the best-matching supported locale.
-         * If none matched well enough, this is the default locale.
-         * The default locale is null if the list of supported locales is empty and
-         * no explicit default locale is set.
-         *
-         * @return the best-matching supported locale, or null.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Locale getSupportedLocale() { return supportedLocale; }
-
-        /**
-         * Returns the index of the best-matching desired locale in the input Iterable order.
-         * -1 if the list of desired locales is empty or if none matched well enough.
-         *
-         * @return the index of the best-matching desired locale, or -1.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public int getDesiredIndex() { return desiredIndex; }
-
-        /**
-         * Returns the index of the best-matching supported locale in the constructor’s or builder’s input order
-         * (“set” Collection plus “added” locales).
-         * If the matcher was built from a locale list string, then the iteration order is that
-         * of a LocalePriorityList built from the same string.
-         * -1 if the list of supported locales is empty or if none matched well enough.
-         *
-         * @return the index of the best-matching supported locale, or -1.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public int getSupportedIndex() { return supportedIndex; }
-
-        /**
-         * Takes the best-matching supported locale and adds relevant fields of the
-         * best-matching desired locale, such as the -t- and -u- extensions.
-         * May replace some fields of the supported locale.
-         * The result is the locale that should be used for date and number formatting, collation, etc.
-         *
-         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
-         *
-         * @return the service locale, combining the best-matching desired and supported locales.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public ULocale makeServiceULocale() {
-            ULocale bestDesired = getDesiredULocale();
-            ULocale serviceLocale = supportedULocale;
-            if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
-                ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
-
-                // Copy the region from bestDesired, if there is one.
-                // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty.
-                String region = bestDesired.getCountry();
-                if (!region.isEmpty()) {
-                    b.setRegion(region);
-                }
-
-                // Copy the variants from bestDesired, if there are any.
-                // Note that this will override any serviceLocale variants.
-                // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
-                // TODO: Why replace? Why not append?
-                String variants = bestDesired.getVariant();
-                if (!variants.isEmpty()) {
-                    b.setVariant(variants);
-                }
-
-                // Copy the extensions from bestDesired, if there are any.
-                // Note that this will override any serviceLocale extensions.
-                // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
-                // (replacing calendar).
-                // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale??
-                //       (Unsure about this one.)
-                for (char extensionKey : bestDesired.getExtensionKeys()) {
-                    b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
-                }
-                serviceLocale = b.build();
-            }
-            return serviceLocale;
-        }
-
-        /**
-         * Takes the best-matching supported locale and adds relevant fields of the
-         * best-matching desired locale, such as the -t- and -u- extensions.
-         * May replace some fields of the supported locale.
-         * The result is the locale that should be used for date and number formatting, collation, etc.
-         *
-         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
-         *
-         * @return the service locale, combining the best-matching desired and supported locales.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Locale makeServiceLocale() {
-            return makeServiceULocale().toLocale();
-        }
-    }
-
-    private final int thresholdDistance;
-    private final int demotionPerDesiredLocale;
-    private final FavorSubtag favorSubtag;
-
-    // These are in input order.
-    private final ULocale[] supportedULocales;
-    private final Locale[] supportedLocales;
-    // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
-    private final Map<LSR, Integer> supportedLsrToIndex;
-    // Array versions of the supportedLsrToIndex keys and values.
-    // The distance lookup loops over the supportedLsrs and returns the index of the best match.
-    private final LSR[] supportedLsrs;
-    private final int[] supportedIndexes;
-    private final ULocale defaultULocale;
-    private final Locale defaultLocale;
-    private final int defaultLocaleIndex;
-
-    /**
-     * LocaleMatcher Builder.
-     *
-     * @see XLocaleMatcher#builder()
-     * @draft ICU 65
-     * @provisional This API might change or be removed in a future release.
-     */
-    public static class Builder {
-        private List<ULocale> supportedLocales;
-        private int thresholdDistance = -1;
-        private Demotion demotion;
-        private ULocale defaultLocale;
-        private FavorSubtag favor;
-
-        /**
-         * Parses the string like {@link LocalePriorityList} does and
-         * sets the supported locales accordingly.
-         * Clears any previously set/added supported locales first.
-         *
-         * @param locales the languagePriorityList to set
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setSupportedLocales(String locales) {
-            return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
-        }
-
-        /**
-         * Copies the supported locales, preserving iteration order.
-         * Clears any previously set/added supported locales first.
-         * Duplicates are allowed, and are not removed.
-         *
-         * @param locales the list of locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setSupportedULocales(Collection<ULocale> locales) {
-            supportedLocales = new ArrayList<>(locales);
-            return this;
-        }
-
-        /**
-         * Copies the supported locales, preserving iteration order.
-         * Clears any previously set/added supported locales first.
-         * Duplicates are allowed, and are not removed.
-         *
-         * @param locales the list of locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setSupportedLocales(Collection<Locale> locales) {
-            supportedLocales = new ArrayList<>(locales.size());
-            for (Locale locale : locales) {
-                supportedLocales.add(ULocale.forLocale(locale));
-            }
-            return this;
-        }
-
-        /**
-         * Adds another supported locale.
-         * Duplicates are allowed, and are not removed.
-         *
-         * @param locale the list of locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder addSupportedULocale(ULocale locale) {
-            if (supportedLocales == null) {
-                supportedLocales = new ArrayList<>();
-            }
-            supportedLocales.add(locale);
-            return this;
-        }
-
-        /**
-         * Adds another supported locale.
-         * Duplicates are allowed, and are not removed.
-         *
-         * @param locale the list of locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder addSupportedLocale(Locale locale) {
-            return addSupportedULocale(ULocale.forLocale(locale));
-        }
-
-        /**
-         * Sets the default locale; if null, or if it is not set explicitly,
-         * then the first supported locale is used as the default locale.
-         *
-         * @param defaultLocale the default locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setDefaultULocale(ULocale defaultLocale) {
-            this.defaultLocale = defaultLocale;
-            return this;
-        }
-
-        /**
-         * Sets the default locale; if null, or if it is not set explicitly,
-         * then the first supported locale is used as the default locale.
-         *
-         * @param defaultLocale the default locale
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setDefaultLocale(Locale defaultLocale) {
-            this.defaultLocale = ULocale.forLocale(defaultLocale);
-            return this;
-        }
-
-        /**
-         * If SCRIPT, then the language differences are smaller than script differences.
-         * This is used in situations (such as maps) where
-         * it is better to fall back to the same script than a similar language.
-         *
-         * @param subtag the subtag to favor
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setFavorSubtag(FavorSubtag subtag) {
-            this.favor = subtag;
-            return this;
-        }
-
-        /**
-         * Option for whether all desired locales are treated equally or
-         * earlier ones are preferred (this is the default).
-         *
-         * @param demotion the demotion per desired locale to set.
-         * @return this Builder object
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public Builder setDemotionPerDesiredLocale(Demotion demotion) {
-            this.demotion = demotion;
-            return this;
-        }
-
-        /**
-         * <i>Internal only!</i>
-         *
-         * @param thresholdDistance the thresholdDistance to set, with -1 = default
-         * @return this Builder object
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public Builder internalSetThresholdDistance(int thresholdDistance) {
-            if (thresholdDistance > 100) {
-                thresholdDistance = 100;
-            }
-            this.thresholdDistance = thresholdDistance;
-            return this;
-        }
-
-        /**
-         * Builds and returns a new locale matcher.
-         * This builder can continue to be used.
-         *
-         * @return new XLocaleMatcher.
-         * @draft ICU 65
-         * @provisional This API might change or be removed in a future release.
-         */
-        public XLocaleMatcher build() {
-            return new XLocaleMatcher(this);
-        }
-
-        @Override
-        public String toString() {
-            StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
-            if (!supportedLocales.isEmpty()) {
-                s.append(" supported={").append(supportedLocales.toString()).append('}');
-            }
-            if (defaultLocale != null) {
-                s.append(" default=").append(defaultLocale.toString());
-            }
-            if (favor != null) {
-                s.append(" distance=").append(favor.toString());
-            }
-            if (thresholdDistance >= 0) {
-                s.append(String.format(" threshold=%d", thresholdDistance));
-            }
-            if (demotion != null) {
-                s.append(" demotion=").append(demotion.toString());
-            }
-            return s.append('}').toString();
-        }
-    }
-
-    /**
-     * Returns a builder used in chaining parameters for building a LocaleMatcher.
-     *
-     * @return a new Builder object
-     * @draft ICU 65
-     * @provisional This API might change or be removed in a future release.
-     */
-    public static Builder builder() {
-        return new Builder();
-    }
-
-    /** Convenience method */
-    public XLocaleMatcher(String supportedLocales) {
-        this(builder().setSupportedLocales(supportedLocales));
-    }
-    /** Convenience method */
-    public XLocaleMatcher(LocalePriorityList supportedLocales) {
-        this(builder().setSupportedULocales(supportedLocales.getULocales()));
-    }
-
-    private XLocaleMatcher(Builder builder) {
-        thresholdDistance = builder.thresholdDistance < 0 ?
-                LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
-        // Store the supported locales in input order,
-        // so that when different types are used (e.g., java.util.Locale)
-        // we can return those by parallel index.
-        int supportedLocalesLength = builder.supportedLocales.size();
-        supportedULocales = new ULocale[supportedLocalesLength];
-        supportedLocales = new Locale[supportedLocalesLength];
-        // Supported LRSs in input order.
-        LSR lsrs[] = new LSR[supportedLocalesLength];
-        // Also find the first supported locale whose LSR is
-        // the same as that for the default locale.
-        ULocale udef = builder.defaultLocale;
-        Locale def = null;
-        LSR defLSR = null;
-        int idef = -1;
-        if (udef != null) {
-            def = udef.toLocale();
-            defLSR = getMaximalLsrOrUnd(udef);
-        }
-        int i = 0;
-        for (ULocale locale : builder.supportedLocales) {
-            supportedULocales[i] = locale;
-            supportedLocales[i] = locale.toLocale();
-            LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
-            if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
-                idef = i;
-            }
-            ++i;
-        }
-
-        // We need an unordered map from LSR to first supported locale with that LSR,
-        // and an ordered list of (LSR, Indexes).
-        // We use a LinkedHashMap for both,
-        // and insert the supported locales in the following order:
-        // 1. Default locale, if it is supported.
-        // 2. Priority locales in builder order.
-        // 3. Remaining locales in builder order.
-        supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
-        Map<LSR, Integer> otherLsrToIndex = null;
-        if (idef >= 0) {
-            supportedLsrToIndex.put(defLSR, idef);
-        }
-        i = 0;
-        for (ULocale locale : supportedULocales) {
-            if (i == idef) { continue; }
-            LSR lsr = lsrs[i];
-            if (defLSR == null) {
-                assert i == 0;
-                udef = locale;
-                def = supportedLocales[0];
-                defLSR = lsr;
-                idef = 0;
-                supportedLsrToIndex.put(lsr, 0);
-            } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
-                putIfAbsent(supportedLsrToIndex, lsr, i);
-            } else {
-                if (otherLsrToIndex == null) {
-                    otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
-                }
-                putIfAbsent(otherLsrToIndex, lsr, i);
-            }
-            ++i;
-        }
-        if (otherLsrToIndex != null) {
-            supportedLsrToIndex.putAll(otherLsrToIndex);
-        }
-        int numSuppLsrs = supportedLsrToIndex.size();
-        supportedLsrs = new LSR[numSuppLsrs];
-        supportedIndexes = new int[numSuppLsrs];
-        i = 0;
-        for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
-            supportedLsrs[i] = entry.getKey();  // = lsrs[entry.getValue()]
-            supportedIndexes[i++] = entry.getValue();
-        }
-
-        defaultULocale = udef;
-        defaultLocale = def;
-        defaultLocaleIndex = idef;
-        demotionPerDesiredLocale =
-                builder.demotion == Demotion.NONE ? 0 :
-                    LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale();  // null or REGION
-        favorSubtag = builder.favor;
-    }
-
-    private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
-        Integer index = lsrToIndex.get(lsr);
-        if (index == null) {
-            lsrToIndex.put(lsr, i);
-        }
-    }
-
-    private static final LSR getMaximalLsrOrUnd(ULocale locale) {
-        if (locale.equals(UND_ULOCALE)) {
-            return UND_LSR;
-        } else {
-            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
-        }
-    }
-
-    private static final LSR getMaximalLsrOrUnd(Locale locale) {
-        if (locale.equals(UND_LOCALE)) {
-            return UND_LSR;
-        } else {
-            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
-        }
-    }
-
-    private static final class ULocaleLsrIterator extends LsrIterator {
-        private Iterator<ULocale> locales;
-        private ULocale current, remembered;
-
-        ULocaleLsrIterator(Iterator<ULocale> locales) {
-            this.locales = locales;
-        }
-
-        @Override
-        public boolean hasNext() {
-            return locales.hasNext();
-        }
-
-        @Override
-        public LSR next() {
-            current = locales.next();
-            return getMaximalLsrOrUnd(current);
-        }
-
-        @Override
-        public void rememberCurrent(int desiredIndex) {
-            bestDesiredIndex = desiredIndex;
-            remembered = current;
-        }
-    }
-
-    private static final class LocaleLsrIterator extends LsrIterator {
-        private Iterator<Locale> locales;
-        private Locale current, remembered;
-
-        LocaleLsrIterator(Iterator<Locale> locales) {
-            this.locales = locales;
-        }
-
-        @Override
-        public boolean hasNext() {
-            return locales.hasNext();
-        }
-
-        @Override
-        public LSR next() {
-            current = locales.next();
-            return getMaximalLsrOrUnd(current);
-        }
-
-        @Override
-        public void rememberCurrent(int desiredIndex) {
-            bestDesiredIndex = desiredIndex;
-            remembered = current;
-        }
-    }
-
-    public ULocale getBestMatch(ULocale desiredLocale) {
-        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
-        int suppIndex = getBestSuppIndex(desiredLSR, null);
-        return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
-    }
-
-    public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
-        Iterator<ULocale> desiredIter = desiredLocales.iterator();
-        if (!desiredIter.hasNext()) {
-            return defaultULocale;
-        }
-        ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
-        LSR desiredLSR = lsrIter.next();
-        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
-        return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
-    }
-
-    public ULocale getBestMatch(String desiredLocaleList) {
-        return getBestMatch(LocalePriorityList.add(desiredLocaleList).build());
-    }
-
-    public Locale getBestLocale(Locale desiredLocale) {
-        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
-        int suppIndex = getBestSuppIndex(desiredLSR, null);
-        return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
-    }
-
-    public Locale getBestLocale(Iterable<Locale> desiredLocales) {
-        Iterator<Locale> desiredIter = desiredLocales.iterator();
-        if (!desiredIter.hasNext()) {
-            return defaultLocale;
-        }
-        LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
-        LSR desiredLSR = lsrIter.next();
-        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
-        return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
-    }
-
-    private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
-        if (suppIndex < 0) {
-            return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
-        } else if (desiredLocale != null) {
-            return new Result(desiredLocale, supportedULocales[suppIndex],
-                    null, supportedLocales[suppIndex], 0, suppIndex);
-        } else {
-            return new Result(lsrIter.remembered, supportedULocales[suppIndex],
-                    null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
-        }
-    }
-
-    private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
-        if (suppIndex < 0) {
-            return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
-        } else if (desiredLocale != null) {
-            return new Result(null, supportedULocales[suppIndex],
-                    desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
-        } else {
-            return new Result(null, supportedULocales[suppIndex],
-                    lsrIter.remembered, supportedLocales[suppIndex],
-                    lsrIter.bestDesiredIndex, suppIndex);
-        }
-    }
-
-    public Result getBestMatchResult(ULocale desiredLocale) {
-        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
-        int suppIndex = getBestSuppIndex(desiredLSR, null);
-        return makeResult(desiredLocale, null, suppIndex);
-    }
-
-    /**
-     * Returns the best match between the desired and supported locales.
-     *
-     * @param desiredLocales Typically a user's languages, in order of preference (descending).
-     * @return the best-matching pair of a desired and a supported locale.
-     */
-    public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
-        Iterator<ULocale> desiredIter = desiredLocales.iterator();
-        if (!desiredIter.hasNext()) {
-            return makeResult(UND_ULOCALE, null, -1);
-        }
-        ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
-        LSR desiredLSR = lsrIter.next();
-        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
-        return makeResult(null, lsrIter, suppIndex);
-    }
-
-    public Result getBestLocaleResult(Locale desiredLocale) {
-        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
-        int suppIndex = getBestSuppIndex(desiredLSR, null);
-        return makeResult(desiredLocale, null, suppIndex);
-    }
-
-    public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
-        Iterator<Locale> desiredIter = desiredLocales.iterator();
-        if (!desiredIter.hasNext()) {
-            return makeResult(UND_LOCALE, null, -1);
-        }
-        LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
-        LSR desiredLSR = lsrIter.next();
-        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
-        return makeResult(null, lsrIter, suppIndex);
-    }
-
-    /**
-     * @param desiredLSR The first desired locale's LSR.
-     * @param remainingIter Remaining desired LSRs, null or empty if none.
-     * @return the index of the best-matching supported locale, or -1 if there is no good match.
-     */
-    private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
-        int desiredIndex = 0;
-        int bestSupportedLsrIndex = -1;
-        for (int bestDistance = thresholdDistance;;) {
-            // Quick check for exact maximized LSR.
-            Integer index = supportedLsrToIndex.get(desiredLSR);
-            if (index != null) {
-                int suppIndex = index;
-                if (TRACE_MATCHER) {
-                    System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
-                            supportedULocales[suppIndex]);
-                }
-                if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
-                return suppIndex;
-            }
-            int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
-                    desiredLSR, supportedLsrs, bestDistance, favorSubtag);
-            if (bestIndexAndDistance >= 0) {
-                bestDistance = bestIndexAndDistance & 0xff;
-                if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
-                bestSupportedLsrIndex = bestIndexAndDistance >> 8;
-            }
-            if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
-                break;
-            }
-            if (remainingIter == null || !remainingIter.hasNext()) {
-                break;
-            }
-            desiredLSR = remainingIter.next();
-        }
-        if (bestSupportedLsrIndex < 0) {
-            if (TRACE_MATCHER) {
-                System.err.printf("Returning default %s: no good match\n", defaultULocale);
-            }
-            return -1;
-        }
-        int suppIndex = supportedIndexes[bestSupportedLsrIndex];
-        if (TRACE_MATCHER) {
-            System.err.printf("Returning %s: best matching supported locale\n",
-                    supportedULocales[suppIndex]);
-        }
-        return suppIndex;
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
-        if (supportedULocales.length > 0) {
-            s.append(" supported={").append(supportedULocales[0].toString());
-            for (int i = 1; i < supportedULocales.length; ++i) {
-                s.append(", ").append(supportedULocales[i].toString());
-            }
-            s.append('}');
-        }
-        s.append(" default=").append(Objects.toString(defaultULocale));
-        if (favorSubtag != null) {
-            s.append(" distance=").append(favorSubtag.toString());
-        }
-        if (thresholdDistance >= 0) {
-            s.append(String.format(" threshold=%d", thresholdDistance));
-        }
-        s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
-        return s.append('}').toString();
-    }
-
-    /**
-     * Returns a fraction between 0 and 1, where 1 means that the languages are a
-     * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
-     * <br>Note that
-     * the precise values may change over time; no code should be made dependent
-     * on the values remaining constant.
-     * @param desired Desired locale
-     * @param desiredMax Maximized locale (using likely subtags)
-     * @param supported Supported locale
-     * @param supportedMax Maximized locale (using likely subtags)
-     * @return value between 0 and 1, inclusive.
-     * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
-     */
-    @Deprecated
-    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
-        // Returns the inverse of the distance: That is, 1-distance(desired, supported).
-        int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
-                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
-                new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
-                thresholdDistance, favorSubtag) & 0xff;
-        return (100 - distance) / 100.0;
-    }
-
-    /**
-     * Canonicalize a locale (language). Note that for now, it is canonicalizing
-     * according to CLDR conventions (he vs iw, etc), since that is what is needed
-     * for likelySubtags.
-     * @param ulocale language/locale code
-     * @return ULocale with remapped subtags.
-     * @stable ICU 4.4
-     */
-    public ULocale canonicalize(ULocale ulocale) {
-        // TODO
-        return null;
-    }
-}
index c1bf6af2f9239018fab9faeb0d93d44d9ad69092..1f3bf81052c42bbc75c21c75661f1dfa2f0068f3 100644 (file)
@@ -8,43 +8,52 @@
  */
 package com.ibm.icu.util;
 
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Objects;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.Relation;
-import com.ibm.icu.impl.Row;
-import com.ibm.icu.impl.Row.R3;
-import com.ibm.icu.impl.locale.XLocaleMatcher;
-import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
+
+import com.ibm.icu.impl.locale.LSR;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 
 /**
- * Provides a way to match the languages (locales) supported by a product to the
- * languages (locales) acceptable to a user, and get the best match. For
- * example:
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
  *
+ * <p>Example:
  * <pre>
- * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
- *
- * // afterwards:
- * matcher.getBestMatch("en-US").toLanguageTag() =&gt; "en"
+ * LocaleMatcher matcher = LocaleMatcher.builder().setSupportedLocales("fr, en-GB, en").build();
+ * Locale bestSupported = matcher.getBestLocale(Locale.US);  // "en"
  * </pre>
  *
- * It takes into account when languages are close to one another, such as fil
- * and tl, and when language regional variants are close, like en-GB and en-AU.
- * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
- * file.
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
  * <p>All classes implementing this interface should be immutable. Often a
  * product will just need one static instance, built with the languages
  * that it supports. However, it may want multiple instances with different
@@ -54,880 +63,958 @@ import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
  * @stable ICU 4.4
  */
 public class LocaleMatcher {
+    private static final LSR UND_LSR = new LSR("und","","");
+    private static final ULocale UND_ULOCALE = new ULocale("und");
+    private static final Locale UND_LOCALE = new Locale("und");
 
-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    public static final boolean DEBUG = false;
-
-    private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
-
-    /**
-     * Threshold for falling back to the default (first) language. May make this
-     * a parameter in the future.
-     */
-    private static final double DEFAULT_THRESHOLD = 0.5;
+    // Activates debugging output to stderr with details of GetBestMatch.
+    private static final boolean TRACE_MATCHER = false;
 
-    /**
-     * The default language, in case the threshold is not met.
-     */
-    private final ULocale defaultLanguage;
+    private static abstract class LsrIterator implements Iterator<LSR> {
+        int bestDesiredIndex = -1;
 
-    /**
-     * The default language, in case the threshold is not met.
-     */
-    private final double threshold;
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
 
-    /**
-     * Create a new language matcher. The highest-weighted language is the
-     * default. That means that if no other language is matches closer than a given
-     * threshold, that default language is chosen. Typically the default is English,
-     * but it could be different based on additional information, such as the domain
-     * of the page.
-     *
-     * @param languagePriorityList weighted list
-     * @stable ICU 4.4
-     */
-    public LocaleMatcher(LocalePriorityList languagePriorityList) {
-        this(languagePriorityList, defaultWritten);
+        public abstract void rememberCurrent(int desiredIndex);
     }
 
     /**
-     * Create a new language matcher from a String form. The highest-weighted
-     * language is the default.
+     * Builder option for whether the language subtag or the script subtag is most important.
      *
-     * @param languagePriorityListString String form of LanguagePriorityList
-     * @stable ICU 4.4
-     */
-    public LocaleMatcher(String languagePriorityListString) {
-        this(LocalePriorityList.add(languagePriorityListString).build());
-    }
-
-    /**
-     * Internal testing function; may expose API later.
-     * @param languagePriorityList LocalePriorityList to match
-     * @param matcherData Internal matching data
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
-        this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
-    }
-
-    /**
-     * Internal testing function; may expose API later.
-     * @param languagePriorityList LocalePriorityList to match
-     * @param matcherData Internal matching data
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * @see Builder#setFavorSubtag(FavorSubtag)
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
-    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
-        this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
-        this.languagePriorityList = languagePriorityList;
-        for (final ULocale language : languagePriorityList) {
-            add(language, languagePriorityList.getWeight(language));
-        }
-        processMapping();
-        Iterator<ULocale> it = languagePriorityList.iterator();
-        defaultLanguage = it.hasNext() ? it.next() : null;
-        this.threshold = threshold;
-    }
-
-
-    /**
-     * Returns a fraction between 0 and 1, where 1 means that the languages are a
-     * perfect match, and 0 means that they are completely different. Note that
-     * the precise values may change over time; no code should be made dependent
-     * on the values remaining constant.
-     * @param desired Desired locale
-     * @param desiredMax Maximized locale (using likely subtags)
-     * @param supported Supported locale
-     * @param supportedMax Maximized locale (using likely subtags)
-     * @return value between 0 and 1, inclusive.
-     * @stable ICU 4.4
-     */
-    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
-        return matcherData.match(desired, desiredMax, supported, supportedMax);
+    public enum FavorSubtag {
+        /**
+         * Language differences are most important, then script differences, then region differences.
+         * (This is the default behavior.)
+         *
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        LANGUAGE,
+        /**
+         * Makes script differences matter relatively more than language differences.
+         *
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        SCRIPT
     }
 
-
     /**
-     * Canonicalize a locale (language). Note that for now, it is canonicalizing
-     * according to CLDR conventions (he vs iw, etc), since that is what is needed
-     * for likelySubtags.
-     * @param ulocale language/locale code
-     * @return ULocale with remapped subtags.
-     * @stable ICU 4.4
+     * Builder option for whether all desired locales are treated equally or
+     * earlier ones are preferred.
+     *
+     * @see Builder#setDemotionPerDesiredLocale(Demotion)
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    public ULocale canonicalize(ULocale ulocale) {
-        // TODO Get the data from CLDR, use Java conventions.
-        String lang = ulocale.getLanguage();
-        String lang2 = canonicalMap.get(lang);
-        String script = ulocale.getScript();
-        String script2 = canonicalMap.get(script);
-        String region = ulocale.getCountry();
-        String region2 = canonicalMap.get(region);
-        if (lang2 != null || script2 != null || region2 != null) {
-            return new ULocale(
-                lang2 == null ? lang : lang2,
-                    script2 == null ? script : script2,
-                        region2 == null ? region : region2
-                );
-        }
-        return ulocale;
+    public enum Demotion {
+        /**
+         * All desired locales are treated equally.
+         *
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        NONE,
+        /**
+         * Earlier desired locales are preferred.
+         *
+         * <p>From each desired locale to the next,
+         * the distance to any supported locale is increased by an additional amount
+         * which is at least as large as most region mismatches.
+         * A later desired locale has to have a better match with some supported locale
+         * due to more than merely having the same region subtag.
+         *
+         * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
+         * yields <code>Result(en-GB, en)</code> because
+         * with the demotion of sv its perfect match is no better than
+         * the region distance between the earlier desired locale en-GB and en=en-US.
+         *
+         * <p>Notes:
+         * <ul>
+         *   <li>In some cases, language and/or script differences can be as small as
+         *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+         *   <li>It is possible for certain region differences to be larger than usual,
+         *       and larger than the demotion.
+         *       (As of CLDR 35 there is no such case, but
+         *        this is possible in future versions of the data.)
+         * </ul>
+         *
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        REGION
     }
 
     /**
-     * Get the best match for a LanguagePriorityList
+     * Data for the best-matching pair of a desired and a supported locale.
      *
-     * @param languageList list to match
-     * @return best matching language code
-     * @stable ICU 4.4
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    public ULocale getBestMatch(LocalePriorityList languageList) {
-        double bestWeight = 0;
-        ULocale bestTableMatch = null;
-        double penalty = 0;
-        OutputDouble matchWeight = new OutputDouble();
-        for (final ULocale language : languageList) {
-            final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
-            final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
-            if (weight > bestWeight) {
-                bestWeight = weight;
-                bestTableMatch = matchLocale;
-            }
-            penalty += 0.07000001;
-        }
-        if (bestWeight < threshold) {
-            bestTableMatch = defaultLanguage;
+    public static final class Result {
+        private final ULocale desiredULocale;
+        private final ULocale supportedULocale;
+        private final Locale desiredLocale;
+        private final Locale supportedLocale;
+        private final int desiredIndex;
+        private final int supportedIndex;
+
+        private Result(ULocale udesired, ULocale usupported,
+                Locale desired, Locale supported,
+                int desIndex, int suppIndex) {
+            desiredULocale = udesired;
+            supportedULocale = usupported;
+            desiredLocale = desired;
+            supportedLocale = supported;
+            desiredIndex = desIndex;
+            supportedIndex = suppIndex;
         }
-        return bestTableMatch;
-    }
 
-    /**
-     * Convenience method: Get the best match for a LanguagePriorityList
-     *
-     * @param languageList String form of language priority list
-     * @return best matching language code
-     * @stable ICU 4.4
-     */
-    public ULocale getBestMatch(String languageList) {
-        return getBestMatch(LocalePriorityList.add(languageList).build());
-    }
+        /**
+         * Returns the best-matching desired locale.
+         * null if the list of desired locales is empty or if none matched well enough.
+         *
+         * @return the best-matching desired locale, or null.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public ULocale getDesiredULocale() {
+            return desiredULocale == null && desiredLocale != null ?
+                    ULocale.forLocale(desiredLocale) : desiredULocale;
+        }
+        /**
+         * Returns the best-matching desired locale.
+         * null if the list of desired locales is empty or if none matched well enough.
+         *
+         * @return the best-matching desired locale, or null.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Locale getDesiredLocale() {
+            return desiredLocale == null && desiredULocale != null ?
+                    desiredULocale.toLocale() : desiredLocale;
+        }
 
-    /**
-     * Get the best match for an individual language code.
-     *
-     * @param ulocale locale/language code to match
-     * @return best matching language code
-     * @stable ICU 4.4
-     */
-    public ULocale getBestMatch(ULocale ulocale) {
-        return getBestMatchInternal(ulocale, null);
-    }
+        /**
+         * Returns the best-matching supported locale.
+         * If none matched well enough, this is the default locale.
+         * The default locale is null if the list of supported locales is empty and
+         * no explicit default locale is set.
+         *
+         * @return the best-matching supported locale, or null.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public ULocale getSupportedULocale() { return supportedULocale; }
+        /**
+         * Returns the best-matching supported locale.
+         * If none matched well enough, this is the default locale.
+         * The default locale is null if the list of supported locales is empty and
+         * no explicit default locale is set.
+         *
+         * @return the best-matching supported locale, or null.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Locale getSupportedLocale() { return supportedLocale; }
 
-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    public ULocale getBestMatch(ULocale... ulocales) {
-        return getBestMatch(LocalePriorityList.add(ulocales).build());
-    }
+        /**
+         * Returns the index of the best-matching desired locale in the input Iterable order.
+         * -1 if the list of desired locales is empty or if none matched well enough.
+         *
+         * @return the index of the best-matching desired locale, or -1.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getDesiredIndex() { return desiredIndex; }
 
-    /**
-     * {@inheritDoc}
-     * @stable ICU 4.4
-     */
-    @Override
-    public String toString() {
-        return "{" + defaultLanguage + ", "
-            + localeToMaxLocaleAndWeight + "}";
-    }
-    // ================= Privates =====================
+        /**
+         * Returns the index of the best-matching supported locale in the
+         * constructor’s or builder’s input order (“set” Collection plus “added” locales).
+         * If the matcher was built from a locale list string, then the iteration order is that
+         * of a LocalePriorityList built from the same string.
+         * -1 if the list of supported locales is empty or if none matched well enough.
+         *
+         * @return the index of the best-matching supported locale, or -1.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int getSupportedIndex() { return supportedIndex; }
 
-    /**
-     * Get the best match for an individual language code.
-     *
-     * @param languageCode
-     * @return best matching language code and weight (as per
-     *         {@link #match(ULocale, ULocale)})
-     */
-    private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
-        languageCode = canonicalize(languageCode);
-        final ULocale maximized = addLikelySubtags(languageCode);
-        if (DEBUG) {
-            System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
-        }
-        double bestWeight = 0;
-        ULocale bestTableMatch = null;
-        String baseLanguage = maximized.getLanguage();
-        Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
-        if (searchTable != null) { // we preprocessed the table so as to filter by language
-            if (DEBUG) System.out.println("\tSearching: " + searchTable);
-            for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
-                ULocale tableKey = tableKeyValue.get0();
-                ULocale maxLocale = tableKeyValue.get1();
-                Double matchedWeight = tableKeyValue.get2();
-                final double match = match(languageCode, maximized, tableKey, maxLocale);
-                if (DEBUG) {
-                    System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
-                }
-                final double weight = match * matchedWeight;
-                if (weight > bestWeight) {
-                    bestWeight = weight;
-                    bestTableMatch = tableKey;
-                    if (weight > 0.999d) { // bail on good enough match.
-                        break;
-                    }
+        /**
+         * Takes the best-matching supported locale and adds relevant fields of the
+         * best-matching desired locale, such as the -t- and -u- extensions.
+         * May replace some fields of the supported locale.
+         * The result is the locale that should be used for date and number formatting, collation, etc.
+         *
+         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-SA-u-nu-latn
+         *
+         * @return the service locale, combining the best-matching desired and supported locales.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public ULocale makeServiceULocale() {
+            ULocale bestDesired = getDesiredULocale();
+            ULocale serviceLocale = supportedULocale;
+            if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
+                ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
+
+                // Copy the region from bestDesired, if there is one.
+                String region = bestDesired.getCountry();
+                if (!region.isEmpty()) {
+                    b.setRegion(region);
                 }
-            }
-        }
-        if (bestWeight < threshold) {
-            bestTableMatch = defaultLanguage;
-        }
-        if (outputWeight != null) {
-            outputWeight.value = bestWeight; // only return the weight when needed
-        }
-        return bestTableMatch;
-    }
 
-    /**
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    private static class OutputDouble { // TODO, move to where OutputInt is
-        double value;
-    }
-
-    private void add(ULocale language, Double weight) {
-        language = canonicalize(language);
-        R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
-        row.freeze();
-        localeToMaxLocaleAndWeight.add(row);
-    }
+                // Copy the variants from bestDesired, if there are any.
+                // Note that this will override any serviceLocale variants.
+                // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+                String variants = bestDesired.getVariant();
+                if (!variants.isEmpty()) {
+                    b.setVariant(variants);
+                }
 
-    /**
-     * We preprocess the data to get just the possible matches for each desired base language.
-     */
-    private void processMapping() {
-        for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
-            String desired = desiredToMatchingLanguages.getKey();
-            Set<String> supported = desiredToMatchingLanguages.getValue();
-            for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
-                final ULocale key = localeToMaxAndWeight.get0();
-                String lang = key.getLanguage();
-                if (supported.contains(lang)) {
-                    addFiltered(desired, localeToMaxAndWeight);
+                // Copy the extensions from bestDesired, if there are any.
+                // Note that this will override any serviceLocale extensions.
+                // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+                // (replacing calendar).
+                for (char extensionKey : bestDesired.getExtensionKeys()) {
+                    b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
                 }
+                serviceLocale = b.build();
             }
+            return serviceLocale;
         }
-        // now put in the values directly, since languages always map to themselves
-        for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
-            final ULocale key = localeToMaxAndWeight.get0();
-            String lang = key.getLanguage();
-            addFiltered(lang, localeToMaxAndWeight);
-        }
-    }
 
-    private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
-        Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
-        if (map == null) {
-            desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<>());
-        }
-        map.add(localeToMaxAndWeight);
-        if (DEBUG) {
-            System.out.println(desired + ", " + localeToMaxAndWeight);
+        /**
+         * Takes the best-matching supported locale and adds relevant fields of the
+         * best-matching desired locale, such as the -t- and -u- extensions.
+         * May replace some fields of the supported locale.
+         * The result is the locale that should be used for
+         * date and number formatting, collation, etc.
+         *
+         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-SA-u-nu-latn
+         *
+         * @return the service locale, combining the best-matching desired and supported locales.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Locale makeServiceLocale() {
+            return makeServiceULocale().toLocale();
         }
     }
 
-    Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<>();
-    Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
-    = new LinkedHashMap<>();
-
-    // =============== Special Mapping Information ==============
+    private final int thresholdDistance;
+    private final int demotionPerDesiredLocale;
+    private final FavorSubtag favorSubtag;
+
+    // These are in input order.
+    private final ULocale[] supportedULocales;
+    private final Locale[] supportedLocales;
+    // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+    private final Map<LSR, Integer> supportedLsrToIndex;
+    // Array versions of the supportedLsrToIndex keys and values.
+    // The distance lookup loops over the supportedLsrs and returns the index of the best match.
+    private final LSR[] supportedLsrs;
+    private final int[] supportedIndexes;
+    private final ULocale defaultULocale;
+    private final Locale defaultLocale;
+    private final int defaultLocaleIndex;
 
     /**
-     * We need to add another method to addLikelySubtags that doesn't return
-     * null, but instead substitutes Zzzz and ZZ if unknown. There are also
-     * a few cases where addLikelySubtags needs to have expanded data, to handle
-     * all deprecated codes.
-     * @param languageCode
-     * @return "fixed" addLikelySubtags
+     * LocaleMatcher Builder.
+     *
+     * @see LocaleMatcher#builder()
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    private ULocale addLikelySubtags(ULocale languageCode) {
-        // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
-        // language would normally match English.  But that would produce the counterintuitive results
-        // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
-        // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
-        //
-        // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
-        // so that max("und")="und". That produces the following, more desirable results:
-        if (languageCode.equals(UNKNOWN_LOCALE)) {
-            return UNKNOWN_LOCALE;
-        }
-        final ULocale result = ULocale.addLikelySubtags(languageCode);
-        // should have method on getLikelySubtags for this
-        if (result == null || result.equals(languageCode)) {
-            final String language = languageCode.getLanguage();
-            final String script = languageCode.getScript();
-            final String region = languageCode.getCountry();
-            return new ULocale((language.length()==0 ? "und"
-                : language)
-                + "_"
-                + (script.length()==0 ? "Zzzz" : script)
-                + "_"
-                + (region.length()==0 ? "ZZ" : region));
-        }
-        return result;
-    }
+    public static class Builder {
+        private List<ULocale> supportedLocales;
+        private int thresholdDistance = -1;
+        private Demotion demotion;
+        private ULocale defaultLocale;
+        private FavorSubtag favor;
 
-    private static class LocalePatternMatcher {
-        // a value of null means a wildcard; matches any.
-        private String lang;
-        private String script;
-        private String region;
-        private Level level;
-        static Pattern pattern = Pattern.compile(
-            "([a-z]{1,8}|\\*)"
-                + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
-                + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
-
-        public LocalePatternMatcher(String toMatch) {
-            Matcher matcher = pattern.matcher(toMatch);
-            if (!matcher.matches()) {
-                throw new IllegalArgumentException("Bad pattern: " + toMatch);
-            }
-            lang = matcher.group(1);
-            script = matcher.group(2);
-            region = matcher.group(3);
-            level = region != null ? Level.region : script != null ? Level.script : Level.language;
+        private Builder() {}
 
-            if (lang.equals("*")) {
-                lang = null;
-            }
-            if (script != null && script.equals("*")) {
-                script = null;
-            }
-            if (region != null && region.equals("*")) {
-                region = null;
-            }
-        }
-
-        boolean matches(ULocale ulocale) {
-            if (lang != null && !lang.equals(ulocale.getLanguage())) {
-                return false;
-            }
-            if (script != null && !script.equals(ulocale.getScript())) {
-                return false;
-            }
-            if (region != null && !region.equals(ulocale.getCountry())) {
-                return false;
-            }
-            return true;
+        /**
+         * Parses the string like {@link LocalePriorityList} does and
+         * sets the supported locales accordingly.
+         * Clears any previously set/added supported locales first.
+         *
+         * @param locales the string of locales to set, to be parsed like LocalePriorityList does
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder setSupportedLocales(String locales) {
+            return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
         }
 
-        public Level getLevel() {
-            return level;
+        /**
+         * Copies the supported locales, preserving iteration order.
+         * Clears any previously set/added supported locales first.
+         * Duplicates are allowed, and are not removed.
+         *
+         * @param locales the list of locales
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder setSupportedULocales(Collection<ULocale> locales) {
+            supportedLocales = new ArrayList<>(locales);
+            return this;
         }
 
-        public String getLanguage() {
-            return (lang == null ? "*" : lang);
+        /**
+         * Copies the supported locales, preserving iteration order.
+         * Clears any previously set/added supported locales first.
+         * Duplicates are allowed, and are not removed.
+         *
+         * @param locales the list of locales
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder setSupportedLocales(Collection<Locale> locales) {
+            supportedLocales = new ArrayList<>(locales.size());
+            for (Locale locale : locales) {
+                supportedLocales.add(ULocale.forLocale(locale));
+            }
+            return this;
         }
 
-        public String getScript() {
-            return (script == null ? "*" : script);
+        /**
+         * Adds another supported locale.
+         * Duplicates are allowed, and are not removed.
+         *
+         * @param locale the locale to add
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder addSupportedULocale(ULocale locale) {
+            if (supportedLocales == null) {
+                supportedLocales = new ArrayList<>();
+            }
+            supportedLocales.add(locale);
+            return this;
         }
 
-        public String getRegion() {
-            return (region == null ? "*" : region);
+        /**
+         * Adds another supported locale.
+         * Duplicates are allowed, and are not removed.
+         *
+         * @param locale the locale to add
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder addSupportedLocale(Locale locale) {
+            return addSupportedULocale(ULocale.forLocale(locale));
         }
 
-        @Override
-        public String toString() {
-            String result = getLanguage();
-            if (level != Level.language) {
-                result += "-" + getScript();
-                if (level != Level.script) {
-                    result += "-" + getRegion();
-                }
-            }
-            return result;
+        /**
+         * Sets the default locale; if null, or if it is not set explicitly,
+         * then the first supported locale is used as the default locale.
+         *
+         * @param defaultLocale the default locale
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder setDefaultULocale(ULocale defaultLocale) {
+            this.defaultLocale = defaultLocale;
+            return this;
         }
 
-        /* (non-Javadoc)
-         * @see java.lang.Object#equals(java.lang.Object)
+        /**
+         * Sets the default locale; if null, or if it is not set explicitly,
+         * then the first supported locale is used as the default locale.
+         *
+         * @param defaultLocale the default locale
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
          */
-        @Override
-        public boolean equals(Object obj) {
-            if (obj == this) {
-                return true;
-            }
-            if (obj == null || !(obj instanceof LocalePatternMatcher)) {
-                return false;
-            }
-            LocalePatternMatcher other = (LocalePatternMatcher) obj;
-            return Objects.equals(level, other.level)
-                && Objects.equals(lang, other.lang)
-                && Objects.equals(script, other.script)
-                && Objects.equals(region, other.region);
+        public Builder setDefaultLocale(Locale defaultLocale) {
+            this.defaultLocale = ULocale.forLocale(defaultLocale);
+            return this;
         }
 
-        /* (non-Javadoc)
-         * @see java.lang.Object#hashCode()
+        /**
+         * If SCRIPT, then the language differences are smaller than script differences.
+         * This is used in situations (such as maps) where
+         * it is better to fall back to the same script than a similar language.
+         *
+         * @param subtag the subtag to favor
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
          */
-        @Override
-        public int hashCode() {
-            return level.ordinal()
-                ^ (lang == null ? 0 : lang.hashCode())
-                ^ (script == null ? 0 : script.hashCode())
-                ^ (region == null ? 0 : region.hashCode());
+        public Builder setFavorSubtag(FavorSubtag subtag) {
+            this.favor = subtag;
+            return this;
         }
-    }
 
-    enum Level {
-        language(0.99),
-        script(0.2),
-        region(0.04);
-
-        final double worst;
-
-        Level(double d) {
-            worst = d;
+        /**
+         * Option for whether all desired locales are treated equally or
+         * earlier ones are preferred (this is the default).
+         *
+         * @param demotion the demotion per desired locale to set.
+         * @return this Builder object
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Builder setDemotionPerDesiredLocale(Demotion demotion) {
+            this.demotion = demotion;
+            return this;
         }
-    }
 
-    private static class ScoreData implements Freezable<ScoreData> {
-        @SuppressWarnings("unused")
-        private static final double maxUnequal_changeD_sameS = 0.5;
-
-        @SuppressWarnings("unused")
-        private static final double maxUnequal_changeEqual = 0.75;
-
-        LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<>();
-        final Level level;
-
-        public ScoreData(Level level) {
-            this.level = level;
-        }
-
-        void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) {
-            //            Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
-            //            if (lang_result == null) {
-            //                scores.put(desired, lang_result = new HashMap());
-            //            }
-            //            Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
-            //            if (result == null) {
-            //                lang_result.put(supported, result = new LinkedHashSet());
-            //            }
-            //            result.add(data);
-            boolean added = scores.add(data);
-            if (!added) {
-                throw new ICUException("trying to add duplicate data: " +  data);
+        /**
+         * <i>Internal only!</i>
+         *
+         * @param thresholdDistance the thresholdDistance to set, with -1 = default
+         * @return this Builder object
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public Builder internalSetThresholdDistance(int thresholdDistance) {
+            if (thresholdDistance > 100) {
+                thresholdDistance = 100;
             }
+            this.thresholdDistance = thresholdDistance;
+            return this;
         }
 
-        double getScore(ULocale dMax, String desiredRaw, String desiredMax,
-            ULocale sMax, String supportedRaw, String supportedMax) {
-            double distance = 0;
-            if (!desiredMax.equals(supportedMax)) {
-                distance = getRawScore(dMax, sMax);
-            } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
-                distance += 0.001;
-            }
-            return distance;
+        /**
+         * Builds and returns a new locale matcher.
+         * This builder can continue to be used.
+         *
+         * @return new LocaleMatcher.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public LocaleMatcher build() {
+            return new LocaleMatcher(this);
         }
 
-        private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
-            if (DEBUG) {
-                System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
+        /**
+         * {@inheritDoc}
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        @Override
+        public String toString() {
+            StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder");
+            if (!supportedLocales.isEmpty()) {
+                s.append(" supported={").append(supportedLocales.toString()).append('}');
             }
-            for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
-                if (datum.get0().matches(desiredLocale)
-                    && datum.get1().matches(supportedLocale)) {
-                    if (DEBUG) {
-                        System.out.println("\t\t\t\tFOUND\t" + datum);
-                    }
-                    return datum.get2();
-                }
+            if (defaultLocale != null) {
+                s.append(" default=").append(defaultLocale.toString());
             }
-            if (DEBUG) {
-                System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
+            if (favor != null) {
+                s.append(" distance=").append(favor.toString());
             }
-            return level.worst;
-        }
-
-        @Override
-        public String toString() {
-            StringBuilder result = new StringBuilder().append(level);
-            for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
-                result.append("\n\t\t").append(score);
+            if (thresholdDistance >= 0) {
+                s.append(String.format(" threshold=%d", thresholdDistance));
+            }
+            if (demotion != null) {
+                s.append(" demotion=").append(demotion.toString());
             }
-            return result.toString();
+            return s.append('}').toString();
         }
+    }
 
+    /**
+     * Returns a builder used in chaining parameters for building a LocaleMatcher.
+     *
+     * @return a new Builder object
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
 
-        @Override
-        @SuppressWarnings("unchecked")
-        public ScoreData cloneAsThawed() {
-            try {
-                ScoreData result = (ScoreData) clone();
-                result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
-                result.frozen = false;
-                return result;
-            } catch (CloneNotSupportedException e) {
-                throw new ICUCloneNotSupportedException(e); // will never happen
-            }
+    /**
+     * Copies the supported locales, preserving iteration order, and constructs a LocaleMatcher.
+     * The first locale is used as the default locale for when there is no good match.
+     *
+     * @param supportedLocales list of locales
+     * @stable ICU 4.4
+     */
+    public LocaleMatcher(LocalePriorityList supportedLocales) {
+        this(builder().setSupportedULocales(supportedLocales.getULocales()));
+    }
 
-        }
+    /**
+     * Parses the string like {@link LocalePriorityList} does and
+     * constructs a LocaleMatcher for the supported locales parsed from the string.
+     * The first one (in LocalePriorityList iteration order) is used as the default locale for
+     * when there is no good match.
+     *
+     * @param supportedLocales the string of locales to set,
+     *          to be parsed like LocalePriorityList does
+     * @stable ICU 4.4
+     */
+    public LocaleMatcher(String supportedLocales) {
+        this(builder().setSupportedLocales(supportedLocales));
+    }
 
-        private volatile boolean frozen = false;
+    // Builds the matcher state from the builder's settings: the distance threshold,
+    // the supported locales (as parallel ULocale and java.util.Locale arrays in input order),
+    // the ordered LSR lookup structures, the default locale, the demotion value
+    // and the favored subtag.
+    private LocaleMatcher(Builder builder) {
+        // A negative builder threshold means "use the default".
+        thresholdDistance = builder.thresholdDistance < 0 ?
+                LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., java.util.Locale)
+        // we can return those by parallel index.
+        int supportedLocalesLength = builder.supportedLocales.size();
+        supportedULocales = new ULocale[supportedLocalesLength];
+        supportedLocales = new Locale[supportedLocalesLength];
+        // Supported LSRs in input order.
+        LSR lsrs[] = new LSR[supportedLocalesLength];
+        // Also find the first supported locale whose LSR is
+        // the same as that for the default locale.
+        ULocale udef = builder.defaultLocale;
+        Locale def = null;
+        LSR defLSR = null;
+        int idef = -1;
+        if (udef != null) {
+            def = udef.toLocale();
+            defLSR = getMaximalLsrOrUnd(udef);
+        }
+        int i = 0;
+        for (ULocale locale : builder.supportedLocales) {
+            supportedULocales[i] = locale;
+            supportedLocales[i] = locale.toLocale();
+            LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
+            if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
+                idef = i;
+            }
+            ++i;
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, Indexes).
+        // We use a LinkedHashMap for both,
+        // and insert the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales in builder order.
+        // 3. Remaining locales in builder order.
+        supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+        Map<LSR, Integer> otherLsrToIndex = null;
+        if (idef >= 0) {
+            supportedLsrToIndex.put(defLSR, idef);
+        }
+        i = 0;
+        for (ULocale locale : supportedULocales) {
+            if (i == idef) {
+                // The default locale has already been inserted first.
+                // Advance the index before skipping: without the increment, i would stay
+                // equal to idef and every following supported locale would be skipped too.
+                ++i;
+                continue;
+            }
+            LSR lsr = lsrs[i];
+            if (defLSR == null) {
+                // No explicit default locale: the first supported locale becomes the default.
+                assert i == 0;
+                udef = locale;
+                def = supportedLocales[0];
+                defLSR = lsr;
+                idef = 0;
+                supportedLsrToIndex.put(lsr, 0);
+            } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+                // Priority group: same LSR as the default, or a paradigm LSR.
+                putIfAbsent(supportedLsrToIndex, lsr, i);
+            } else {
+                // Everything else is collected separately and appended after the priority group.
+                if (otherLsrToIndex == null) {
+                    otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+                }
+                putIfAbsent(otherLsrToIndex, lsr, i);
+            }
+            ++i;
+        }
+        if (otherLsrToIndex != null) {
+            supportedLsrToIndex.putAll(otherLsrToIndex);
+        }
+        // Flatten the ordered map into parallel arrays of LSRs and supported-locale indexes.
+        int numSuppLsrs = supportedLsrToIndex.size();
+        supportedLsrs = new LSR[numSuppLsrs];
+        supportedIndexes = new int[numSuppLsrs];
+        i = 0;
+        for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
+            supportedLsrs[i] = entry.getKey();  // = lsrs[entry.getValue()]
+            supportedIndexes[i++] = entry.getValue();
+        }
+
+        defaultULocale = udef;
+        defaultLocale = def;
+        defaultLocaleIndex = idef;
+        demotionPerDesiredLocale =
+                builder.demotion == Demotion.NONE ? 0 :
+                    LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale();  // null or REGION
+        favorSubtag = builder.favor;
+    }
 
-        @Override
-        public ScoreData freeze() {
-            return this;
+    private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
+        Integer index = lsrToIndex.get(lsr);
+        if (index == null) {
+            lsrToIndex.put(lsr, i);
         }
+    }
 
-        @Override
-        public boolean isFrozen() {
-            return frozen;
+    private static final LSR getMaximalLsrOrUnd(ULocale locale) {
+        if (locale.equals(UND_ULOCALE)) {
+            return UND_LSR;
+        } else {
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
         }
+    }
 
-        public Relation<String,String> getMatchingLanguages() {
-            Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class);
-            for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
-                LocalePatternMatcher desired = item.get0();
-                LocalePatternMatcher supported = item.get1();
-                if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
-                    desiredToSupported.put(desired.lang, supported.lang);
-                }
-            }
-            desiredToSupported.freeze();
-            return desiredToSupported;
+    private static final LSR getMaximalLsrOrUnd(Locale locale) {
+        if (locale.equals(UND_LOCALE)) {
+            return UND_LSR;
+        } else {
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
         }
     }
 
-    /**
-     * Only for testing and use by tools. Interface may change!!
-     * @internal
-     * @deprecated This API is ICU internal only.
-     */
-    @Deprecated
-    public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
-        private ScoreData languageScores = new ScoreData(Level.language);
-        private ScoreData scriptScores = new ScoreData(Level.script);
-        private ScoreData regionScores = new ScoreData(Level.region);
-        private Relation<String, String> matchingLanguages;
-        private volatile boolean frozen = false;
-
+    private static final class ULocaleLsrIterator extends LsrIterator {
+        private Iterator<ULocale> locales;
+        private ULocale current, remembered;
 
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public LanguageMatcherData() {
+        ULocaleLsrIterator(Iterator<ULocale> locales) {
+            this.locales = locales;
         }
 
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public Relation<String, String> matchingLanguages() {
-            return matchingLanguages;
+        @Override
+        public boolean hasNext() {
+            return locales.hasNext();
         }
 
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
         @Override
-        @Deprecated
-        public String toString() {
-            return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
+        public LSR next() {
+            current = locales.next();
+            return getMaximalLsrOrUnd(current);
         }
 
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
-            double diff = 0;
-            diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
-            if (diff > 0.999d) { // with no language match, we bail
-                return 0.0d;
-            }
-            diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
-            diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
-
-            if (!a.getVariant().equals(b.getVariant())) {
-                diff += 0.01;
-            }
-            if (diff < 0.0d) {
-                diff = 0.0d;
-            } else if (diff > 1.0d) {
-                diff = 1.0d;
-            }
-            if (DEBUG) {
-                System.out.println("\t\t\tTotal Distance\t" + diff);
-            }
-            return 1.0 - diff;
+        @Override
+        public void rememberCurrent(int desiredIndex) {
+            bestDesiredIndex = desiredIndex;
+            remembered = current;
         }
+    }
 
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
-            return addDistance(desired, supported, percent, false, comment);
-        }
-        /**
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
-        @Deprecated
-        public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
-            return addDistance(desired, supported, percent, oneway, null);
-        }
-
-        private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
-            if (DEBUG) {
-                System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
-                    " supported=\"" + supported + "\"" +
-                    " percent=\"" + percent + "\""
-                    + (oneway ? " oneway=\"true\"" : "")
-                    + "/>"
-                    + (comment == null ? "" : "\t<!-- " + comment + " -->"));
-                //                    //     .addDistance("nn", "nb", 4, true)
-                //                        System.out.println(".addDistance(\"" + desired + "\"" +
-                //                                ", \"" + supported + "\"" +
-                //                                ", " + percent + ""
-                //                                + (oneway ? "" : ", true")
-                //                                + (comment == null ? "" : ", \"" + comment + "\"")
-                //                                + ")"
-                //                        );
+    private static final class LocaleLsrIterator extends LsrIterator {
+        private Iterator<Locale> locales;
+        private Locale current, remembered;
 
-            }
-            double score = 1-percent/100.0; // convert from percentage
-            LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
-            Level desiredLen = desiredMatcher.getLevel();
-            LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
-            Level supportedLen = supportedMatcher.getLevel();
-            if (desiredLen != supportedLen) {
-                throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
-            }
-            R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
-            R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
-            boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
-            switch (desiredLen) {
-            case language:
-                String dlanguage = desiredMatcher.getLanguage();
-                String slanguage = supportedMatcher.getLanguage();
-                languageScores.addDataToScores(dlanguage, slanguage, data);
-                if (!oneway && !desiredEqualsSupported) {
-                    languageScores.addDataToScores(slanguage, dlanguage, data2);
-                }
-                break;
-            case script:
-                String dscript = desiredMatcher.getScript();
-                String sscript = supportedMatcher.getScript();
-                scriptScores.addDataToScores(dscript, sscript, data);
-                if (!oneway && !desiredEqualsSupported) {
-                    scriptScores.addDataToScores(sscript, dscript, data2);
-                }
-                break;
-            case region:
-                String dregion = desiredMatcher.getRegion();
-                String sregion = supportedMatcher.getRegion();
-                regionScores.addDataToScores(dregion, sregion, data);
-                if (!oneway && !desiredEqualsSupported) {
-                    regionScores.addDataToScores(sregion, dregion, data2);
-                }
-                break;
-            }
-            return this;
+        LocaleLsrIterator(Iterator<Locale> locales) {
+            this.locales = locales;
         }
 
-        /**
-         * {@inheritDoc}
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
         @Override
-        @Deprecated
-        public LanguageMatcherData cloneAsThawed() {
-            LanguageMatcherData result;
-            try {
-                result = (LanguageMatcherData) clone();
-                result.languageScores = languageScores.cloneAsThawed();
-                result.scriptScores = scriptScores.cloneAsThawed();
-                result.regionScores = regionScores.cloneAsThawed();
-                result.frozen = false;
-                return result;
-            } catch (CloneNotSupportedException e) {
-                throw new ICUCloneNotSupportedException(e); // will never happen
-            }
+        public boolean hasNext() {
+            return locales.hasNext();
         }
 
-        /**
-         * {@inheritDoc}
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
         @Override
-        @Deprecated
-        public LanguageMatcherData freeze() {
-            languageScores.freeze();
-            regionScores.freeze();
-            scriptScores.freeze();
-            matchingLanguages = languageScores.getMatchingLanguages();
-            frozen = true;
-            return this;
+        public LSR next() {
+            current = locales.next();
+            return getMaximalLsrOrUnd(current);
         }
 
-        /**
-         * {@inheritDoc}
-         * @internal
-         * @deprecated This API is ICU internal only.
-         */
         @Override
-        @Deprecated
-        public boolean isFrozen() {
-            return frozen;
+        public void rememberCurrent(int desiredIndex) {
+            bestDesiredIndex = desiredIndex;
+            remembered = current;
         }
     }
 
-    LanguageMatcherData matcherData;
-    LocalePriorityList languagePriorityList;
+    /**
+     * Returns the supported locale which best matches the desired locale.
+     *
+     * @param desiredLocale Typically a user's language.
+     * @return the best-matching supported locale.
+     * @stable ICU 4.4
+     */
+    public ULocale getBestMatch(ULocale desiredLocale) {
+        // Single desired locale: there are no remaining LSRs to iterate over.
+        int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+        if (bestIndex < 0) {
+            // No good match: fall back to the default locale.
+            return defaultULocale;
+        }
+        return supportedULocales[bestIndex];
+    }
 
-    private static final LanguageMatcherData defaultWritten;
+    /**
+     * Returns the supported locale which best matches one of the desired locales.
+     *
+     * @param desiredLocales Typically a user's languages, in order of preference (descending).
+     *          (In ICU 4.4..63 this parameter had type LocalePriorityList.)
+     * @return the best-matching supported locale.
+     * @stable ICU 4.4
+     */
+    public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
+        Iterator<ULocale> iter = desiredLocales.iterator();
+        if (iter.hasNext()) {
+            // The first LSR is passed separately; the iterator supplies the remaining ones.
+            ULocaleLsrIterator lsrs = new ULocaleLsrIterator(iter);
+            LSR firstLSR = lsrs.next();
+            int bestIndex = getBestSuppIndex(firstLSR, lsrs);
+            if (bestIndex >= 0) {
+                return supportedULocales[bestIndex];
+            }
+        }
+        // Nothing desired, or no good match: fall back to the default locale.
+        return defaultULocale;
+    }
 
-    private static HashMap<String,String> canonicalMap = new HashMap<>();
+    /**
+     * Parses the string like {@link LocalePriorityList} does and
+     * returns the supported locale which best matches one of the desired locales.
+     *
+     * @param desiredLocaleList Typically a user's languages, in order of preference (descending),
+     *          as a string which is to be parsed like LocalePriorityList does.
+     * @return the best-matching supported locale.
+     * @stable ICU 4.4
+     */
+    public ULocale getBestMatch(String desiredLocaleList) {
+        // Parse the string into a priority list, then match it like any other list of locales.
+        LocalePriorityList parsedLocales = LocalePriorityList.add(desiredLocaleList).build();
+        return getBestMatch(parsedLocales);
+    }
 
+    /**
+     * Returns the supported locale which best matches the desired locale.
+     *
+     * @param desiredLocale Typically a user's language.
+     * @return the best-matching supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Locale getBestLocale(Locale desiredLocale) {
+        // Single desired locale: there are no remaining LSRs to iterate over.
+        int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+        if (bestIndex < 0) {
+            // No good match: fall back to the default locale.
+            return defaultLocale;
+        }
+        return supportedLocales[bestIndex];
+    }
 
-    static {
-        canonicalMap.put("iw", "he");
-        canonicalMap.put("mo", "ro");
-        canonicalMap.put("tl", "fil");
+    /**
+     * Returns the supported locale which best matches one of the desired locales.
+     *
+     * @param desiredLocales Typically a user's languages, in order of preference (descending).
+     * @return the best-matching supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Locale getBestLocale(Iterable<Locale> desiredLocales) {
+        Iterator<Locale> iter = desiredLocales.iterator();
+        if (iter.hasNext()) {
+            // The first LSR is passed separately; the iterator supplies the remaining ones.
+            LocaleLsrIterator lsrs = new LocaleLsrIterator(iter);
+            LSR firstLSR = lsrs.next();
+            int bestIndex = getBestSuppIndex(firstLSR, lsrs);
+            if (bestIndex >= 0) {
+                return supportedLocales[bestIndex];
+            }
+        }
+        // Nothing desired, or no good match: fall back to the default locale.
+        return defaultLocale;
+    }
 
-        ICUResourceBundle suppData = getICUSupplementalData();
-        ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
-        ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
-        defaultWritten = new LanguageMatcherData();
+    private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
+        if (suppIndex < 0) {
+            // No match: report the default locale with a desired index of -1.
+            return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+        }
+        if (desiredLocale != null) {
+            // A single desired locale was given, so its index is 0.
+            return new Result(desiredLocale, supportedULocales[suppIndex],
+                    null, supportedLocales[suppIndex], 0, suppIndex);
+        }
+        // Otherwise use the desired locale and index that the iterator remembered.
+        return new Result(lsrIter.remembered, supportedULocales[suppIndex],
+                null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
+    }
 
-        for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
-            ICUResourceBundle item = (ICUResourceBundle) iter.next();
-            /*
-            "*_*_*",
-            "*_*_*",
-            "96",
-             */
-            // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
-            boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
-            defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
+    private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
+        if (suppIndex < 0) {
+            return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+        } else if (desiredLocale != null) {
+            return new Result(null, supportedULocales[suppIndex],
+                    desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
+        } else {
+            return new Result(null, supportedULocales[suppIndex],
+                    lsrIter.remembered, supportedLocales[suppIndex],
+                    lsrIter.bestDesiredIndex, suppIndex);
         }
-        defaultWritten.freeze();
     }
 
     /**
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * Returns the best match between the desired locale and the supported locales.
+     *
+     * @param desiredLocale Typically a user's language.
+     * @return the best-matching pair of the desired and a supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Result getBestMatchResult(ULocale desiredLocale) {
+        // Single desired locale: there are no remaining LSRs to iterate over.
+        int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+        return makeResult(desiredLocale, null, bestIndex);
+    }
+
+    /**
+     * Returns the best match between the desired and supported locales.
+     *
+     * @param desiredLocales Typically a user's languages, in order of preference (descending).
+     * @return the best-matching pair of a desired and a supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
-    public static ICUResourceBundle getICUSupplementalData() {
-        ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
-            ICUData.ICU_BASE_NAME,
-            "supplementalData",
-            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-        return suppData;
+    public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
+        Iterator<ULocale> desiredIter = desiredLocales.iterator();
+        if (!desiredIter.hasNext()) {
+            return makeResult(UND_ULOCALE, null, -1);
+        }
+        ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
+        LSR desiredLSR = lsrIter.next();
+        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+        return makeResult(null, lsrIter, suppIndex);
     }
 
     /**
-     * @internal
-     * @deprecated This API is ICU internal only.
+     * Returns the best match between the desired locale and the supported locales.
+     *
+     * @param desiredLocale Typically a user's language.
+     * @return the best-matching pair of the desired and a supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
      */
-    @Deprecated
-    public static double match(ULocale a, ULocale b) {
-        final LocaleMatcher matcher = new LocaleMatcher("");
-        return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
+    public Result getBestLocaleResult(Locale desiredLocale) {
+        LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
+        int suppIndex = getBestSuppIndex(desiredLSR, null);
+        return makeResult(desiredLocale, null, suppIndex);
     }
 
-    transient XLocaleMatcher xLocaleMatcher = null;
-    transient ULocale xDefaultLanguage = null;
-    transient boolean xFavorScript = false;
+    /**
+     * Returns the best match between the desired and supported locales.
+     *
+     * @param desiredLocales Typically a user's languages, in order of preference (descending).
+     * @return the best-matching pair of a desired and a supported locale.
+     * @draft ICU 65
+     * @provisional This API might change or be removed in a future release.
+     */
+    public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
+        Iterator<Locale> desiredIter = desiredLocales.iterator();
+        if (!desiredIter.hasNext()) {
+            return makeResult(UND_LOCALE, null, -1);
+        }
+        LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
+        LSR desiredLSR = lsrIter.next();
+        int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
+        return makeResult(null, lsrIter, suppIndex);
+    }
 
-    private synchronized XLocaleMatcher getLocaleMatcher() {
-        if (xLocaleMatcher == null) {
-            Builder builder = XLocaleMatcher.builder();
-            builder.setSupportedULocales(languagePriorityList.getULocales());
-            if (xDefaultLanguage != null) {
-                builder.setDefaultULocale(xDefaultLanguage);
+    /**
+     * Finds the supported locale that best matches a sequence of desired locales.
+     *
+     * <p>The desired LSRs are tried in order. If a desired LSR is itself a key of
+     * {@code supportedLsrToIndex}, its supported index is returned immediately.
+     * Otherwise the best distance found so far is reduced by
+     * {@code demotionPerDesiredLocale} and passed as the threshold for the lookup
+     * of the next desired LSR.
+     *
+     * @param desiredLSR The first desired locale's LSR.
+     * @param remainingIter Remaining desired LSRs, null or empty if none.
+     *        If not null, {@code rememberCurrent()} is called on it with the 0-based position
+     *        of the desired LSR that produced the returned match.
+     * @return the index of the best-matching supported locale, or -1 if there is no good match.
+     */
+    private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
+        // Position of desiredLSR in the caller's sequence of desired locales;
+        // must advance in step with remainingIter.
+        int desiredIndex = 0;
+        int bestSupportedLsrIndex = -1;
+        for (int bestDistance = thresholdDistance;;) {
+            // Quick check for exact maximized LSR.
+            Integer index = supportedLsrToIndex.get(desiredLSR);
+            if (index != null) {
+                int suppIndex = index;
+                if (TRACE_MATCHER) {
+                    System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
+                            supportedULocales[suppIndex]);
+                }
+                if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+                return suppIndex;
             }
-            if (xFavorScript) {
-                builder.setFavorSubtag(FavorSubtag.SCRIPT);
+            int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+                    desiredLSR, supportedLsrs, bestDistance, favorSubtag);
+            if (bestIndexAndDistance >= 0) {
+                // Packed result: low 8 bits are the distance, the bits above are the index.
+                bestDistance = bestIndexAndDistance & 0xff;
+                if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+                bestSupportedLsrIndex = bestIndexAndDistance >> 8;
             }
-            xLocaleMatcher = builder.build();
+            // Tighten the threshold for each further desired locale; stop once nothing can pass.
+            if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+                break;
+            }
+            if (remainingIter == null || !remainingIter.hasNext()) {
+                break;
+            }
+            desiredLSR = remainingIter.next();
+            // Keep the index in sync with the iterator; otherwise rememberCurrent()
+            // would always be told that the first desired locale matched.
+            ++desiredIndex;
+        }
+        if (bestSupportedLsrIndex < 0) {
+            if (TRACE_MATCHER) {
+                System.err.printf("Returning default %s: no good match\n", defaultULocale);
+            }
+            return -1;
         }
-        return xLocaleMatcher;
+        // Map the index into supportedLsrs back to the index of the supported locale.
+        int suppIndex = supportedIndexes[bestSupportedLsrIndex];
+        if (TRACE_MATCHER) {
+            System.err.printf("Returning %s: best matching supported locale\n",
+                    supportedULocales[suppIndex]);
+        }
+        return suppIndex;
     }
 
     /**
-     * Get the best match between the desired languages and supported languages
-     * This supports the new CLDR syntax to provide for better matches within
-     * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US)
-     * and also matching between regions and macroregions, such as comparing es-419 to es-AR).
-     * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
-     * @param outputBestDesired The one of the desired languages that matched best.
-     * Set to null if the best match was not below the threshold distance.
-     * @return best-match supported language
-     * @internal
-     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     * Returns a fraction between 0 and 1, where 1 means that the languages are a
+     * perfect match, and 0 means that they are completely different.
+     *
+     * <p>This is mostly an implementation detail, and the precise values may change over time.
+     * The implementation may use either the maximized forms or the other ones, or both.
+     * The implementation may or may not rely on the forms to be consistent with each other.
+     *
+     * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+     *
+     * @param desired Desired locale.
+     * @param desiredMax Maximized locale (using likely subtags).
+     * @param supported Supported locale.
+     * @param supportedMax Maximized locale (using likely subtags).
+     * @return value between 0 and 1, inclusive.
+     * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
      */
     @Deprecated
-    public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
-        if (outputBestDesired == null) {
-            return getLocaleMatcher().getBestMatch(desiredLanguages);
-        } else {
-            XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages);
-            outputBestDesired.value = result.getDesiredULocale();
-            return result.getSupportedULocale();
-        }
+    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
+        // Returns the inverse of the distance: That is, 1-distance(desired, supported).
+        int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
+                new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
+                thresholdDistance, favorSubtag) & 0xff;
+        return (100 - distance) / 100.0;
     }
 
     /**
-     * Set the default language, with null = default = first supported language
-     * @param defaultLanguage Language to use in case the threshold for distance is exceeded.
-     * @return this, for chaining
-     * @internal
-     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     * Partially canonicalizes a locale (language). Note that for now, it is canonicalizing
+     * according to CLDR conventions (he vs iw, etc), since that is what is needed
+     * for likelySubtags.
+     *
+     * <p>Currently, this is a much simpler canonicalization than what the ULocale class does:
+     * The language/script/region subtags are each mapped separately, ignoring the other subtags.
+     * If none of these change, then the input locale is returned.
+     * Otherwise a new ULocale with only those subtags is returned, removing variants and extensions.
+     *
+     * @param locale language/locale code
+     * @return ULocale with remapped subtags.
+     * @stable ICU 4.4
      */
-    @Deprecated
-    public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
-        this.xDefaultLanguage = defaultLanguage;
-        xLocaleMatcher = null;
-        return this;
+    public ULocale canonicalize(ULocale locale) {
+        return XLikelySubtags.INSTANCE.canonicalize(locale);
     }
 
     /**
-     * If true, then the language differences are smaller than than script differences.
-     * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
-     * @param favorScript Set to true to treat script as most important.
-     * @return this, for chaining.
-     * @internal
-     * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+     * {@inheritDoc}
+     * @stable ICU 4.4
      */
-    @Deprecated
-    public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
-        this.xFavorScript = favorScript;
-        xLocaleMatcher = null;
-        return this;
+    @Override
+    public String toString() {
+        StringBuilder s = new StringBuilder().append("{LocaleMatcher");
+        if (supportedULocales.length > 0) {
+            s.append(" supported={").append(supportedULocales[0].toString());
+            for (int i = 1; i < supportedULocales.length; ++i) {
+                s.append(", ").append(supportedULocales[i].toString());
+            }
+            s.append('}');
+        }
+        s.append(" default=").append(Objects.toString(defaultULocale));
+        if (favorSubtag != null) {
+            s.append(" distance=").append(favorSubtag.toString());
+        }
+        if (thresholdDistance >= 0) {
+            s.append(String.format(" threshold=%d", thresholdDistance));
+        }
+        s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
+        return s.append('}').toString();
     }
 }
index 8ee10ed141f76f941b0efbbc8bea12a94d68a1dd..6f85a99de8d87fb581d8ac69fce39e878460cbaa 100644 (file)
     <path id="javac.classpathref.core-tests">
         <pathelement location="${icu4j.core.jar}"/>
         <pathelement location="${icu4j.test-framework.jar}"/>
+        <pathelement location="${icu4j.tools.jar}"/>
     </path>
 
-    <target name="_all.core-tests" depends="_all.core, _all.test-framework">
+    <target name="_all.core-tests" depends="_all.core, _all.test-framework, _all.tools">
         <ant dir="${icu4j.core-tests.dir}" inheritAll="false"/>
     </target>
 
         <pathelement location="${icu4j.collate.jar}"/>
         <pathelement location="${icu4j.translit.jar}"/>
         <pathelement location="${icu4j.test-framework.jar}"/>
-        <pathelement location="${icu4j.core-tests.jar}"/>
-        <pathelement location="${icu4j.translit-tests.jar}"/>
     </path>
 
-    <target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework, _all.core-tests, _all.translit-tests">
+    <target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework">
         <ant dir="${icu4j.tools.dir}" inheritAll="false"/>
     </target>
 
index 36c0d08dd808f3779b25f235c8faf6dc073ed943..7134fe12d94ee11f2d9b9a72ffa3e9e39cfeeb5b 100644 (file)
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd004f5d8064e047cef4f7d31326b39b7fc43fba685fab2f0d23c154f4dbc637
-size 12818511
+oid sha256:b21585ec768edea7b099bd6a97b0a4130b53966a63e6a10de2f31b22f8b59fbd
+size 12840921
index e02ee9d41d0a73e4c22d67427be7dc84ffe6e6c2..60473b0ad8e1efa483a9ca7c7f0f067b092d90ba 100644 (file)
@@ -18,5 +18,6 @@
                        <attribute name="javadoc_location" value="jar:platform:/resource/external-libraries/JUnitParams-1.0.5-javadoc.jar!/"/>
                </attributes>
        </classpathentry>
+       <classpathentry kind="src" path="/icu4j-tools"/>
        <classpathentry kind="output" path="out/bin"/>
 </classpath>
index 951d4b2cfaae8be908b9fc1acfba5bcba382eaf8..32e60a3eca3102a8afecd49eac975d7a3c51227f 100644 (file)
@@ -9,6 +9,7 @@
                <project>icu4j-regiondata</project>
                <project>icu4j-shared</project>
                <project>icu4j-test-framework</project>
+               <project>icu4j-tools</project>
        </projects>
        <buildSpec>
                <buildCommand>
similarity index 90%
rename from icu4j/tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java
rename to icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableChecker.java
index 3aa55210a002e9610ecc7fdd8bc19fec8500afd5..b6b66a636ca291369cd50baae1c37823cce917aa 100644 (file)
@@ -8,7 +8,7 @@
  *
  */
 
-package com.ibm.icu.dev.tool.serializable;
+package com.ibm.icu.dev.test.serializable;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -23,7 +23,6 @@ import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
-import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
 import com.ibm.icu.impl.URLHandler;
 
 /**
@@ -31,32 +30,32 @@ import com.ibm.icu.impl.URLHandler;
  * and lists all those classes that implement <code>Serializable</code>. It also checks
  * to make sure that those classes have the <code>serialVersionUID</code>
  * field defined.
- * 
+ *
  */
 public class SerializableChecker implements URLHandler.URLVisitor
 {
     private static Class serializable;
     //private static Class throwable;
-    
+
     private String path = null;
-    
+
     //private boolean write;
-    
+
     public SerializableChecker(String path)
     {
         this.path = path;
-        
+
         if (path != null) {
             File dir = new File(path);
-            
+
             if (!dir.exists()) {
                 dir.mkdirs();
             }
         }
     }
-    
+
     static {
-        try {    
+        try {
             serializable = Class.forName("java.io.Serializable");
             //throwable    = Class.forName("java.lang.Throwable");
         } catch (Exception e) {
@@ -64,45 +63,43 @@ public class SerializableChecker implements URLHandler.URLVisitor
             System.out.println("Woops! Can't get class info for Serializable and Throwable.");
         }
     }
-    
+
     private void writeFile(String className, byte bytes[])
     {
         File file = new File(path + File.separator + className + ".dat");
-        FileOutputStream stream;
-        
-        try {
-            stream = new FileOutputStream(file);
-            
+
+        try (FileOutputStream stream = new FileOutputStream(file)) {
             stream.write(bytes);
             stream.close();
         } catch (Exception e) {
             System.out.print(" - can't write file!");
         }
     }
-    
+
+    @Override
     public void visit(String str)
     {
         int ix = str.lastIndexOf(".class");
-        
+
         if (ix >= 0) {
             String className = "com.ibm.icu" + str.substring(0, ix).replace('/', '.');
-            
+
             // Skip things in com.ibm.icu.dev; they're not relevant.
             if (className.startsWith("com.ibm.icu.dev.")) {
                 return;
             }
-            
+
             try {
                 Class c = Class.forName(className);
                 int   m = c.getModifiers();
-                
+
                 if (serializable.isAssignableFrom(c) /*&&
                     (! throwable.isAssignableFrom(c) || c.getDeclaredFields().length > 0)*/) {
                     //Field uid;
-                    
+
                     System.out.print(className + " (" + Modifier.toString(m) + ") - ");
-                    
-                    if(!Modifier.isInterface(m)){ 
+
+                    if(!Modifier.isInterface(m)){
                         try {
                             /* uid = */
                             c.getDeclaredField("serialVersionUID");
@@ -110,18 +107,18 @@ public class SerializableChecker implements URLHandler.URLVisitor
                             System.out.print("no serialVersionUID - ");
                         }
                     }
-                    
+
                     if (Modifier.isPublic(m)) {
                         SerializableTestUtility.Handler handler = SerializableTestUtility.getHandler(className);
-                        
+
                         if (!Modifier.isInterface(m) && handler != null) {
                             Object objectsOut[] = handler.getTestObjects();
                             Object objectsIn[];
                             boolean passed = true;
-                            
+
                             ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
                             ObjectOutputStream out = new ObjectOutputStream(byteOut);
-                            
+
                             try {
                                 out.writeObject(objectsOut);
                                 out.close();
@@ -130,14 +127,14 @@ public class SerializableChecker implements URLHandler.URLVisitor
                                 System.out.println("Eror writing test objects:" + e.toString());
                                 return;
                             }
-                            
+
                             if (path != null) {
                                 writeFile(className, byteOut.toByteArray());
                             }
-                            
+
                             ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
                             ObjectInputStream in = new ObjectInputStream(byteIn);
-                            
+
                             try {
                                 objectsIn = (Object[]) in.readObject();
                                 in.close();
@@ -153,7 +150,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
                                     System.out.println("Object " + i + " failed behavior test.");
                                 }
                             }
-                            
+
                             if (passed) {
                                 System.out.print("test passed.");
                             }
@@ -164,7 +161,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
                             }
                         }
                     }
-                    
+
                     System.out.println();
                 }
            } catch (Exception e) {
@@ -177,10 +174,10 @@ public class SerializableChecker implements URLHandler.URLVisitor
     {
         List argList = Arrays.asList(args);
         String path = null;
-        
+
         for (Iterator it = argList.iterator(); it.hasNext(); /*anything?*/) {
             String arg = (String) it.next();
-            
+
             if (arg.equals("-w")) {
                 if (it.hasNext()) {
                     path = (String) it.next();
@@ -188,15 +185,15 @@ public class SerializableChecker implements URLHandler.URLVisitor
                     System.out.println("Missing directory name on -w command.");
                 }
             } else {
-                
-    
+
+
                 try {
                     //URL jarURL  = new URL("jar:file:/dev/eclipse/workspace/icu4j/icu4j.jar!/com/ibm/icu");
                     //URL fileURL = new URL("file:/dev/eclipse/workspace/icu4j/classes/com/ibm/icu");
                     URL url = new URL(arg);
                     URLHandler handler  = URLHandler.get(url);
                     SerializableChecker checker = new SerializableChecker(path);
-                    
+
                     System.out.println("Checking classes from " + arg + ":");
                     handler.guide(checker, true, false);
                 } catch (Exception e) {
similarity index 92%
rename from icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java
rename to icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDistanceTest.java
index cb32b1fbce5b9c47e639c9bf215ea85ee2055ffa..39388fcbf01acba79e029f3347eece31be636963 100644 (file)
@@ -12,9 +12,10 @@ import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
 import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder;
 import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
 
@@ -25,13 +26,13 @@ import com.ibm.icu.util.ULocale;
  * @author markdavis
  */
 @RunWith(JUnit4.class)
-public class XLocaleDistanceTest extends TestFmwk {
+public class LocaleDistanceTest extends TestFmwk {
     private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
 
     private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
     DataDrivenTestHelper tfh = new MyTestFileHandler()
             .setFramework(this)
-            .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
+            .load(LocaleDistanceTest.class, "data/localeDistanceTest.txt");
 
     static class Arguments {
         final ULocale desired;
@@ -47,6 +48,13 @@ public class XLocaleDistanceTest extends TestFmwk {
         }
     }
 
+    @Test
+    public void testLoadedDataSameAsBuiltFromScratch() {
+        LocaleDistance.Data built = LocaleDistanceBuilder.build();
+        LocaleDistance.Data loaded = LocaleDistance.Data.load();
+        assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
+    }
+
     @SuppressWarnings("unused")
     @Ignore("Disabled because of Linux; need to investigate.")
     @Test
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java
deleted file mode 100644 (file)
index ababb6d..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-/*
- *******************************************************************************
- * Copyright (C) 2015, Google, Inc., International Business Machines Corporation and         *
- * others. All Rights Reserved.                                                *
- *******************************************************************************
- */
-package com.ibm.icu.dev.test.util;
-
-import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
-
-/**
- * @author markdavis
- *
- */
-public class LocaleMatcherShim {
-    public static LanguageMatcherData load() {
-        // In CLDR, has different value
-        return null;
-    }
-}
index e80c7f5196e9c2a5e6616bfe5e7b40bb96b2dcd6..32c361fb0f4e30019cf4a21da34d8cfc12332a5d 100644 (file)
@@ -9,55 +9,47 @@
 
 package com.ibm.icu.dev.test.util;
 
-import java.util.Arrays;
-import java.util.LinkedHashSet;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
 import org.junit.Test;
 import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
 
 import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
+import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
 import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
 
+import junitparams.JUnitParamsRunner;
+import junitparams.Parameters;
+
 /**
  * Test the LocaleMatcher.
  *
  * @author markdavis
  */
-@SuppressWarnings("deprecation")
-@RunWith(JUnit4.class)
+@RunWith(JUnitParamsRunner.class)
 public class LocaleMatcherTest extends TestFmwk {
-
-
     private static final ULocale ZH_MO = new ULocale("zh_MO");
     private static final ULocale ZH_HK = new ULocale("zh_HK");
-    static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
 
     private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
-        return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
-    }
-
-    private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
-        return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
-    }
-
-    private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
-        return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
+        return new LocaleMatcher(build);
     }
 
     private LocaleMatcher newLocaleMatcher(String string) {
-        return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
+        return new LocaleMatcher(LocalePriorityList.add(string).build());
     }
 
-    // public LocaleMatcher(LocalePriorityList languagePriorityList,
-    // LocaleMatcherData matcherData, double threshold)
-
     @Test
     public void testParentLocales() {
         assertCloser("es_AR", "es_419", "es_ES");
@@ -87,32 +79,6 @@ public class LocaleMatcherTest extends TestFmwk {
         assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
     }
 
-    //    public void testParentLocales() {
-    //        // find all the regions that have a closer relation because of an explicit parent
-    //        Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
-    //        explicitParents.remove("root");
-    //        Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
-    //        for (String locale : explicitParents) {
-    //            while (true) {
-    //                locale = LocaleIDParser.getParent(locale);
-    //                if (locale == null || locale.equals("root")) {
-    //                    break;
-    //                }
-    //                otherParents.add(locale);
-    //            }
-    //        }
-    //        otherParents.remove("root");
-    //
-    //        for (String locale : CONFIG.getCldrFactory().getAvailable()) {
-    //            String parentId = LocaleIDParser.getParent(locale);
-    //            String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
-    //            if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
-    //                continue;
-    //            }
-    //            System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
-    //        }
-    //    }
-
     @Test
     public void testChinese() {
         LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
@@ -139,31 +105,10 @@ public class LocaleMatcherTest extends TestFmwk {
     @Test
     public void testFallbacks() {
         LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
-        final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
+        final LocaleMatcher matcher = newLocaleMatcher(lpl);
         assertEquals("hi", matcher.getBestMatch("sa").toString());
     }
 
-    @Test
-    public void testOverrideData() {
-        double threshold = 0.05;
-        LanguageMatcherData localeMatcherData = new LanguageMatcherData()
-        .addDistance("br", "fr", 10, true)
-        .addDistance("es", "cy", 10, true);
-        logln(localeMatcherData.toString());
-
-        final LocaleMatcher matcher = newLocaleMatcher(
-            LocalePriorityList
-            .add(ULocale.ENGLISH)
-            .add(ULocale.FRENCH)
-            .add(ULocale.UK)
-            .build(), localeMatcherData, threshold);
-        logln(matcher.toString());
-
-        assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
-        assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
-        // way
-    }
-
     @Test
     public void testBasics() {
         final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
@@ -184,7 +129,7 @@ public class LocaleMatcherTest extends TestFmwk {
         assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
         assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
         assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant_HK"));
-        assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT"));
+        assertEquals(new ULocale("iw"), matcher.getBestMatch("iw_IT"));
     }
 
     @Test
@@ -219,20 +164,8 @@ public class LocaleMatcherTest extends TestFmwk {
     @Test
     public void TestLocaleMatcherCoverage() {
         // Add tests for better code coverage
-        LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
+        LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build());
         logln(matcher.toString());
-
-        LanguageMatcherData data = new LanguageMatcherData();
-
-        LanguageMatcherData clone = data.cloneAsThawed();
-
-        if (clone.equals(data)) {
-            errln("Error cloneAsThawed() is equal.");
-        }
-
-        if (data.isFrozen()) {
-            errln("Error LocaleMatcherData is frozen!");
-        }
     }
 
     private void assertEquals(Object expected, Object string) {
@@ -251,17 +184,19 @@ public class LocaleMatcherTest extends TestFmwk {
 
     static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
 
+    private static double match(ULocale a, ULocale b) {
+        final LocaleMatcher matcher = new LocaleMatcher("");
+        return matcher.match(a, null, b, null);
+    }
+
     @Test
     public void testMatch_exact() {
-        assertEquals(1.0,
-            LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
+        assertEquals(1.0, match(ENGLISH_CANADA, ENGLISH_CANADA));
     }
 
     @Test
     public void testMatch_none() {
-        double match = LocaleMatcher.match(
-            new ULocale("ar_MK"),
-            ENGLISH_CANADA);
+        double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
         assertTrue("Actual < 0: " + match, 0 <= match);
         assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
     }
@@ -270,13 +205,12 @@ public class LocaleMatcherTest extends TestFmwk {
     public void testMatch_matchOnMazimized() {
         ULocale undTw = new ULocale("und_TW");
         ULocale zhHant = new ULocale("zh_Hant");
-        double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
-        double matchZhHant = LocaleMatcher.match(undTw, zhHant);
+        double matchZh = match(undTw, new ULocale("zh"));
+        double matchZhHant = match(undTw, zhHant);
         assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
             ") than to zh (" + matchZh + ")",
             matchZh < matchZhHant);
-        double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
-            zhHant);
+        double matchEnHantTw = match(new ULocale("en_Hant_TW"), zhHant);
         assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
             ") than to en_Hant_TW (" + matchEnHantTw + ")",
             matchEnHantTw < matchZhHant);
@@ -397,16 +331,9 @@ public class LocaleMatcherTest extends TestFmwk {
         assertEquals("it", matcher.getBestMatch("en").toString());
     }
 
-    // public void testGetBestMatch_emptyList() {
-    // final LocaleMatcher matcher = newLocaleMatcher(
-    // new LocalePriorityList(new HashMap()));
-    // assertNull(matcher.getBestMatch(ULocale.ENGLISH));
-    // }
-
     @Test
     public void testGetBestMatch_googlePseudoLocales() {
         // Google pseudo locales are primarily based on variant subtags.
-        // See http://sites/intl_eng/pseudo_locales.
         // (See below for the region code based fall back options.)
         final LocaleMatcher matcher = newLocaleMatcher(
             "fr, pt");
@@ -475,19 +402,25 @@ public class LocaleMatcherTest extends TestFmwk {
         check2(sorted);
     }
 
+    private static final ULocale posix = new ULocale("en_US_POSIX");
+
     /**
      * @param sorted
      */
     private void check2(Set<ULocale> sorted) {
-        // TODO Auto-generated method stub
         logln("Checking: " + sorted);
         LocaleMatcher matcher = newLocaleMatcher(
             LocalePriorityList.add(
                 sorted.toArray(new ULocale[sorted.size()]))
-                .build());
+            .build());
         for (ULocale loc : sorted) {
-            String stringLoc = loc.toString();
-            assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+            // The result may not be the exact same locale, but it must be equivalent.
+            // Variants and extensions are ignored.
+            if (loc.equals(posix)) { continue; }
+            ULocale max = ULocale.addLikelySubtags(loc);
+            ULocale best = matcher.getBestMatch(loc);
+            ULocale maxBest = ULocale.addLikelySubtags(best);
+            assertEquals(loc.toString(), max, maxBest);
         }
     }
 
@@ -502,29 +435,8 @@ public class LocaleMatcherTest extends TestFmwk {
     }
 
 
-    // public void testComputeDistance_monkeyTest() {
-    // RegionCode[] codes = RegionCode.values();
-    // Random random = new Random();
-    // for (int i = 0; i < 1000; ++i) {
-    // RegionCode x = codes[random.nextInt(codes.length)];
-    // RegionCode y = codes[random.nextInt(codes.length)];
-    // double d = LocaleMatcher.getRegionDistance(x, y, null, null);
-    // if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
-    // assertEquals(LocaleMatcher.REGION_DISTANCE, d);
-    // } else if (x == y) {
-    // assertEquals(0.0, d);
-    // } else {
-    // assertTrue(d > 0);
-    // assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
-    // }
-    // }
-    // }
-
     @Test
     public void testGetBestMatchForList_matchOnMaximized2() {
-//        if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-//            return;
-//        }
         final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
         // ja-JP matches ja on likely subtags, and it's listed first, thus it wins over
         // thus it wins over the second preference en-GB.
@@ -537,9 +449,6 @@ public class LocaleMatcherTest extends TestFmwk {
 
     @Test
     public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
-//        if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-//            return;
-//        }
         final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
         assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
         assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
@@ -547,23 +456,20 @@ public class LocaleMatcherTest extends TestFmwk {
 
     @Test
     public void testGetBestMatchForPortuguese() {
-
-//        if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-//            return;
-//        }
-
         final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
         final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
         // Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
 
         final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
-        // European user who prefers Spanish over Brazillian Portuguese as a fallback.
+        // European user who prefers Spanish over Brazilian Portuguese as a fallback.
 
         assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
         assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
-        assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
+        // The earlier pt_PT vs. pt_BR region mismatch is as good as the later es perfect match
+        // because of the demotion per desired locale.
+        assertEquals("pt_BR", withoutPT.getBestMatch("pt_PT, es, pt").toString());
 
-        // Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
+        // Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
         // The asymmetry between this case and above is because it's "pt_PT" that's missing between the
         // matchers as "pt_BR" is a much more common language.
         assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
@@ -578,9 +484,6 @@ public class LocaleMatcherTest extends TestFmwk {
 
     @Test
     public void testVariantWithScriptMatch() {
-//        if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-//            return;
-//        }
         final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
         assertEquals("en", matcher.getBestMatch("en-GB").toString());
         assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
@@ -588,80 +491,415 @@ public class LocaleMatcherTest extends TestFmwk {
 
     @Test
     public void testVariantWithScriptMatch2() {
-//        if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-//            return;
-//        }
         final LocaleMatcher matcher = newLocaleMatcher("en, sv");
         assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
     }
 
     @Test
-    public void testPerf() {
-        if (LANGUAGE_MATCHER_DATA == null) {
-            return; // skip except when testing data
+    public void Test8288() {
+        final LocaleMatcher matcher = newLocaleMatcher("it, en");
+        assertEquals("it", matcher.getBestMatch("und").toString());
+        assertEquals("en", matcher.getBestMatch("und, en").toString());
+    }
+
+    @Test
+    public void testDemotion() {
+        LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
+        LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
+        LocaleMatcher noDemotion = LocaleMatcher.builder().
+                setSupportedULocales(supported.getULocales()).
+                setDemotionPerDesiredLocale(LocaleMatcher.Demotion.NONE).build();
+        assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
+
+        LocaleMatcher regionDemotion = LocaleMatcher.builder().
+                setSupportedULocales(supported.getULocales()).
+                setDemotionPerDesiredLocale(LocaleMatcher.Demotion.REGION).build();
+        assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
+    }
+
+    private static final class PerfCase {
+        ULocale desired;
+        ULocale expectedShort;
+        ULocale expectedLong;
+        ULocale expectedVeryLong;
+
+        PerfCase(String des, String expShort, String expLong, String expVeryLong) {
+            desired = new ULocale(des);
+            expectedShort = new ULocale(expShort);
+            expectedLong = new ULocale(expLong);
+            expectedVeryLong = new ULocale(expVeryLong);
         }
-        final String desired = "sv, en";
-
-        final LocaleMatcher matcherShort = newLocaleMatcher(desired);
-        final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
-        final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, 
fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, 
smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
-
-        //LocaleMatcher.DEBUG = true;
-        ULocale expected = new ULocale("sv");
-        assertEquals(expected, matcherShort.getBestMatch(desired));
-        assertEquals(expected, matcherLong.getBestMatch(desired));
-        assertEquals(expected, matcherVeryLong.getBestMatch(desired));
-        //LocaleMatcher.DEBUG = false;
-
-        for (int i = 0; i < 2; ++i) {
-            int iterations = i == 0 ? 1000 : 100000;
-            boolean showMessage = i != 0;
-            long timeShort = timeLocaleMatcher("Duration (few  supported):\t", desired, matcherShort, showMessage, iterations, 0);
-            @SuppressWarnings("unused")
-            long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
-            @SuppressWarnings("unused")
-            long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
+    }
+
+    private static final int WARM_UP_ITERATIONS = 1000;
+    private static final int BENCHMARK_ITERATIONS = 20000;
+
+    @Test
+    public void testPerf() {
+        final String shortList = "en, sv";
+        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+                "zh-CN, zh-TW, zu";
+        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
+                "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
+                "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
+                "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
+                "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
+                "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
+                "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
+                "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
+                "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
+                "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
+                "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
+                // en_001 is deliberately omitted so that the "en_001" perf case has no exact match.
+                "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
+                "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
+                "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
+                "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
+                "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
+                "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
+                "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
+                "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
+                "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
+                "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
+                "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
+                "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
+                "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
+                "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
+                "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
+                "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
+                "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
+                "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
+                "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
+                "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
+                "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
+                "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
+                "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
+                "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
+                "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
+                "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
+                "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
+                "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
+                "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
+                "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
+                "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
+                "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
+                "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
+                "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
+                "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
+                "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
+                "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
+                "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
+                "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
+                "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
+                "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
+                "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
+                "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
+                "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
+                "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
+                "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
+                "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
+                "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
+                "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
+                "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
+                "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
+                "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+
+        final LocaleMatcher matcherShort = newLocaleMatcher(shortList);
+        final LocaleMatcher matcherLong = newLocaleMatcher(longList);
+        final LocaleMatcher matcherVeryLong = newLocaleMatcher(veryLongList);
+
+        PerfCase[] pcs = new PerfCase[] {
+                // Exact match in all matchers.
+                new PerfCase("sv", "sv", "sv", "sv"),
+                // Common locale, exact match only in very long list.
+                new PerfCase("fr_CA", "en", "fr", "fr_CA"),
+                // Unusual locale, no exact match.
+                new PerfCase("de_CA", "en", "de", "de"),
+                // World English maps to several region partitions.
+                new PerfCase("en_001", "en", "en", "en"),
+                // Ancient language with interesting subtags.
+                new PerfCase("egy_Copt_CY", "en", "af", "af")
+        };
+
+        for (PerfCase pc : pcs) {
+            final ULocale desired = pc.desired;
+
+            assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
+            assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
+            assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+
+            timeLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
+            timeLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
+            timeLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
+            long tns = timeLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (few  supported):\t%s\t%d\tnanos\n", desired, tns);
+            long tnl = timeLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
+            long tnv = timeLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
+            System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
         }
+
+        maximizePerf();
     }
 
-    private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
-        boolean showmessage, int iterations, long comparisonTime) {
+    private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
         long start = System.nanoTime();
         for (int i = iterations; i > 0; --i) {
             matcher.getBestMatch(desired);
         }
         long delta = System.nanoTime() - start;
-        if (showmessage) warnln(title + (delta / iterations) + " nanos, "
-            + (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
-        return delta;
+        return (delta / iterations);
+    }
+
+    private void maximizePerf() {
+        final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+                "zh-CN, zh-TW, zu";
+        LocalePriorityList list = LocalePriorityList.add(tags).build();
+        int few = 1000;
+        long t = timeMaximize(list, few);  // warm up
+        t = timeMaximize(list, few);  // measure for scale
+        long targetTime = 100000000L;  // 10^8 ns = 0.1s
+        int iterations = (int)((targetTime * few) / t);
+        t = timeMaximize(list, iterations);
+        int length = 0;
+        for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
+        System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
+                t + " ns / " + iterations + " iterations / " + length + " locales");
+    }
+
+    // Returns the total elapsed time in nanoseconds, not the time per iteration.
+    private  static long timeMaximize(Iterable<ULocale> list, int iterations) {
+        long start = System.nanoTime();
+        for (int i = iterations; i > 0; --i) {
+            for (ULocale locale : list) {
+                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+            }
+        }
+        return System.nanoTime() - start;
     }
 
     @Test
-    public void Test8288() {
-        final LocaleMatcher matcher = newLocaleMatcher("it, en");
-        assertEquals("it", matcher.getBestMatch("und").toString());
-        assertEquals("en", matcher.getBestMatch("und, en").toString());
+    public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
+        XLikelySubtags.Data built = LikelySubtagsBuilder.build();
+        XLikelySubtags.Data loaded = XLikelySubtags.Data.load();
+        assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
     }
 
-    @Test
-    public void TestTechPreview() {
-        final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
-        ULocale und = new ULocale("und");
-        ULocale bulgarian = new ULocale("bg");
-        ULocale russian = new ULocale("ru");
+    private static final class TestCase implements Cloneable {
+        private static final String ENDL = System.getProperties().getProperty("line.separator");
+
+        int lineNr = 0;
+
+        String nameLine = "";
+        String supportedLine = "";
+        String defaultLine = "";
+        String distanceLine = "";
+        String thresholdLine = "";
+        String matchLine = "";
+
+        String supported = "";
+        String def = "";
+        String favor = "";
+        String threshold = "";
+        String desired = "";
+        String expMatch = "";
+        String expDesired = "";
+        String expCombined = "";
+
+        @Override
+        public TestCase clone() throws CloneNotSupportedException {
+            return (TestCase) super.clone();
+        }
+
+        void reset(String newNameLine) {
+            nameLine = newNameLine;
+            supportedLine = "";
+            defaultLine = "";
+            distanceLine = "";
+            thresholdLine = "";
+
+            supported = "";
+            def = "";
+            favor = "";
+            threshold = "";
+        }
+
+        String toInputsKey() {
+            return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
+        }
+
+        private static void appendLine(StringBuilder sb, String line) {
+            if (!line.isEmpty()) {
+                sb.append(ENDL).append(line);
+            }
+        }
+
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder(nameLine);
+            appendLine(sb, supportedLine);
+            appendLine(sb, defaultLine);
+            appendLine(sb, distanceLine);
+            appendLine(sb, thresholdLine);
+            sb.append(ENDL).append("line ").append(lineNr).append(':');
+            appendLine(sb, matchLine);
+            return sb.toString();
+        }
+    }
 
-        Output<ULocale> outputBestDesired = new Output<>();
+    private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
+        if (prefix.length() <= limit && s.startsWith(prefix)) {
+            return s.substring(prefix.length(), limit);
+        } else {
+            return null;
+        }
+    }
+
+    // Used reflectively via @Parameters(method = "readTestCases"); not private, to avoid an unused-method warning.
+    static List<TestCase> readTestCases() throws Exception {
+        List<TestCase> tests = new ArrayList<>();
+        Map<String, Integer> uniqueTests = new HashMap<>();
+        TestCase test = new TestCase();
+        String filename = "data/localeMatcherTest.txt";
+        try (BufferedReader in = FileUtilities.openFile(LocaleMatcherTest.class, filename)) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                ++test.lineNr;
+                // Start of comment, or end of line, minus trailing spaces.
+                int limit = line.indexOf('#');
+                if (limit < 0) {
+                    limit = line.length();
+                }
+                char c;
+                while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
+                    --limit;
+                }
+                if (limit == 0) {  // empty line
+                    continue;
+                }
+                String suffix;
+                if (line.startsWith("** test: ")) {
+                    test.reset(line);
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
+                    test.supportedLine = line;
+                    test.supported = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
+                    test.defaultLine = line;
+                    test.def = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
+                    test.distanceLine = line;
+                    test.favor = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
+                    test.thresholdLine = line;
+                    test.threshold = suffix;
+                } else {
+                    int matchSep = line.indexOf(">>");
+                    // >> before an inline comment, and followed by more than white space.
+                    if (0 <= matchSep && (matchSep + 2) < limit) {
+                        test.matchLine = line;
+                        test.desired = line.substring(0, matchSep).trim();
+                        test.expDesired = test.expCombined = "";
+                        int start = matchSep + 2;
+                        int expLimit = line.indexOf('|', start);
+                        if (expLimit < 0) {
+                            test.expMatch = line.substring(start, limit).trim();
+                        } else {
+                            test.expMatch = line.substring(start, expLimit).trim();
+                            start = expLimit + 1;
+                            expLimit = line.indexOf('|', start);
+                            if (expLimit < 0) {
+                                test.expDesired = line.substring(start, limit).trim();
+                            } else {
+                                test.expDesired = line.substring(start, expLimit).trim();
+                                test.expCombined = line.substring(expLimit + 1, limit).trim();
+                            }
+                        }
+                        String inputs = test.toInputsKey();
+                        Integer prevIndex = uniqueTests.get(inputs);
+                        if (prevIndex == null) {
+                            uniqueTests.put(inputs, tests.size());
+                        } else {
+                            System.out.println("Locale matcher test case on line " + test.lineNr
+                                    + " is a duplicate of line " + tests.get(prevIndex).lineNr);
+                        }
+                        tests.add(test.clone());
+                    } else {
+                        throw new IllegalArgumentException("test data syntax error on line "
+                                + test.lineNr + "\n" + line);
+                    }
+                }
+            }
+        }
+        System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
+        return tests;
+    }
 
-        ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
-        assertEquals(ULocale.ITALIAN, best);
-        assertEquals(null, outputBestDesired.value);
+    private static ULocale getULocaleOrNull(String s) {
+        if (s.equals("null")) {
+            return null;
+        } else {
+            return new ULocale(s);
+        }
+    }
 
-        matcher.setDefaultLanguage(ULocale.JAPANESE);
-        best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
-        assertEquals(ULocale.JAPANESE, best);
+    @Test
+    @Parameters(method = "readTestCases")
+    public void dataDriven(TestCase test) {
+        LocaleMatcher matcher;
+        if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
+            matcher = new LocaleMatcher(test.supported);
+        } else {
+            LocaleMatcher.Builder builder = LocaleMatcher.builder();
+            builder.setSupportedLocales(test.supported);
+            if (!test.def.isEmpty()) {
+                builder.setDefaultULocale(new ULocale(test.def));
+            }
+            if (!test.favor.isEmpty()) {
+                FavorSubtag favor;
+                switch (test.favor) {
+                case "normal":
+                    favor = FavorSubtag.LANGUAGE;
+                    break;
+                case "script":
+                    favor = FavorSubtag.SCRIPT;
+                    break;
+                default:
+                    throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
+                }
+                builder.setFavorSubtag(favor);
+            }
+            if (!test.threshold.isEmpty()) {
+                int threshold = Integer.valueOf(test.threshold);
+                builder.internalSetThresholdDistance(threshold);
+            }
+            matcher = builder.build();
+        }
 
-        matcher.setFavorScript(true);
-        best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired);
-        assertEquals(russian, best);
+        ULocale expMatch = getULocaleOrNull(test.expMatch);
+        if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
+            ULocale bestSupported = matcher.getBestMatch(test.desired);
+            assertEquals("bestSupported", expMatch, bestSupported);
+        } else {
+            LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
+            LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
+            assertEquals("bestSupported", expMatch, result.getSupportedULocale());
+            if (!test.expDesired.isEmpty()) {
+                ULocale expDesired = getULocaleOrNull(test.expDesired);
+                assertEquals("bestDesired", expDesired, result.getDesiredULocale());
+            }
+            if (!test.expCombined.isEmpty()) {
+                ULocale expCombined = getULocaleOrNull(test.expCombined);
+                ULocale combined = result.makeServiceULocale();
+                assertEquals("combined", expCombined, combined);
+            }
+        }
     }
 }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java
deleted file mode 100644 (file)
index f06e8be..0000000
+++ /dev/null
@@ -1,612 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.dev.test.util;
-
-import java.io.BufferedReader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-
-import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
-import com.ibm.icu.impl.locale.XLikelySubtags;
-import com.ibm.icu.impl.locale.XLocaleMatcher;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
-import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.ULocale;
-
-import junitparams.JUnitParamsRunner;
-import junitparams.Parameters;
-
-/**
- * Test the XLocaleMatcher.
- *
- * @author markdavis
- */
-@RunWith(JUnitParamsRunner.class)
-public class XLocaleMatcherTest extends TestFmwk {
-    private static final int REGION_DISTANCE = 4;
-
-    private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
-
-    private XLocaleMatcher newXLocaleMatcher() {
-        return new XLocaleMatcher("");
-    }
-
-    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
-        return new XLocaleMatcher(build);
-    }
-
-    private XLocaleMatcher newXLocaleMatcher(String string) {
-        return new XLocaleMatcher(LocalePriorityList.add(string).build());
-    }
-
-    @SuppressWarnings("unused")
-    private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
-        return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
-                internalSetThresholdDistance(d).build();
-    }
-
-    //    public void testParentLocales() {
-    //        // find all the regions that have a closer relation because of an explicit parent
-    //        Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
-    //        explicitParents.remove("root");
-    //        Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
-    //        for (String locale : explicitParents) {
-    //            while (true) {
-    //                locale = LocaleIDParser.getParent(locale);
-    //                if (locale == null || locale.equals("root")) {
-    //                    break;
-    //                }
-    //                otherParents.add(locale);
-    //            }
-    //        }
-    //        otherParents.remove("root");
-    //
-    //        for (String locale : CONFIG.getCldrFactory().getAvailable()) {
-    //            String parentId = LocaleIDParser.getParent(locale);
-    //            String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
-    //            if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
-    //                continue;
-    //            }
-    //            System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
-    //        }
-    //    }
-
-
-// TBD reenable with override data
-//    public void testOverrideData() {
-//        double threshold = 0.05;
-//        XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
-//        .addDistance("br", "fr", 10, true)
-//        .addDistance("es", "cy", 10, true);
-//        logln(XLocaleMatcherData.toString());
-//
-//        final XLocaleMatcher matcher = newXLocaleMatcher(
-//            LocalePriorityList
-//            .add(ULocale.ENGLISH)
-//            .add(ULocale.FRENCH)
-//            .add(ULocale.UK)
-//            .build(), XLocaleMatcherData, threshold);
-//        logln(matcher.toString());
-//
-//        assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
-//        assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
-//        // way
-//    }
-
-
-    /**
-     * If all the base languages are the same, then each sublocale matches
-     * itself most closely
-     */
-    @Test
-    public void testExactMatches() {
-        String lastBase = "";
-        TreeSet<ULocale> sorted = new TreeSet<>();
-        for (ULocale loc : ULocale.getAvailableLocales()) {
-            String language = loc.getLanguage();
-            if (!lastBase.equals(language)) {
-                check(sorted);
-                sorted.clear();
-                lastBase = language;
-            }
-            sorted.add(loc);
-        }
-        check(sorted);
-    }
-
-    private void check(Set<ULocale> sorted) {
-        if (sorted.isEmpty()) {
-            return;
-        }
-        check2(sorted);
-        ULocale first = sorted.iterator().next();
-        ULocale max = ULocale.addLikelySubtags(first);
-        sorted.add(max);
-        check2(sorted);
-    }
-
-    private static final ULocale posix = new ULocale("en_US_POSIX");
-
-    /**
-     * @param sorted
-     */
-    private void check2(Set<ULocale> sorted) {
-        logln("Checking: " + sorted);
-        XLocaleMatcher matcher = newXLocaleMatcher(
-            LocalePriorityList.add(
-                sorted.toArray(new ULocale[sorted.size()]))
-            .build());
-        for (ULocale loc : sorted) {
-            // The result may not be the exact same locale, but it must be equivalent.
-            // Variants and extensions are ignored.
-            if (loc.equals(posix)) { continue; }
-            ULocale max = ULocale.addLikelySubtags(loc);
-            ULocale best = matcher.getBestMatch(loc);
-            ULocale maxBest = ULocale.addLikelySubtags(best);
-            assertEquals(loc.toString(), max, maxBest);
-        }
-    }
-
-    @Test
-    public void testDemotion() {
-        LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
-        LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
-        XLocaleMatcher noDemotion = XLocaleMatcher.builder().
-                setSupportedULocales(supported.getULocales()).
-                setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build();
-        assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
-
-        XLocaleMatcher regionDemotion = XLocaleMatcher.builder().
-                setSupportedULocales(supported.getULocales()).
-                setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build();
-        assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
-    }
-
-    private static final class PerfCase {
-        ULocale desired;
-        ULocale expectedShort;
-        ULocale expectedLong;
-        ULocale expectedVeryLong;
-
-        PerfCase(String des, String expShort, String expLong, String expVeryLong) {
-            desired = new ULocale(des);
-            expectedShort = new ULocale(expShort);
-            expectedLong = new ULocale(expLong);
-            expectedVeryLong = new ULocale(expVeryLong);
-        }
-    }
-
-    private static final int WARM_UP_ITERATIONS = 1000;
-    private static final int BENCHMARK_ITERATIONS = 20000;
-    private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
-    private static final int AVG_PCT_LONG_NEW_OLD = 80;
-
-    @Test
-    public void testPerf() {
-        if (LANGUAGE_MATCHER_DATA == null) {
-            return; // skip except when testing data
-        }
-
-        final String shortList = "en, sv";
-        final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
-                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
-                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
-                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
-                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
-                "zh-CN, zh-TW, zu";
-        final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
-                "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
-                "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
-                "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
-                "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
-                "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
-                "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
-                "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
-                "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
-                "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
-                "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
-                // removed en_001 to avoid exact match
-                "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
-                "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
-                "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
-                "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
-                "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
-                "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
-                "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
-                "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
-                "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
-                "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
-                "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
-                "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
-                "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
-                "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
-                "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
-                "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
-                "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
-                "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
-                "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
-                "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
-                "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
-                "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
-                "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
-                "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
-                "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
-                "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
-                "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
-                "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
-                "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
-                "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
-                "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
-                "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
-                "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
-                "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
-                "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
-                "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
-                "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
-                "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
-                "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
-                "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
-                "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
-                "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
-                "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
-                "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
-                "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
-                "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
-                "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
-                "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
-                "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
-                "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
-                "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
-                "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
-
-        final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
-        final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
-        final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
-
-        final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
-        final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
-        final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
-
-        long timeShortNew=0;
-        long timeMediumNew=0;
-        long timeLongNew=0;
-
-        long timeShortOld=0;
-        long timeMediumOld=0;
-        long timeLongOld=0;
-
-        PerfCase[] pcs = new PerfCase[] {
-                // Exact match in all matchers.
-                new PerfCase("sv", "sv", "sv", "sv"),
-                // Common locale, exact match only in very long list.
-                new PerfCase("fr_CA", "en", "fr", "fr_CA"),
-                // Unusual locale, no exact match.
-                new PerfCase("de_CA", "en", "de", "de"),
-                // World English maps to several region partitions.
-                new PerfCase("en_001", "en", "en", "en"),
-                // Ancient language with interesting subtags.
-                new PerfCase("egy_Copt_CY", "en", "af", "af")
-        };
-
-        for (PerfCase pc : pcs) {
-            final ULocale desired = pc.desired;
-
-            assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
-            assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
-            assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
-
-            timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
-            timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
-            timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
-            long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
-            System.out.format("New Duration (few  supported):\t%s\t%d\tnanos\n", desired, tns);
-            timeShortNew += tns;
-            long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
-            System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
-            timeMediumNew += tnl;
-            long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
-            System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
-            timeLongNew += tnv;
-
-            timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
-            timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
-            timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
-            long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
-            System.out.format("Old Duration (few  supported):\t%s\t%d\tnanos  new/old=%d%%\n",
-                    desired, tos, (100 * tns) / tos);
-            timeShortOld += tos;
-            long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
-            System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos  new/old=%d%%\n",
-                    desired, tol, (100 * tnl) / tol);
-            timeMediumOld += tol;
-            long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
-            System.out.format("Old Duration (many supported):\t%s\t%d\tnanos  new/old=%d%%\n",
-                    desired, tov, (100 * tnv) / tov);
-            timeLongOld += tov;
-        }
-
-        assertTrue(
-                String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
-                        timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
-                timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
-        assertTrue(
-                String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
-                        timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
-                timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
-        assertTrue(
-                String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
-                        timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
-                timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
-
-        maximizePerf();
-    }
-
-    private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
-        long start = System.nanoTime();
-        for (int i = iterations; i > 0; --i) {
-            matcher.getBestMatch(desired);
-        }
-        long delta = System.nanoTime() - start;
-        return (delta / iterations);
-    }
-
-    private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
-        long start = System.nanoTime();
-        for (int i = iterations; i > 0; --i) {
-            matcher.getBestMatch(desired);
-        }
-        long delta = System.nanoTime() - start;
-        return (delta / iterations);
-    }
-
-    private void maximizePerf() {
-        final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
-                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
-                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
-                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
-                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
-                "zh-CN, zh-TW, zu";
-        LocalePriorityList list = LocalePriorityList.add(tags).build();
-        int few = 1000;
-        long t = timeMaximize(list, few);  // warm up
-        t = timeMaximize(list, few);  // measure for scale
-        long targetTime = 100000000L;  // 10^8 ns = 0.1s
-        int iterations = (int)((targetTime * few) / t);
-        t = timeMaximize(list, iterations);
-        int length = 0;
-        for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
-        System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
-                t + " ns / " + iterations + " iterations / " + length + " locales");
-    }
-
-    // returns total ns not per iteration
-    private  static long timeMaximize(Iterable<ULocale> list, int iterations) {
-        long start = System.nanoTime();
-        for (int i = iterations; i > 0; --i) {
-            for (ULocale locale : list) {
-                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
-            }
-        }
-        return System.nanoTime() - start;
-    }
-
-    private static final class TestCase implements Cloneable {
-        private static final String ENDL = System.getProperties().getProperty("line.separator");
-
-        int lineNr = 0;
-
-        String nameLine = "";
-        String supportedLine = "";
-        String defaultLine = "";
-        String distanceLine = "";
-        String thresholdLine = "";
-        String matchLine = "";
-
-        String supported = "";
-        String def = "";
-        String favor = "";
-        String threshold = "";
-        String desired = "";
-        String expMatch = "";
-        String expDesired = "";
-        String expCombined = "";
-
-        @Override
-        public TestCase clone() throws CloneNotSupportedException {
-            return (TestCase) super.clone();
-        }
-
-        void reset(String newNameLine) {
-            nameLine = newNameLine;
-            supportedLine = "";
-            defaultLine = "";
-            distanceLine = "";
-            thresholdLine = "";
-
-            supported = "";
-            def = "";
-            favor = "";
-            threshold = "";
-        }
-
-        String toInputsKey() {
-            return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
-        }
-
-        private static void appendLine(StringBuilder sb, String line) {
-            if (!line.isEmpty()) {
-                sb.append(ENDL).append(line);
-            }
-        }
-
-        @Override
-        public String toString() {
-            StringBuilder sb = new StringBuilder(nameLine);
-            appendLine(sb, supportedLine);
-            appendLine(sb, defaultLine);
-            appendLine(sb, distanceLine);
-            appendLine(sb, thresholdLine);
-            sb.append(ENDL).append("line ").append(lineNr).append(':');
-            appendLine(sb, matchLine);
-            return sb.toString();
-        }
-    }
-
-    private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
-        if (prefix.length() <= limit && s.startsWith(prefix)) {
-            return s.substring(prefix.length(), limit);
-        } else {
-            return null;
-        }
-    }
-
-    // UsedReflectively, not private to avoid unused-warning
-    static List<TestCase> readTestCases() throws Exception {
-        List<TestCase> tests = new ArrayList<>();
-        Map<String, Integer> uniqueTests = new HashMap<>();
-        TestCase test = new TestCase();
-        String filename = "data/localeMatcherTest.txt";
-        try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) {
-            String line;
-            while ((line = in.readLine()) != null) {
-                ++test.lineNr;
-                // Start of comment, or end of line, minus trailing spaces.
-                int limit = line.indexOf('#');
-                if (limit < 0) {
-                    limit = line.length();
-                }
-                char c;
-                while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
-                    --limit;
-                }
-                if (limit == 0) {  // empty line
-                    continue;
-                }
-                String suffix;
-                if (line.startsWith("** test: ")) {
-                    test.reset(line);
-                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
-                    test.supportedLine = line;
-                    test.supported = suffix;
-                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
-                    test.defaultLine = line;
-                    test.def = suffix;
-                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
-                    test.distanceLine = line;
-                    test.favor = suffix;
-                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
-                    test.thresholdLine = line;
-                    test.threshold = suffix;
-                } else {
-                    int matchSep = line.indexOf(">>");
-                    // >> before an inline comment, and followed by more than white space.
-                    if (0 <= matchSep && (matchSep + 2) < limit) {
-                        test.matchLine = line;
-                        test.desired = line.substring(0, matchSep).trim();
-                        test.expDesired = test.expCombined = "";
-                        int start = matchSep + 2;
-                        int expLimit = line.indexOf('|', start);
-                        if (expLimit < 0) {
-                            test.expMatch = line.substring(start, limit).trim();
-                        } else {
-                            test.expMatch = line.substring(start, expLimit).trim();
-                            start = expLimit + 1;
-                            expLimit = line.indexOf('|', start);
-                            if (expLimit < 0) {
-                                test.expDesired = line.substring(start, limit).trim();
-                            } else {
-                                test.expDesired = line.substring(start, expLimit).trim();
-                                test.expCombined = line.substring(expLimit + 1, limit).trim();
-                            }
-                        }
-                        String inputs = test.toInputsKey();
-                        Integer prevIndex = uniqueTests.get(inputs);
-                        if (prevIndex == null) {
-                            uniqueTests.put(inputs, tests.size());
-                        } else {
-                            System.out.println("Locale matcher test case on line " + test.lineNr
-                                    + " is a duplicate of line " + tests.get(prevIndex).lineNr);
-                        }
-                        tests.add(test.clone());
-                    } else {
-                        throw new IllegalArgumentException("test data syntax error on line "
-                                + test.lineNr + "\n" + line);
-                    }
-                }
-            }
-        }
-        System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
-        return tests;
-    }
-
-    private static ULocale getULocaleOrNull(String s) {
-        if (s.equals("null")) {
-            return null;
-        } else {
-            return new ULocale(s);
-        }
-    }
-
-    @Test
-    @Parameters(method = "readTestCases")
-    public void dataDriven(TestCase test) {
-        XLocaleMatcher matcher;
-        if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
-            matcher = new XLocaleMatcher(test.supported);
-        } else {
-            XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
-            builder.setSupportedLocales(test.supported);
-            if (!test.def.isEmpty()) {
-                builder.setDefaultULocale(new ULocale(test.def));
-            }
-            if (!test.favor.isEmpty()) {
-                FavorSubtag favor;
-                switch (test.favor) {
-                case "normal":
-                    favor = FavorSubtag.LANGUAGE;
-                    break;
-                case "script":
-                    favor = FavorSubtag.SCRIPT;
-                    break;
-                default:
-                    throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
-                }
-                builder.setFavorSubtag(favor);
-            }
-            if (!test.threshold.isEmpty()) {
-                int threshold = Integer.valueOf(test.threshold);
-                builder.internalSetThresholdDistance(threshold);
-            }
-            matcher = builder.build();
-        }
-
-        ULocale expMatch = getULocaleOrNull(test.expMatch);
-        if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
-            ULocale bestSupported = matcher.getBestMatch(test.desired);
-            assertEquals("bestSupported", expMatch, bestSupported);
-        } else {
-            LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
-            XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
-            assertEquals("bestSupported", expMatch, result.getSupportedULocale());
-            if (!test.expDesired.isEmpty()) {
-                ULocale expDesired = getULocaleOrNull(test.expDesired);
-                assertEquals("bestDesired", expDesired, result.getDesiredULocale());
-            }
-            if (!test.expCombined.isEmpty()) {
-                ULocale expCombined = getULocaleOrNull(test.expCombined);
-                ULocale combined = result.makeServiceULocale();
-                assertEquals("combined", expCombined, combined);
-            }
-        }
-    }
-}
index 3ae236d9f9b97bab13e34adcbc3b9bc43520b0d6..858b05b33e3be6899382d6e004b71b394d65588d 100644 (file)
@@ -3,10 +3,8 @@
        <classpathentry kind="src" path="src"/>
        <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
        <classpathentry combineaccessrules="false" kind="src" path="/icu4j-core"/>
-       <classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
        <classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit"/>
        <classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate"/>
        <classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
-       <classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
        <classpathentry kind="output" path="out/bin"/>
 </classpath>
index 16a37bc03da16145f24b8e3ca2c29638fdfd9b1e..2fbf5974887b60c1d60d2582235e8a2b61fd7ce6 100644 (file)
@@ -3,10 +3,6 @@
        <name>icu4j-tools</name>
        <comment></comment>
        <projects>
-               <project>icu4j-core</project>
-               <project>icu4j-core-tests</project>
-               <project>icu4j-shared</project>
-               <project>icu4j-test-framework</project>
        </projects>
        <buildSpec>
                <buildCommand>
similarity index 97%
rename from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java
rename to icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java
index a6bdbf695bea507b0b6ccfe5bb48c845fd536937..813d6f8f81badb05ed7670052cd707b7ca50ad66 100644 (file)
@@ -1,6 +1,6 @@
 // © 2017 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
+package com.ibm.icu.dev.tool.locale;
 
 import java.nio.ByteBuffer;
 import java.util.Collection;
@@ -14,10 +14,11 @@ import java.util.TreeMap;
 import com.ibm.icu.impl.ICUData;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.LSR;
 import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
 import com.ibm.icu.impl.locale.XCldrStub.Multimap;
 import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.util.BytesTrieBuilder;
 import com.ibm.icu.util.ICUException;
 
@@ -25,7 +26,7 @@ import com.ibm.icu.util.ICUException;
  * Builds data for XLikelySubtags.
  * Reads source data from ICU resource bundles.
  */
-class LikelySubtagsBuilder {
+public class LikelySubtagsBuilder {
     private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
 
     private static ICUResourceBundle getSupplementalDataBundle(String name) {
@@ -50,7 +51,7 @@ class LikelySubtagsBuilder {
             UResource.Key key = new UResource.Key();
             for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
                 String aliasFrom = key.toString();
-                if (aliasFrom.contains("_")) {
+                if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
                     continue; // only simple aliasing
                 }
                 UResource.Table table = value.getTable();
@@ -113,7 +114,7 @@ class LikelySubtagsBuilder {
             }
         }
 
-        BytesTrie build() {
+        byte[] build() {
             ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
             // Allocate an array with just the necessary capacity,
             // so that we do not hold on to a larger array for a long time.
@@ -122,11 +123,12 @@ class LikelySubtagsBuilder {
             if (DEBUG_OUTPUT) {
                 System.out.println("likely subtags trie size: " + bytes.length + " bytes");
             }
-            return new BytesTrie(bytes, 0);
+            return bytes;
         }
     }
 
-    static XLikelySubtags.Data build() {
+    // VisibleForTesting
+    public static XLikelySubtags.Data build() {
         AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
         AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
 
@@ -202,7 +204,7 @@ class LikelySubtagsBuilder {
                 }
             }
         }
-        BytesTrie trie = trieBuilder.build();
+        byte[] trie = trieBuilder.build();
         LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
         return new XLikelySubtags.Data(
                 languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
similarity index 87%
rename from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java
rename to icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
index 83cbe4a3e74d3d976380c7747a6f7ef8e1e95b9c..327f714b221bf4856fd74855b87fdc8b3558e734 100644 (file)
@@ -1,8 +1,15 @@
 // © 2017 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
+package com.ibm.icu.dev.tool.locale;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -17,11 +24,13 @@ import java.util.TreeSet;
 import com.ibm.icu.impl.ICUData;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.LSR;
+import com.ibm.icu.impl.locale.LocaleDistance;
 import com.ibm.icu.impl.locale.XCldrStub.Multimap;
 import com.ibm.icu.impl.locale.XCldrStub.Predicate;
 import com.ibm.icu.impl.locale.XCldrStub.Splitter;
 import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
-import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.util.BytesTrieBuilder;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.ULocale;
@@ -153,7 +162,7 @@ public final class LocaleDistanceBuilder {
             }
         }
 
-        BytesTrie build() {
+        byte[] build() {
             ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
             // Allocate an array with just the necessary capacity,
             // so that we do not hold on to a larger array for a long time.
@@ -162,7 +171,7 @@ public final class LocaleDistanceBuilder {
             if (DEBUG_OUTPUT) {
                 System.out.println("distance trie size: " + bytes.length + " bytes");
             }
-            return new BytesTrie(bytes, 0);
+            return bytes;
         }
     }
 
@@ -468,7 +477,8 @@ public final class LocaleDistanceBuilder {
         return result;
     }
 
-    static LocaleDistance build() {
+    // VisibleForTesting
+    public static LocaleDistance.Data build() {
         // From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
         //   and then paradigmLocales, matchVariable, and the last languageMatch items.
         ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
@@ -591,8 +601,8 @@ public final class LocaleDistanceBuilder {
 
         TrieBuilder trieBuilder = new TrieBuilder();
         defaultDistanceTable.toTrie(trieBuilder);
-        BytesTrie trie = trieBuilder.build();
-        return new LocaleDistance(
+        byte[] trie = trieBuilder.build();
+        return new LocaleDistance.Data(
                 trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
                 paradigmLSRs, distances);
     }
@@ -845,4 +855,112 @@ public final class LocaleDistanceBuilder {
             }
         }
     }
+
+    private static final String TXT_PATH = "/tmp";  // directory that main() writes into
+    private static final String TXT_FILE_BASE_NAME = "langInfo";  // also the bundle's table name
+    private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt";
+
+    // Opens TXT_PATH/TXT_FILE_NAME for writing as UTF-8 text; the caller closes the writer.
+    private static PrintWriter openWriter() throws IOException {
+        FileOutputStream stream = new FileOutputStream(new File(TXT_PATH, TXT_FILE_NAME));
+        OutputStreamWriter utf8 = new OutputStreamWriter(stream, StandardCharsets.UTF_8);
+        // 4096 is the buffer size handed to BufferedWriter.
+        return new PrintWriter(new BufferedWriter(utf8, 4096));
+    }
+
+    // Writes bytes as two-digit lowercase hex, 16 bytes per line, with no separators;
+    // always ends with a line break, so an empty array produces one empty line.
+    private static void printManyHexBytes(PrintWriter out, byte[] bytes) {
+        for (int index = 0; index < bytes.length; ++index) {
+            boolean startsNewRow = index > 0 && index % 16 == 0;
+            if (startsNewRow) {
+                out.println();
+            }
+            out.format("%02x", bytes[index] & 0xff);
+        }
+        out.println();
+    }
+
+    /**
+     * Builds the likely-subtags and locale-distance data, then writes both into one
+     * resource bundle source file, {@code TXT_PATH/TXT_FILE_NAME}.
+     * @param args ignored
+     * @throws IOException if the file cannot be opened or a write error was recorded
+     */
+    public static final void main(String[] args) throws IOException {
+        XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
+        LocaleDistance.Data distanceData = build();
+        System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
+        try (PrintWriter out = openWriter()) {
+            out.println("// © 2019 and later: Unicode, Inc. and others.\n" +
+                    "// License & terms of use: http://www.unicode.org/copyright.html#License\n" +
+                    "// Generated by ICU4J LocaleDistanceBuilder.\n" +
+                    TXT_FILE_BASE_NAME + ":table(nofallback){");
+            out.println("    likely{");
+            out.println("        languageAliases{  // " + likelyData.languageAliases.size());
+            for (Map.Entry<String, String> entry :
+                    new TreeMap<>(likelyData.languageAliases).entrySet()) {
+                out.println("            \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
+            }
+            out.println("        }  // languageAliases");
+
+            out.println("        regionAliases{  // " + likelyData.regionAliases.size());
+            for (Map.Entry<String, String> entry :
+                    new TreeMap<>(likelyData.regionAliases).entrySet()) {
+                out.println("            \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
+            }
+            out.println("        }  // regionAliases");
+
+            out.println("        trie:bin{  // BytesTrie: " + likelyData.trie.length + " bytes");
+            printManyHexBytes(out, likelyData.trie);
+            out.println("        }  // trie");
+
+            out.println("        lsrs{  // " + likelyData.lsrs.length);
+            for (LSR lsr : likelyData.lsrs) {
+                out.println("            \"" + lsr.language + "\",\"" +
+                        lsr.script + "\",\"" + lsr.region + "\",");
+            }
+            out.println("        }  // lsrs");
+            out.println("    }  // likely");
+
+            out.println("    match{");
+            out.println("        trie:bin{  // BytesTrie: " + distanceData.trie.length + " bytes");
+            printManyHexBytes(out, distanceData.trie);
+            out.println("        }  // trie");
+
+            out.println("        regionToPartitions:bin{  // " +
+                    distanceData.regionToPartitionsIndex.length + " bytes");
+            printManyHexBytes(out, distanceData.regionToPartitionsIndex);
+            out.println("        }  // regionToPartitions");
+
+            out.print("        partitions{");
+            String separator = "";  // empty before the first item, "," before every later one
+            for (String p : distanceData.partitionArrays) {
+                out.append(separator).append('"').print(p);
+                out.append('"');
+                separator = ",";
+            }
+            out.println("}");
+
+            out.println("        paradigms{");
+            for (LSR lsr : distanceData.paradigmLSRs) {
+                out.println("            \"" + lsr.language + "\",\"" +
+                        lsr.script + "\",\"" + lsr.region + "\",");
+            }
+            out.println("        }");
+
+            out.print("        distances:intvector{");
+            separator = "";
+            for (int d : distanceData.distances) {
+                out.append(separator).print(d);
+                separator = ",";
+            }
+            out.println("}");
+            out.println("    }  // match");
+            out.println("}");
+            if (out.checkError()) {  // PrintWriter records write errors instead of throwing
+                throw new IOException("Error writing " + TXT_PATH + '/' + TXT_FILE_NAME);
+            }
+        }
+    }
 }