From: Markus Scherer Date: Sat, 16 Mar 2019 00:13:11 +0000 (-0700) Subject: ICU-20467 replace the LocaleMatcher implementation, load data from new bundle X-Git-Tag: release-65-rc~180 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=61c4a728cd2919ee4215b38f9ad2e3adf9b7feb2;p=icu ICU-20467 replace the LocaleMatcher implementation, load data from new bundle - remove the old LocaleMatcher implementation code - move the XLocaleMatcher code into LocaleMatcher, same for test - remove unused internal methods - stop comparing old vs. new performance - generate langInfo.txt resource bundle file with precomputed likely-subtags and matcher data - make genrb handle multi-line binary values - load likely-subtags & distance data from new langInfo.res bundle - test that built data == loaded data - move data builders to tools, no more runtime dependency on builder code --- diff --git a/docs/userguide/icu_data/buildtool.md b/docs/userguide/icu_data/buildtool.md index 839db06c4fa..f691c9a986f 100644 --- a/docs/userguide/icu_data/buildtool.md +++ b/docs/userguide/icu_data/buildtool.md @@ -191,7 +191,7 @@ summarizes the ICU data files and their corresponding features and categories: | Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB | | Currencies | `"misc"`
`"curr_supplemental"`
`"curr_tree"` | misc/currencyNumericCodes.txt
curr/supplementalData.txt
curr/\*.txt | 3.1 KiB
27 KiB
**2.5 MiB** | | Language Display
Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** | -| Language Tags | `"misc"` | misc/keyTypeData.txt
misc/likelySubtags.txt
misc/metadata.txt | 6.8 KiB
53 KiB
33 KiB | +| Language Tags | `"misc"` | misc/keyTypeData.txt
misc/langInfo.txt
misc/likelySubtags.txt
misc/metadata.txt | 6.8 KiB
37 KiB
53 KiB
33 KiB | | Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB | | Plural Rules | `"misc"` | misc/pluralRanges.txt
misc/plurals.txt | 3.3 KiB
33 KiB | | Region Display
Names | `"region_tree"` | region/\*.txt | **1.1 MiB** | diff --git a/icu4c/source/data/misc/langInfo.txt b/icu4c/source/data/misc/langInfo.txt new file mode 100644 index 00000000000..ca804f046fd --- /dev/null +++ b/icu4c/source/data/misc/langInfo.txt @@ -0,0 +1,2614 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +// Generated by ICU4J LocaleDistanceBuilder. +langInfo:table(nofallback){ + likely{ + languageAliases{ // 164 + "aam","aas", + "adp","dz", + "aju","jrb", + "alb","sq", + "als","sq", + "arb","ar", + "arm","hy", + "aue","ktz", + "ayr","ay", + "ayx","nun", + "azj","az", + "baq","eu", + "bcc","bal", + "bcl","bik", + "bgm","bcg", + "bh","bho", + "bjd","drl", + "bur","my", + "bxk","luy", + "bxr","bua", + "ccq","rki", + "chi","zh", + "cjr","mom", + "cka","cmr", + "cld","syr", + "cmk","xch", + "cmn","zh", + "coy","pij", + "cqu","quh", + "cwd","cr", + "cze","cs", + "dgo","doi", + "dhd","mwr", + "dik","din", + "diq","zza", + "drh","mn", + "dut","nl", + "ekk","et", + "emk","man", + "esk","ik", + "fat","ak", + "fre","fr", + "fuc","ff", + "gav","dev", + "gaz","om", + "gbo","grb", + "geo","ka", + "ger","de", + "gfx","vaj", + "ggn","gvr", + "gno","gon", + "gre","el", + "gti","nyc", + "gug","gn", + "guv","duz", + "gya","gba", + "hdn","hai", + "hea","hmn", + "him","srx", + "hrr","jal", + "ibi","opa", + "ice","is", + "ike","iu", + "ilw","gal", + "in","id", + "iw","he", + "jeg","oyb", + "ji","yi", + "jw","jv", + "kgc","tdf", + "kgh","kml", + "khk","mn", + "kmr","ku", + "knc","kr", + "kng","kg", + "knn","kok", + "koj","kwv", + "kpv","kv", + "krm","bmf", + "ktr","dtp", + "kvs","gdj", + "kwq","yam", + "kxe","tvd", + "kzj","dtp", + "kzt","dtp", + "lbk","bnc", + "lii","raq", + "lmm","rmx", + "lvs","lv", + "mac","mk", + "mao","mi", + "may","ms", + "meg","cir", + "mhr","chm", + "mnk","man", + "mo","ro", + "mst","mry", + "mup","raj", + "mwj","vaj", + "myt","mry", + "nad","xny", + "ncp","kdz", + 
"nnx","ngv", + "no","nb", + "npi","ne", + "nts","pij", + "ojg","oj", + "ory","or", + "oun","vaj", + "pbu","ps", + "pcr","adx", + "per","fa", + "pes","fa", + "plt","mg", + "pmc","huw", + "pmu","phr", + "pnb","lah", + "ppa","bfy", + "ppr","lcq", + "pry","prt", + "puz","pub", + "quz","qu", + "rmy","rom", + "rum","ro", + "sca","hle", + "scc","sr", + "scr","hr", + "skk","oyb", + "slo","sk", + "spy","kln", + "src","sc", + "swh","sw", + "tdu","dtp", + "thc","tpo", + "thx","oyb", + "tib","bo", + "tie","ras", + "tkk","twm", + "tl","fil", + "tlw","weo", + "tmp","tyj", + "tne","kak", + "tsf","taj", + "ttq","tmh", + "tw","ak", + "umu","del", + "uok","ema", + "uzn","uz", + "wel","cy", + "xba","cax", + "xia","acn", + "xkh","waw", + "xpe","kpe", + "xsj","suj", + "xsl","den", + "ybd","rki", + "ydd","yi", + "yma","lrr", + "ymt","mtm", + "yos","zom", + "yuu","yug", + "zai","zap", + "zsm","ms", + "zyb","za", + } // languageAliases + regionAliases{ // 38 + "062","034", + "172","RU", + "200","CZ", + "230","ET", + "280","DE", + "532","CW", + "582","FM", + "736","SD", + "830","JE", + "886","YE", + "890","RS", + "AN","CW", + "BU","MM", + "CS","RS", + "CT","KI", + "DD","DE", + "DY","BJ", + "FQ","AQ", + "FX","FR", + "HV","BF", + "JT","UM", + "MI","UM", + "NH","VU", + "NQ","AQ", + "NT","SA", + "PC","FM", + "PU","UM", + "PZ","PA", + "QU","EU", + "RH","ZW", + "SU","RU", + "TP","TL", + "UK","GB", + "VD","VN", + "WK","UM", + "YD","YE", + "YU","RS", + "ZR","CD", + } // regionAliases + trie:bin{ // BytesTrie: 9782 bytes +001a6dcc0b74c4e677c26077a2b378a4 +4e79a4bb7a0e6d7f7a5b7a30e1ad8ce8 +2ef5af2f10e1a537022a3c42cae54811 +616e01e2a3fcf4a3f70b4d24540e54ca +8c552a5610cea3fa10d3a3f94dca9550 +2a5310d2a3f502c1a3f4c6a3f2c8a3f3 +47d4a2473248d81dfb4910c4a3ef01c2 +a3f8c6a3ed6d326e36733a7910e2ad8c +10e9af2d10e5af2e10ed22022aa90643 +b4fd4910c4a908671767326836693a6c +10edaf2c10e8adb910f8ada910e1af2b +612e64326510e1af2a10e7af2810eaaf +2900126d46741b743475387742e1addc +efadf310edadf801f5adf9f6adfa10e1 
+adfb6d3c6e406f50725a7301e7ad79eb +adf710efadef02e3adf0e9adf1f5adf2 +01e2adf4f3adf510f3adf66726673668 +3a693e6a4e6c10f3adee10e9ade710e7 +ade802e2ade9f5adeaf6adeb01e1adec +e9aded6138625463646501eca51ef2ad +e604e5adddeaaddeecaddfeeade0f2ad +e102f0ade2f1ade3f2ade410e9ade50c +6e3d7221722e7332774ee8adff10e2af +0504e1adb1e9af06eca52dedaf07f2af +0810e5af096e346f3e7001e5a78af2ad +ae01e1ada7f2af0201e7af03eeaf0465 +1d653a6c3e6d02e6af01eead9df2ada0 +10f3adfe02e1af00e3ad99e4ad9a612e +62326310f2ad6910f6adfc10e9adfd0d +6d49733073307534e9a731efaf1c10f3 +af2104e1af22e532eaaf25f4af26f7af +27012a2e4812616ef3af24012aaf2343 +10ceaf246d3a6f3e7202e2af1ee5af1f +ecaf2010ecaf1b10eeaf1d6520653e67 +426b4c6c02e5af18e7af19ecaf1a10f2 +af1401f2af15f7af1610efaf17612e62 +606410e4a73107f40cf4af0df6af0ef9 +af0ffaaf10eda7b2efaf0af0af0bf3af +0c02e1af11e2af12f9af1374a25275a4 +d0760a6f1fe509e5adcfe9ad57efadd7 +6f3472387501eeaddaf4addb10f4add8 +10efadd9613c654c69566c606d01e6ad +d5f7add602e7adcde9adbeeeadce01e3 +add0f0add101e3add2f6add310f3add4 +00267576e91ff20ef2abbdf3abc1f4ab +c7f767f9abd7e9aba2ebaba9ecabadee +abb3efabb57a247a36e1ab82e5ab93e7 +2ee8ab9e10edabda012a2e41127261e2 +ab9a012aab995010cbab9a753e765477 +6478ccca7901e1abd8f6abd903e8abce +ecabcfedabd0f1abd102e4a7b5ecabd2 +f5abd301e8abd4f1abd56b7d7049705e +716e727273827406f10cf1abb1f2abcb +f3abccf4abcde4abc8e5abc9eaabca02 +e9abb9edabbafaabbb10efabbc02f5ab +bef6abbff7abc004e4abc2e6abc3e7ab +c4eaabc5f7abc66b426c526d626e6c6f +02e6abb6e7abb7f1abb802ecabaaf2ab +abf4abac02e6abaef8abaff9abb001e8 +abb1f9abb210e8abb46546655e667467 +7868886906eb0cebaba5edaba6efaba7 +f6aba8e2a3b0e6aba3e7aba403e4ab94 +edab95efab96f4ab9710e9ab9802e3ab +9befab9cf5ab9d02ecab9ff1aba0f2ab +a16144625a63866403e4ab90e7ab91e8 +ab92f5a54303eaab83ecab84eeab85f1 +ab8606e70ce7ab8aefab8bf7ab8cfaab +8de3ab87e4ab88e6ab8901e9ab8ef9ab +8f0f735fe72ce742ebabe1f2ad3ffa01 +2a8641127261e2adcb012a2e43127972 +ecabde022aabdd4b2a4d10ceabdf10da +abde7360746476687a10ee012a2e4112 
+7261e2adcb022aabec412a4310ceadcc +10c6adcb10e1adc710f2adc801e8adc9 +ecadca6d326d3e6e426f707202e9adc4 +f4adc5f7adc610e2abe301f226f8adc3 +012a2e44126576e1adc2012aadc14e10 +d0adc210eba55a62326436673a6c10e9 +abe210f5abdb10edabdc10e1abe070c3 +9570a4f971a6d772a6e47300287884ec +39f229f509f5ab71f6ab77f7ab78f22c +f3ab6af4ab6e22032aab244da4bf522a +5410d2ab2801cfab27d5ab25ecab4ced +ab51eeab58efab5ef16de429e438e5ab +34e7ab39e9ab42ebab48032aab2e4432 +4b3a5312696ee4ab31126576e1ab2f12 +686feaab307834793e7a48e1ab18e3ab +2301eeab7ef7ab7f01ecab80f2a50a10 +ecab816cad724f725873747484758e77 +05e809e8ab78f0ab7cf6ab7de2ab79e3 +ab7ae7ab7b04e2ab67e3ab23eeab68f2 +ab69f8a5ef02e4ab6be7ab6cf9ab6d01 +ebab6ff1ab7004e1ab72e5ab73ebab74 +f2ab75f3ab766c4a6d666e8c6fa24370 +03e4ab64ecab65f3ab66f9a77204e4ab +4de9ab4eecab4fefab48f9ab5005f009 +f0ab55f1ab56f3ab57e1ab52eaab53ee +ab5404e3ab59ebab5af0ab5bf8ab5cf9 +ab5d04e7ab5febab60f1ab61f5ab62f9 +ab63673e67426858696e6a846b02e3ab +49f2ab4af3ab4b03e1ab3af3ab3bf7ab +3cfaab3d03e9ab3eebab3feeab40f5ab +4103e4ab43e7ab44ecab45edab4610f2 +ab47614a6276638664a26b6503e6ab35 +e8ab36e9ab37f3ab3806f30cf3ab1cf4 +ab1df6ab1efaab1fe6ab19e8ab1af1ab +1b02e1ab20e5ab21f0ab2206ee0ceeab +2befab2cf2a5fdf3ab2de32cebab29ec +ab2a22055211522e53325410d2ab2810 +cfab2710d5ab252aab24312a4d10c5ab +261137b2ab2501e3ab32e8ab3300166e +63752bec09eca9e4f3a9d5f4a9f0754c +7750e1012a2e41127261e2a9cd012aa9 +cc5010cba9cd10f5a9f210e1a9f37217 +722e733e7410f0a9f102e1a9ece4a9ed +e7a9ee10f3a9ef6e346f4a7001e1a386 +efa9eb03e2a7c4e7a9e7eea9e8f4a9e9 +10eea9ea66306b176b2e6c386d10f3a9 +e601e1a9e2efa9e301e1a9e5f4a92766 +3468386901eca9e0f0a9e110eca9dd01 +eca9deeea9df614e627e638864926504 +e4a9daefa9dbf2a56ef3a56ef8a9dc04 +e7a9ceec32eda9d1f0a9d2f5a9d3012a +a9cf5012686cf0a9d001e9a9d4f5a9d5 +01e4a9d6eda9d701e3a9d8f4a9d90175 +26f5a9f402e3a9f5e7a9f6faa9f40014 +6f457917ef09efa944f5ab11f7ab1479 +2cedab04eeab0910f5ab176f3c724c74 +5075547701ebab15efab1602e2ab0ce6 +ab0defab0e10efab0f10edab1002e5ab 
+12e7ab13eda9446934693c6a566b5a6d +5e6e01e1ab0ae7ab0b01e1a9ffe62201 +2aab004e10ccab0110f3ab0210f4ab03 +03e6ab05efab06f4ab07f5ab08613663 +46654a675a6810e7a9fe02e9a9f7eaa9 +59efa9f810e6a9f902eaa9faeca9fbf3 +a9fc10eea9fd6da2576ea6446f0b741d +ed09eda59ff2a9c4f3a9c7742a7a34e3 +a9bc01e1a9c9eba9ca10eda9cb701770 +2e72327310e1a9c810eda9c302efa9c5 +f5a9c6f9a9c4673a6b3e6e02e7a9c0ee +a9c1f3a9c210e3a9bd01f2a9bef6a9bf +0022749fe926ef0fefa944f2a94ff3a4 +92f4a953f9a3cde9a903eba7fdeca939 +ee012abaab4d126f6ee7a53e783d7834 +793e7a64e7a927e8a92d01e3a962eda9 +6305f709f7a967f8a968faa969eba964 +eda965f6a96605f009f0a96df7a96efa +a96feba96aeda96beea96c7444755a76 +707703eba95ff2a535f6a960f7a96103 +e3a954e6a955e9a956f2a95703e1a958 +f0a959f2a95af3a95b02e1a95ceea95d +f9a95e69856e446e426f5e707a719072 +02e4a950eaa951efa95204e1a940e6a9 +41e9a942ebb603f7a94304e1a945e5a9 +46e8a947f3a948f8a94903f0a94af3a9 +4bf4a94cf8a94d10eca94e693e6b686c +7e6d02efa93df5a93ef8a93f03e6a930 +eea931f326f7a934012aa9324d126564 +e6a93303e9a935eca936f0a937f7a938 +02e5a93af0a93bf3a93c6549653e666a +67866802e9a92eeca92ff2a3fe06ee0c +eea91ef2a91ff4a920f5a921e4a91be5 +a91ceba91d04e1a922e5a923eea924ef +a925f1a92604e8a928eca929efa92af0 +a92bf9a92c615e62a25663a2636406e8 +0ce8a917eaa918f2a919f8a91ae1a914 +e5a915e6a9160bee23f718f7a905f926 +faa90922022aa906432a4910c4a90810 +c3a907eeb414efa903f3a904e709e7a9 +00e9a901eba902e3a7fde4a7fee6a7ff +04e8a90aefa90bf1a90cf5a90df7a90e +04e9a90ff0a910f1a911f2a912f5a913 +0021745fe41dee0feea99aefa9a0f2a9 +a7f6a9b4f9a9b8e4a97fe5a982e7a987 +eca54a791579307a3ae1a970e2a97801 +eda9b9eea9ba10e9a9bb74387542775e +7801f1a9b6f2a9b701eda9adf2a9ae04 +e9a9aff0a9b0f3a9b1f6a9b2f8a9b310 +e2a9b56a5d6f326f4870647168726c73 +03eba9a9eea9aaefa9abf3a9ac04e4a9 +a1e5a9a2eea9a3f0a9a4f5a9a510e9a9 +8210efa9a610e2a9a86a4a6b4e6d586e +04e6a99be8a99ceba99deda99ef0a99f +10efa99501e7a996efa99701e7a998fa +a9996637665a675e686e6906ee0ceea9 +91f5a992f9a993faa994e6a98ee9a98f +eaa99010f2a98602e1a988e2a989eca9 
+8a02e2a98be5a98cf7a98d613e636a64 +906502e2a983f7a984f8a98506ee0cee +a974f0a975f1a976f3a977e3a971e6a9 +72eba97305e809e8a97cefa97df5a97e +e1a979e5a97ae6a97b01e3a980f3a981 +66c73c69c48269a4166aa4c46ba60e6c +001b744ae717ef0cefa7ebf4a7f3f5a7 +f5f6a7f9e7a7d5e9a7d7eea7e8771577 +307a34e1a7c0e2a7c710eca7fa01e8a7 +fbfaa7fc742e75327610f3a7f910e7a7 +f403e1a7f6efa7f7f9a3d4faa7f86a4a +6d316d326e426f4c7210e3a7f202eea7 +e5efa7e6f0a7e701f3a7e9f5a7ea05f2 +09f2a7eff3a7f0faa7f1eaa7eceba7ed +eca7ee6a346b386c01e5a7e3eea7e410 +f0a7e001e9a7e1f4a7e26448646c6570 +679c6906e70ce7a7dce8a7ddeaa7def3 +a7dfe1a7d8e4a7d9e6012aa7da4c1269 +6de2a7db10e2a7cd06f00cf0a7d1f1a7 +d2f5a7d3faa7d4e4a7cee5a7cfeda7d0 +10e7a7d66134625a6301eda7cbf0a7cc +05e809e8a7c4eaa7c5f3a7c6e2a7c1e4 +a7c2e7a7c302e5a7c8f5a7c9f7a7ca00 +197739eb17f30cf3a70bf4a723f5a717 +f7a724eba566eea71eefa720e409e4a7 +0de7a712e9a715772a7a34e1a70401ed +a725f3a72601e8a727e9a7286a366d17 +6d326e366f3a7210e9a72210efa71d10 +e8a71f10f5a7216a2e6b326c10efa71c +10eaa71604e5a717eba718f4a719f7a7 +1af8a71b641a643466446701e2a713e5 +a71402e4a70ee9a70ff5a71010e5a711 +613a62446302e1a70ae5a70be8a70c01 +eea705f2a70602e1a707e2a708f9a709 +0b721ae909e9a731f6a737f7a738722a +752ee1a72910e1a73510f4a736671767 +34693e6d01e3a733eca73401eba72fef +a73010e2a732612e62386510eea72e01 +e2a72aeda72b01efa72cf5a72d002875 +c0e4eb71f23cf628f6a78ef7a7aff902 +2a32414c4c126174eea7bc022aa7ba43 +2a5410d2a7bc10cea7bb127261e2a7bb +f2a781f3a79af5012aa45941127261e2 +a77ceb38eca771eda776eea780efa785 +012a2e41127261e2a76c042aa76a4132 +4336493a4d10cea76e10c6a76b10cea7 +6c10d2a76d7a187a38e1a5aee7a755e9 +a761eaa76502eaa543f2a7bff4a54375 +3c766e777e788e7901e5a7bdf8a7be07 +ed0ceda7a8eea7a9f0a7aaf3a7abe2a7 +a4e4a7a5e5a7a6eaa7a702e7a7acf2a7 +adf8a7ae02eaa7b0efa7b1f1a7b206ed +0ceda7b6f0a7b7f7a7b8faa7b9e1a7b3 +e3a7b4e5a7b56bc0cd7063704a717072 +8673a2457403e2a7a1eda7a2efa7a3f2 +a54305f209f2a78df6a78ef8a78fe5a7 +8ae6a78befa78c03e2a790e6a791f3a7 +92f9a79305ec09eca797f3a798f5a799 
+e3a794e9a795eaa79605e809e8a79eea +a79ff2a7a0e2a79be4a79ce6a79d6b50 +6c5a6d706ea24f6f04e9a786eba783ec +a787f3a788faa78901e3a76feaa77003 +eea772f1a773f4a774f8a77506f21ef2 +32f3a77df5a77ef7a77f012a2e411272 +61e2a77c012aa77a4c10c2a77be2a777 +e8a778efa77904e3a781e6a782e7a755 +eea783f0a784665266486758686869a0 +6a03e4a766e7a767f3a768f9a76902ef +a752f2a753f9a75402e5a756e6a757f0 +a75808f10ff1a75cf3a75df4a75ef7a7 +5ffaa760e1a759e2a75aebac79eea75b +02eaa762f5a763f7a76461446276639c +64a2486502e1a74feea750faa75107e9 +0ce9a73deaa73eeda73fefa740e1a739 +e2a73ae3a73be4a73c05f109f1a744f8 +a745f9a746e4a741eda742f0a74303e7 +a747eba748eca749f4a74a03e5a74be8 +a74ceca74df4a74e66a2b567a46a6800 +137539f20ff2a5fdf4a700f5a701f9a3 +41faa7037536e13ae5a5e7e9a5e9efa5 +f910e9a70222032aa5dd372e43345310 +c4a5de1133b6a5de10cda5df6c386c3c +6d466e506f6c7301e2a5feeea5ff01e1 +a5f0f5a5f101e4a5f2f4a5f304e4a5f4 +e5a5f5eaa5f6eea5f7efa5f802e3a5fa +eaa5fbf4a5fc615862746478687c6905 +e809e8a5edeca5eeeda5efe1a5eae6a5 +ebe7a5ec04e7a5e0eba5e1eda5e2f7a5 +e3faa5e410e2a5e510f9a5e610f9a5e8 +00117553e90fe9a578eaa57cefa57ff2 +a585f9a59475427694e1a56ee6012aa5 +744112646ceda57509e80fe8a58ef1a5 +8ff2a590f6a591f9a592e2a58ae332e4 +a58be5a58ce6a58d012aa5744112646c +eda57510f2a5936d2f6d4e6f52706271 +667204e3a586e5a585f0a587f2a588f3 +a58910f0a57e02e4a580eea581f2a582 +10e5a58310f3a5846132665669606c10 +f2a57d05e908e9a572eea573f467e1a5 +6fe2a570e7a57101e9a576eda57702e1 +a579eca57af4a57b001a6e9f791dec0c +eca5baeea5bef5a5cef6a5d6792ce1a5 +95e4a5a701e1a5a0e9a5dc733a733e75 +4276747702e3a5d9e9a5daf4a5db10f7 +a5cd07f20cf2a5d2f7a5d3f8a5d4faa5 +d5e2a5cfe3a5d0e4a5d1e7a5be02e6a5 +d7f2a5b1f3a5d86e5a6f6a7204e2a5a3 +e332e5a559f4a5cbf7a5cc012aa5c94c +12696ee2a5ca02e4a5bfe7a5c0efa5c1 +07ee0ceea5c1f2a5c6f3a5c7f4a5c8e4 +a5c2e6a5c3e9a5c4eda5c567396a236a +386b486c526d01eda5bcf6a5bd02eba5 +b5eea5b6f5a5b701eea5b8f0a5b910eb +a5bb673468386901eca5b3eda5b410ee +a5b110f3a5b26428642e653e6610eba5 +b002e5a5a8eea5a9f2a5aa05ef09efa5 
+aef2a52bfaa5afe2a5abeaa5aceca5ad +612e626c6310f2a5a609ed0feda59bee +a59cf7a59df9a59efaa59fe1a596e6a5 +97e7a598e8a599eaa59a05ef09efa5a3 +f9a5a4faa5a5e1a5a0e6a5a1eda5a263 +c2e863a29d64a4f2650011772dee19ee +38efa562f3a564f4a557f5a364012aa5 +5e53126861f7a55f77307834e5a552ec +a55910efa56c10f4a56d6d466d486e7a +728473887403f2a568f4a569f5a56af8 +a56b02e1a55ae9a55beb012a2e4e126b +6fefa55d012aa55c4710cea55d01eea5 +60f1a56110e9a56302e7a565eba566f5 +a567623e664267466b02e1a556eba557 +f9a55810f5a55110e9a55301eca554f9 +a555001a725ce821f316f3a517f52cf6 +a51df9a51e012aa51b47126c61e7a51c +e8a3e9efa50df2a510771277307a34e1 +a3e0e5a3e510e4a51010e5a517722e73 +547410e4a51a05ec09eca514eda515f3 +a516e8a511eaa512eba51301e2a518f7 +a51968c0ce6c306c326d3c6f6c7010f3 +a50f01e1a509e4a50a02e5a50be7a50c +ee022aa24a42a29b4811616e01e2a3fc +f4a3f710f0a50e68426aa28d6b03e2a5 +05eca506efa507f9a50805ef09efa3ff +f0a500f2a501e92ceba3fdeda3fe022a +3c42a25f4811616e01e2a3fcf4a3f70b +4d335417542e55385610c4a3fa01c8a3 +f6d7a3f701cba3f8d3a3f94d2e503853 +10d2a3f501cfa3f0d9a3f102c6a3f2c8 +a3f3daa3f4470e473048b4684910c4a3 +ef10c6a3ed2aa3ea412a4210cea3ec10 +d5a3eb126f70efa3fb02e1a502eda503 +f6a5046511652e66326710e7a3e810e2 +a3e610e1a3e7613462386301e8a3e3f0 +a3e410eea3e110eaa3e20016726f791c +e508e5a52bf6a54bfa45792a7a34e1a5 +1f01efa54ef5a54f10e7a550751d752e +764a7710f7a54d04e1a546e3a547e4a5 +48e7a549f4a54a10e1a54c7240737474 +03eda542f0a543f3a544f9a54502e82c +e9a53ff3a540012a2e4d126f6ee7a53e +012aa53d4310cea53e10e2a54167446a +1a6a406e446f03e2a53ae9a532f0a53b +f7a53c10e5a53810eaa5396734686069 +01e1a536f1a53706ec0ceca531efa532 +f2a533faa534e1a52ee8a52fe9a53010 +e4a535613c6268637264766501e4a52c +eea52d06e80ce8a523eba524f2a525f6 +a526e4a520e6a521e7a52201e4a527f1 +a52810e3a52910eea52a2aa4fa61be83 +62002373a5e51ded0feda3a7eea3acef +a3b0f2a3b9f3a3bee5a37ae7a387e8a3 +8be9a39277417732783c794c7a72e1a3 +6001e4a3d1f2a3d202e8a3d3eba3d4f2 +a3c605f309f3a3d8f6a3d9f8a3dae5a3 +d5eea3d6f2a3d704e1a3dbe5a3dce6a3 
+dde8a3def7a3df7332744875587610e2 +a3d003eaa3bff1a3c0f3a3c1f4a3c202 +efa3c3f4a3c4f6a3c509ed0feda3cbef +a3ccf2a3cdf3a3cef5a3cfe1a3c6e3a3 +c7e4a3c8e7a3c9eba3ca6a7d6e3b6e48 +6f587068716c7203e1a3bae8a3bbf8a3 +bcfaa3bd02e7a3adeda3aef0a3af02ea +a3b1eda3b2eea3b310f9a3b403e3a3b5 +e9a3b6f0a3b7f6a3b86a466b786ca4a6 +6d03e8a3a8eba3a9f1a3aaf5a3ab07ef +0cefa39df2a39ef4a39ffaa3a0e8a399 +e9a39aeaa39beea39c04e3a3a1eda3a2 +f1a3a3f5a3a4f6a3a5656d6562669467 +a24468a24b6906ed0ceda395eea396ef +a397f1a398e2a393e7a394eba37307f4 +0cf4a37ff7a380f8a381faa382e6a37b +e8a37ceaa37deda37e03e4a383f1a384 +f4a385f9a38602e3a388eea389f8a38a +06eb0ceba38feca390efa38bf9a391e2 +a38ce7a38de9a38e6132626463906410 +e4a37907f20cf2a365f3a366f6a367f8 +a368eca361eea362f0a363f1a36406e4 +0ce4a36ceaa36df0a36ef2a36fe1a369 +e2a36ae3a36b09ed0feda374eea375ef +a376f1a377f5a378e3a361e6a370e8a3 +71e9a372eca37300184dc25d54c0c257 +2e5736584e59645a12616ee2adc00161 +2c631168efa99f1172e1adbf01702a73 +1175f8691165efa9db126969e9a71554 +3455a28756126169e9adbe056746674a +68506901622a7210e8adbd10f4012aa3 +b04210d445116ce7adba106101e158e9 +042aab9e43324b364c3a5410c8ab9e10 +cea7cc10c8adbb10c1adbc012aa54b4d +10d6a54b6130657c66116ee7adb9056d +116d2e6e327610f4a3a610ecab8210e7 +abd667346b386c01e5ab90f5a75a10e2 +adb710f2adb8116cf5ab93126761f2ab +e050c0c350a27252a2a8530669436942 +6f6a759079016c2a7210e3a50a10efad +b601643e6e01e4ab31e8012aab424c10 +cbab4210e4adb302672e72387910efa5 +0c01e4ab5fefadb410e1ab67116ee4ad +b5613e675e6801612a7210e4adb210f7 +a55f026d2e72327510f2ab1f10f2ab55 +10e2adb1116ef7a349046138654a6850 +6c76721174e9adae016c2a7510e3a51a +10ed9f1172edadac02612e6c326e10f8 +a9df10e7adad01e9a9cff0a9d01172e4 +a5f2026a306f3675116ef2a9a3116ee7 +adaf1168e7adb04d7c4ea2a44f036742 +6c48724e7301672a6d10e1adab10e5a9 +c81161edab3a1163ebadaa016b2a7910 +e1a9c410e8a9ca07722e725a74607566 +79116df2032aa3cd492e4d325410c8ad +a510cea75e10cda3cd116fefa9521165 +e9ada3116cf4ada4614265746c9a6f01 +642a6e10e7a53e10e9ada20368326b36 
+6e3a7210e3ad9e10eaad9b10e1ad9c01 +e4a969e9ad9d0264346e387201e3ada0 +efada110e6a93310e4ad9f1179eda939 +046138624c65506b56731168f5ada901 +6e2a7210e2ada710e4ada61161f49d11 +77e1ada8116fefa55d47c40d4ac3214a +a6ba4ba6d34c03614665a69669a69879 +01632a6410e9ad9a10e9ad99026ea673 +6fa6747410ee001a4bc139545b571457 +ac5458ac5859305a01c1abe8d7ab5810 +d4ad5b544055825603c1ad56c5ad58ce +ad57d5a3920bce14d609d6abd2d7abbf +daab78cead97cfabb5d2abbdcbc5b8cb +abaaccad52cdaba904c1ad98c7ad54d3 +a55ed9ad55daabec4e824e8a50a24a52 +a26d530cce17d40cd4ad4cd5a797d6ad +4dd9ad96cead4acfab5ed2ad4bca09ca +ad48cbab48cdad49c3ad47c5ab77c9ab +4c06c70cc7abe4c9ad3acca54acfa978 +c1ad37c3ad38c5ad390acc14d409d4ad +43d7a9d3d9a5becca9e4cdad40d2ad41 +c1ad3cc5ad3dc6ad3ec7abb9c8a57a03 +c5ad46cfa944d5a797d7ab144b9e4ca2 +434d0010cd1dd40fd4a953d5a923d8ab +e6d9a906daad36cda73bcfad94d1ad34 +d2ad95c60cc6ad33c7a927cbad93cca3 +a7c1ad92c3ad31c4ad32c5ab2601c5ad +26cdad9104c9ad2cd3ad2dd4a7f3d5ad +2ed6a7f943bd465f464c475c48a24849 +04c3ad23c4a70dd2ad90d3a70bd4a723 +02c9a578cfa57fd2a5850cce17d30cd3 +ad1ed4ad1fd5abebd7ad20cead1bd0ad +1cd1ad1dc608c6ad1ac867cca771c1ad +19c2abeec5ad8f04cdad21cead22d2a5 +fdd4a700d5a701435244a2424505d309 +d3a564d4ad89daad18c1ad14c3ad15c5 +a5570fcf1ad60cd6ad0fd7abf0d9ad8d +daa517cfad0cd0ad0dd2ad0ed5abe9c9 +c676c9ad09ccad0acdad0bcead8c05cb +09cba51fcfad12daad8ec5a52bc7ad45 +caad11345e34ae714178420bce14d209 +d2a9f0d5a73bd6ad05cead02cfad03d1 +ad04c909c9ab09caad00ccad01c1a3be +c5abfdc6abfe0cd217d50cd5abe5d7ab +fbd8abfcdaa35cd2abf8d3abf9d4abfa +cd09cdad8bcfabf6d1abf7c4abf3c6ad +8acc6d2aa55e312c321133b0ad891137 +b2a79710e1a9a110ef012aa7eb4c10c1 +a7eb1170e3a7d1026d2e6e327310f5a7 +df10e2a7db01e1a7c1e2a5ca01613a70 +1161ee012aa7294a10d0a729016d2a76 +10e1ad8610efad85046138684c6e7a6f +80741168e9ad88016c2a6e10e1ad8710 +e9a55802612e6d326f10eaab3010f2a9 +ec10f2012aa7764b10c8a7761164e1a7 +801172e5012aa7854b01d0ad28d2a785 +473448a25c49127461eca5690465466c +5a6f60727a75016a2a7210f5a9cc10f2 
+a5ce116ff2012aa5ae4710c5a5ae1161 +e7a51c016e2a7410e8a5c801e7ad79ed +a5650161506510eb032aa559432e4732 +5410d2a38a10d9ad1010d2a55910eead +7a056c186c306d3675116ee7ad841175 +f7a5f1106e01e7a5f6f0a9616130659c +691172e1ad83016e2a7410f2a93205ef +24efad7df34cf4032aa3f7482e4d3254 +10d7a3f710cba3ee10cfa3f0012aa3ea +4310cea3eae2a3fce7ad7be9ad7c1162 +f2054917493a533e5502c1ad81cbad7f +d3ad8210cca5e710c5ad802aa5e7432a +4710c2ad7f10c1ad7e43c0f9437044a2 +b9450267406c46741168e9012a754501 +d2ad17d4751179f0a55501622a7910ed +ad7810e1ad770461a26868a2776fa280 +70a282791172ec0a4d335414542e5538 +5810cbad7101caab99d2ad7010c1abe1 +4d2e523e5310cbad6f02c4ad6dcba7fd +cea53d02cfad6ed3ab24d5ab112aab11 +4138423c474c4b01c7a7badaad2a10cc +ad6a02c1ad6bc7a387d9a37a01c52dd2 +ad6c026b2e6e327210e9ad6910eda3e4 +10f3a51001612a6510f2a50110eda503 +1170f4a50e1172f4a5c90265306f8275 +1170ecad761176e106491749324d364e +3a5010cba3c510cea5e910d5ad7410d0 +a9822aa5e9422a4610caad7310d4ad72 +1167f2ad752aa25441aa3142056f1f6f +3e72447501672a6810e4ad6810e9ad67 +1170efa3fb106101e8a9e2e9ad666130 +655c68116bf3ad65036c326d36733a74 +10ebad6410e9ad6310f5a36810f3a3c0 +116ee7012aa3ac4210c4a3ac00214ac1 +c052c0cc5638563c5758586259665a01 +d2abe7d7ab5804c1ad56c4ad57c5ad58 +cead57d5a39201c6ad59d3ab5110cbad +5a03c4abf2c5abf2d4ad5bd5ab245246 +536c54a2575503c1abe1c7ad54d9ad55 +daabec05d309d3ab24d5ab11d7ab14c5 +ad46c8ab58cfa9440ecd1ad40cd4ad4c +d5ab11d6ad4dd9ad4ecdad49cead4acf +ab5ed2ad4bc50cc5ab77c9ab4ccaad48 +cbab48c1abedc3ad47c4abf10ecd1ad2 +0cd2abbdd6abd2d7a3f7daab78cdaba9 +cead53cfabb5d0ad52c80cc8ab9ecaab +99cbabaaccad52c4ad4fc6ad50c7ad51 +4e644e404fa89650785102c1ad44cfad +45d5abee09cc0fcca54acfa978d0a982 +d1abf7d4abedc1ad37c3ad38c5ad39c8 +a392c9ad3a0dcd17d40cd4ad43d7a9d3 +d9a5bedaad3ccdad40d2ad41d3ad42c7 +0cc7abb9c8a57acbad3fcca9e4c1ad3c +c5ad3dc6ad3e4aa2484ba24c4ca2644d +0012ce20d50fd5a923d6a54bd8abe6d9 +a906daad36cea53dcfa3f0d1ad34d2ad +35d4a953c60fc6ad33c7a927cba7fdcc +a3a7cda3cdc1ad30c3ad31c4ad32c5ab 
+2601cfad25d0a72907d00cd0ad28d2a7 +85d7ad29daad2ac5ad26c7a7bac8a776 +cdad2708d30fd3ad2dd4a7f3d5ad2ed6 +a7f9d9ad2fc1a7ebc2ad2bc9ad2ccbab +4241c1604598456a46a24147a24e48a2 +754907d10cd1ad24d2a56ed3a70bd4a7 +23c3ad23c4a70dcca5e7cea5e909d20e +d2ad17d3a564d475d5abeedaad18c1ad +14c3ad15c5a557c799c8ad1604c9a578 +cfa57fd1abf7d2a585d8a5850bd014d3 +09d3ad1ed4ad1fd7ad20d0ad1cd1ad1d +d2a559c808c867cca771cead1bc1ad19 +c5a5aec6ad1a06d20cd2a5fdd4a700d5 +a701d6abfecba3eecdad21cead22415e +42a24c43a2834406cb0ccba51fcfad12 +d9ad00daad13c4a52bc5a52bcaad110d +d117d40cd4abfad7abfbd8abfcdaa35c +d1abf7d2abf8d3abf9cc0bcc6dcda341 +ceabf0cfabf6c4abf3c5abf4c6abf500 +10cc1cd20ed2a9f0d445d5a3cdd6ad05 +d9a37accad01cead02cfad03d1ad04c7 +0cc7a387c8abffc9ab09caad00c1a3be +c4a3acc5abfdc6abfe0010cf1dd50fd5 +abe9d6ad0fd7abf0d9ad10daa517cfad +0cd0ad0dd2ad0ed3ab24c90cc9ad09cc +ad0acdad0bcea3eac4abe7c6ad06c7ad +07c8ad08342234423548374e3801382a +3910b0ab2410b6abf21131b9abef1133 +b2abf01133b6abf12aa55e305031a26b +3202302e33383810b0a52b01b0a517b2 +abe410b0750533233334354a3601b1ab +51b2a5e903b0a3eab4a5e9b5a70db9a7 +2302b3abe5b4abeab7abeb3034314a32 +01b1a55eb9abe903b2abe4b3a55eb5a9 +f0b9abe506b50bb599b7abe7b8abe8b9 +a55eb1abe4b3abe6b4ab7802342e353e +3710b2ab1102b2a3eab3abecb5abed03 +b0ab11b1ab11b4abeeb5a52b04643667 +3c68427246761173f44b116ceda57511 +68e2ad5c116fed6301613c6d01e99bee +012aa3414110cda34110e200134c6251 +31513c53405450556a5901c5abf2d4ab +7910c1ad4402c1abedc4abf1d9ad4e04 +c78fc8a922caad61cead53d2ad6210cb +ad5d4c3c4d464e624f665001cbad3fd3 +ad4201c2ad2bd9ad2f04c1ad30cda9fe +cea76ed2ad35d5ad5f10c7ad6010cdad +3b452e4542474a494e4a644b02c8a502 +cdad27d7ad2901c799c8ad1610c2ad5d +03c4a908cead5ed1ad24d2a56e10cfad +252a994132423c43464410daad1301c5 +abf4c6abf501c8abffd5a9fe01c3a907 +ceabdd00207492e52bf221f299f3a347 +f6a351f9a35afa012a2e41127261e2a3 +5e022aa35c49985210d5a35fe54be651 +eb67ed757934792c7a36e125e22d01e2 +a35bf2a35a10ea012a2e41127261e2a3 +5e032aa35c312e49345310d5a35f1137 
+b2a35f01d1a35dd2a35e744475547658 +7703e1a356e2a357efa358f8a35902e1 +a34de7a34eeaa34f10f9a35003eca352 +eea353f4a354f5a3556b856f586f4a70 +54726e7304e1a348e5a349e7a34aefa3 +4bf4a34c02ea87ed89fa8b05f206f293 +f395fa97e38de48fe59108ee0feea342 +efa343f1a344f9a345faa346e299e32a +e8a1eda341022a9b4e2c5012616ced9f +126261f49d6b3c6c3e6d586e03e37feb +81ee83f98510eb6905ee06ee71f36df4 +73e16be26de96f03ed77ee79ef7bf07d +65256530673668506a10e76501e24df9 +4f05ed06ed59ef5bf15de353e455e757 +02e15fec61ef636148625263646405f0 +06f045f947fa49e13fe541ea4302e927 +eb29f52b04e92ff131f233f435f93702 +e439e53be83d + } // trie + lsrs{ // 1584 + "","","", + "skip","script","", + "aa","Latn","ET", + "aai","Latn","ZZ", + "aak","Latn","ZZ", + "aau","Latn","ZZ", + "ab","Cyrl","GE", + "abi","Latn","ZZ", + "abq","Cyrl","ZZ", + "abr","Latn","GH", + "abt","Latn","ZZ", + "aby","Latn","ZZ", + "acd","Latn","ZZ", + "ace","Latn","ID", + "ach","Latn","UG", + "ada","Latn","GH", + "ade","Latn","ZZ", + "adj","Latn","ZZ", + "dz","Tibt","BT", + "ady","Cyrl","RU", + "adz","Latn","ZZ", + "ae","Avst","IR", + "aeb","Arab","TN", + "aey","Latn","ZZ", + "af","Latn","ZA", + "agc","Latn","ZZ", + "agd","Latn","ZZ", + "agg","Latn","ZZ", + "agm","Latn","ZZ", + "ago","Latn","ZZ", + "agq","Latn","CM", + "aha","Latn","ZZ", + "ahl","Latn","ZZ", + "aho","Ahom","IN", + "ajg","Latn","ZZ", + "ak","Latn","GH", + "akk","Xsux","IQ", + "ala","Latn","ZZ", + "sq","Latn","AL", + "ali","Latn","ZZ", + "aln","Latn","XK", + "alt","Cyrl","RU", + "am","Ethi","ET", + "amm","Latn","ZZ", + "amn","Latn","ZZ", + "amo","Latn","NG", + "amp","Latn","ZZ", + "anc","Latn","ZZ", + "ank","Latn","ZZ", + "ann","Latn","ZZ", + "any","Latn","ZZ", + "aoj","Latn","ZZ", + "aom","Latn","ZZ", + "aoz","Latn","ID", + "apc","Arab","ZZ", + "apd","Arab","TG", + "ape","Latn","ZZ", + "apr","Latn","ZZ", + "aps","Latn","ZZ", + "apz","Latn","ZZ", + "ar","Arab","EG", + "arc","Armi","IR", + "arc","Nbat","JO", + "arc","Palm","SY", + "arh","Latn","ZZ", + "hy","Armn","AM", + 
"arn","Latn","CL", + "aro","Latn","BO", + "arq","Arab","DZ", + "ary","Arab","MA", + "arz","Arab","EG", + "as","Beng","IN", + "asa","Latn","TZ", + "ase","Sgnw","US", + "asg","Latn","ZZ", + "aso","Latn","ZZ", + "ast","Latn","ES", + "ata","Latn","ZZ", + "atg","Latn","ZZ", + "atj","Latn","CA", + "auy","Latn","ZZ", + "av","Cyrl","RU", + "avl","Arab","ZZ", + "avn","Latn","ZZ", + "avt","Latn","ZZ", + "avu","Latn","ZZ", + "awa","Deva","IN", + "awb","Latn","ZZ", + "awo","Latn","ZZ", + "awx","Latn","ZZ", + "ay","Latn","BO", + "ayb","Latn","ZZ", + "az","Latn","AZ", + "az","Arab","IQ", + "az","Arab","IR", + "az","Cyrl","RU", + "ba","Cyrl","RU", + "bal","Arab","PK", + "ban","Latn","ID", + "bap","Deva","NP", + "eu","Latn","ES", + "bar","Latn","AT", + "bas","Latn","CM", + "bav","Latn","ZZ", + "bax","Bamu","CM", + "bba","Latn","ZZ", + "bbb","Latn","ZZ", + "bbc","Latn","ID", + "bbd","Latn","ZZ", + "bbj","Latn","CM", + "bbp","Latn","ZZ", + "bbr","Latn","ZZ", + "bcf","Latn","ZZ", + "bch","Latn","ZZ", + "bci","Latn","CI", + "bik","Latn","PH", + "bcm","Latn","ZZ", + "bcn","Latn","ZZ", + "bco","Latn","ZZ", + "bcq","Ethi","ZZ", + "bcu","Latn","ZZ", + "bdd","Latn","ZZ", + "be","Cyrl","BY", + "bef","Latn","ZZ", + "beh","Latn","ZZ", + "bej","Arab","SD", + "bem","Latn","ZM", + "bet","Latn","ZZ", + "bew","Latn","ID", + "bex","Latn","ZZ", + "bez","Latn","TZ", + "bfd","Latn","CM", + "bfq","Taml","IN", + "bft","Arab","PK", + "bfy","Deva","IN", + "bg","Cyrl","BG", + "bgc","Deva","IN", + "bgn","Arab","PK", + "bgx","Grek","TR", + "bho","Deva","IN", + "bhb","Deva","IN", + "bhg","Latn","ZZ", + "bhi","Deva","IN", + "bhk","Latn","PH", + "bhl","Latn","ZZ", + "bhy","Latn","ZZ", + "bi","Latn","VU", + "bib","Latn","ZZ", + "big","Latn","ZZ", + "bim","Latn","ZZ", + "bin","Latn","NG", + "bio","Latn","ZZ", + "biq","Latn","ZZ", + "bjh","Latn","ZZ", + "bji","Ethi","ZZ", + "bjj","Deva","IN", + "bjn","Latn","ID", + "bjo","Latn","ZZ", + "bjr","Latn","ZZ", + "bjt","Latn","SN", + "bjz","Latn","ZZ", + 
"bkc","Latn","ZZ", + "bkm","Latn","CM", + "bkq","Latn","ZZ", + "bku","Latn","PH", + "bkv","Latn","ZZ", + "blt","Tavt","VN", + "bm","Latn","ML", + "bmh","Latn","ZZ", + "bmk","Latn","ZZ", + "bmq","Latn","ML", + "bmu","Latn","ZZ", + "bn","Beng","BD", + "bng","Latn","ZZ", + "bnm","Latn","ZZ", + "bnp","Latn","ZZ", + "bo","Tibt","CN", + "boj","Latn","ZZ", + "bom","Latn","ZZ", + "bon","Latn","ZZ", + "bpy","Beng","IN", + "bqc","Latn","ZZ", + "bqi","Arab","IR", + "bqp","Latn","ZZ", + "bqv","Latn","CI", + "br","Latn","FR", + "bra","Deva","IN", + "brh","Arab","PK", + "brx","Deva","IN", + "brz","Latn","ZZ", + "bs","Latn","BA", + "bsj","Latn","ZZ", + "bsq","Bass","LR", + "bss","Latn","CM", + "bst","Ethi","ZZ", + "bto","Latn","PH", + "btt","Latn","ZZ", + "btv","Deva","PK", + "bua","Cyrl","RU", + "buc","Latn","YT", + "bud","Latn","ZZ", + "bug","Latn","ID", + "buk","Latn","ZZ", + "bum","Latn","CM", + "buo","Latn","ZZ", + "my","Mymr","MM", + "bus","Latn","ZZ", + "buu","Latn","ZZ", + "bvb","Latn","GQ", + "bwd","Latn","ZZ", + "bwr","Latn","ZZ", + "bxh","Latn","ZZ", + "luy","Latn","KE", + "bye","Latn","ZZ", + "byn","Ethi","ER", + "byr","Latn","ZZ", + "bys","Latn","ZZ", + "byv","Latn","CM", + "byx","Latn","ZZ", + "bza","Latn","ZZ", + "bze","Latn","ML", + "bzf","Latn","ZZ", + "bzh","Latn","ZZ", + "bzw","Latn","ZZ", + "ca","Latn","ES", + "can","Latn","ZZ", + "cbj","Latn","ZZ", + "cch","Latn","NG", + "ccp","Cakm","BD", + "ce","Cyrl","RU", + "ceb","Latn","PH", + "cfa","Latn","ZZ", + "cgg","Latn","UG", + "ch","Latn","GU", + "zh","Hans","CN", + "zh","Hant","AU", + "zh","Hant","BN", + "zh","Hant","GF", + "zh","Hant","HK", + "zh","Hant","ID", + "zh","Hant","MO", + "zh","Hant","MY", + "zh","Hant","PF", + "zh","Hant","PH", + "zh","Hant","PA", + "zh","Hant","SR", + "zh","Hant","TH", + "zh","Hant","TW", + "zh","Hant","GB", + "zh","Hant","US", + "zh","Hant","VN", + "zh","Bopo","TW", + "zh","Hanb","TW", + "chk","Latn","FM", + "chm","Cyrl","RU", + "cho","Latn","US", + "chp","Latn","CA", + 
"chr","Cher","US", + "cja","Arab","KH", + "cjm","Cham","VN", + "cjv","Latn","ZZ", + "ckb","Arab","IQ", + "ckl","Latn","ZZ", + "cko","Latn","ZZ", + "cky","Latn","ZZ", + "cla","Latn","ZZ", + "syr","Syrc","IQ", + "cme","Latn","ZZ", + "cmg","Soyo","MN", + "co","Latn","FR", + "cop","Copt","EG", + "cps","Latn","PH", + "cr","Cans","CA", + "crh","Cyrl","UA", + "crj","Cans","CA", + "crk","Cans","CA", + "crl","Cans","CA", + "crm","Cans","CA", + "crs","Latn","SC", + "cs","Latn","CZ", + "csb","Latn","PL", + "csw","Cans","CA", + "ctd","Pauc","MM", + "cu","Cyrl","RU", + "cu","Glag","BG", + "cv","Cyrl","RU", + "cy","Latn","GB", + "da","Latn","DK", + "dad","Latn","ZZ", + "daf","Latn","ZZ", + "dag","Latn","ZZ", + "dah","Latn","ZZ", + "dak","Latn","US", + "dar","Cyrl","RU", + "dav","Latn","KE", + "dbd","Latn","ZZ", + "dbq","Latn","ZZ", + "dcc","Arab","IN", + "ddn","Latn","ZZ", + "de","Latn","DE", + "ded","Latn","ZZ", + "den","Latn","CA", + "dga","Latn","ZZ", + "dgh","Latn","ZZ", + "dgi","Latn","ZZ", + "dgl","Arab","ZZ", + "doi","Arab","IN", + "dgr","Latn","CA", + "dgz","Latn","ZZ", + "mwr","Deva","IN", + "dia","Latn","ZZ", + "zza","Latn","TR", + "dje","Latn","NE", + "dnj","Latn","CI", + "dob","Latn","ZZ", + "dop","Latn","ZZ", + "dow","Latn","ZZ", + "mn","Cyrl","MN", + "mn","Mong","CN", + "dri","Latn","ZZ", + "drs","Ethi","ZZ", + "dsb","Latn","DE", + "dtm","Latn","ML", + "dtp","Latn","MY", + "dts","Latn","ZZ", + "dty","Deva","NP", + "dua","Latn","CM", + "duc","Latn","ZZ", + "dud","Latn","ZZ", + "dug","Latn","ZZ", + "nl","Latn","NL", + "dv","Thaa","MV", + "dva","Latn","ZZ", + "dww","Latn","ZZ", + "dyo","Latn","SN", + "dyu","Latn","BF", + "dzg","Latn","ZZ", + "ebu","Latn","KE", + "ee","Latn","GH", + "efi","Latn","NG", + "egl","Latn","IT", + "egy","Egyp","EG", + "eka","Latn","ZZ", + "et","Latn","EE", + "eky","Kali","MM", + "el","Grek","GR", + "ema","Latn","ZZ", + "emi","Latn","ZZ", + "man","Latn","GM", + "man","Nkoo","GN", + "en","Latn","US", + "en","Shaw","GB", + "enn","Latn","ZZ", + 
"enq","Latn","ZZ", + "eo","Latn","001", + "eri","Latn","ZZ", + "es","Latn","ES", + "esg","Gonm","IN", + "ik","Latn","US", + "esu","Latn","US", + "etr","Latn","ZZ", + "ett","Ital","IT", + "etu","Latn","ZZ", + "etx","Latn","ZZ", + "ewo","Latn","CM", + "ext","Latn","ES", + "fa","Arab","IR", + "faa","Latn","ZZ", + "fab","Latn","ZZ", + "fag","Latn","ZZ", + "fai","Latn","ZZ", + "fan","Latn","GQ", + "ff","Latn","SN", + "ff","Adlm","GN", + "ffi","Latn","ZZ", + "ffm","Latn","ML", + "fi","Latn","FI", + "fia","Arab","SD", + "fil","Latn","PH", + "fit","Latn","SE", + "fj","Latn","FJ", + "flr","Latn","ZZ", + "fmp","Latn","ZZ", + "fo","Latn","FO", + "fod","Latn","ZZ", + "fon","Latn","BJ", + "for","Latn","ZZ", + "fpe","Latn","ZZ", + "fqs","Latn","ZZ", + "fr","Latn","FR", + "frc","Latn","US", + "frp","Latn","FR", + "frr","Latn","DE", + "frs","Latn","DE", + "fub","Arab","CM", + "fud","Latn","WF", + "fue","Latn","ZZ", + "fuf","Latn","GN", + "fuh","Latn","ZZ", + "fuq","Latn","NE", + "fur","Latn","IT", + "fuv","Latn","NG", + "fuy","Latn","ZZ", + "fvr","Latn","SD", + "fy","Latn","NL", + "ga","Latn","IE", + "gaa","Latn","GH", + "gaf","Latn","ZZ", + "gag","Latn","MD", + "gah","Latn","ZZ", + "gaj","Latn","ZZ", + "gam","Latn","ZZ", + "gan","Hans","CN", + "gaw","Latn","ZZ", + "gay","Latn","ID", + "om","Latn","ET", + "gba","Latn","ZZ", + "gbf","Latn","ZZ", + "gbm","Deva","IN", + "grb","Latn","ZZ", + "gby","Latn","ZZ", + "gbz","Arab","IR", + "gcr","Latn","GF", + "gd","Latn","GB", + "gde","Latn","ZZ", + "gdn","Latn","ZZ", + "gdr","Latn","ZZ", + "geb","Latn","ZZ", + "gej","Latn","ZZ", + "gel","Latn","ZZ", + "ka","Geor","GE", + "gez","Ethi","ET", + "gfk","Latn","ZZ", + "gvr","Deva","NP", + "ghs","Latn","ZZ", + "gil","Latn","KI", + "gim","Latn","ZZ", + "gjk","Arab","PK", + "gjn","Latn","ZZ", + "gju","Arab","PK", + "gkn","Latn","ZZ", + "gkp","Latn","ZZ", + "gl","Latn","ES", + "glk","Arab","IR", + "gmm","Latn","ZZ", + "gmv","Ethi","ZZ", + "gn","Latn","PY", + "gnd","Latn","ZZ", + "gng","Latn","ZZ", + 
"gon","Telu","IN", + "god","Latn","ZZ", + "gof","Ethi","ZZ", + "goi","Latn","ZZ", + "gom","Deva","IN", + "gor","Latn","ID", + "gos","Latn","NL", + "got","Goth","UA", + "grc","Cprt","CY", + "grc","Linb","GR", + "grt","Beng","IN", + "grw","Latn","ZZ", + "gsw","Latn","CH", + "gu","Gujr","IN", + "gub","Latn","BR", + "guc","Latn","CO", + "gud","Latn","ZZ", + "gur","Latn","GH", + "guw","Latn","ZZ", + "gux","Latn","ZZ", + "guz","Latn","KE", + "gv","Latn","IM", + "gvf","Latn","ZZ", + "gvs","Latn","ZZ", + "gwc","Arab","ZZ", + "gwi","Latn","CA", + "gwt","Arab","ZZ", + "gyi","Latn","ZZ", + "ha","Latn","NG", + "ha","Arab","SD", + "ha","Arab","CM", + "hag","Latn","ZZ", + "hak","Hans","CN", + "ham","Latn","ZZ", + "haw","Latn","US", + "haz","Arab","AF", + "hbb","Latn","ZZ", + "hdy","Ethi","ZZ", + "he","Hebr","IL", + "hhy","Latn","ZZ", + "hi","Deva","IN", + "hia","Latn","ZZ", + "hif","Latn","FJ", + "hig","Latn","ZZ", + "hih","Latn","ZZ", + "hil","Latn","PH", + "srx","Deva","IN", + "hla","Latn","ZZ", + "hlu","Hluw","TR", + "hmd","Plrd","CN", + "hmt","Latn","ZZ", + "hnd","Arab","PK", + "hne","Deva","IN", + "hnj","Hmng","LA", + "hnn","Latn","PH", + "hno","Arab","PK", + "ho","Latn","PG", + "hoc","Deva","IN", + "hoj","Deva","IN", + "hot","Latn","ZZ", + "hr","Latn","HR", + "hsb","Latn","DE", + "hsn","Hans","CN", + "ht","Latn","HT", + "hu","Latn","HU", + "hui","Latn","ZZ", + "hz","Latn","NA", + "ia","Latn","001", + "ian","Latn","ZZ", + "iar","Latn","ZZ", + "iba","Latn","MY", + "ibb","Latn","NG", + "iby","Latn","ZZ", + "ica","Latn","ZZ", + "is","Latn","IS", + "ich","Latn","ZZ", + "id","Latn","ID", + "idd","Latn","ZZ", + "idi","Latn","ZZ", + "idu","Latn","ZZ", + "ife","Latn","TG", + "ig","Latn","NG", + "igb","Latn","ZZ", + "ige","Latn","ZZ", + "ii","Yiii","CN", + "ijj","Latn","ZZ", + "iu","Cans","CA", + "ikk","Latn","ZZ", + "ikt","Latn","CA", + "ikw","Latn","ZZ", + "ikx","Latn","ZZ", + "ilo","Latn","PH", + "imo","Latn","ZZ", + "in","Latn","ID", + "inh","Cyrl","RU", + "io","Latn","001", + 
"iou","Latn","ZZ", + "iri","Latn","ZZ", + "it","Latn","IT", + "iw","Hebr","IL", + "iwm","Latn","ZZ", + "iws","Latn","ZZ", + "izh","Latn","RU", + "izi","Latn","ZZ", + "ja","Jpan","JP", + "jab","Latn","ZZ", + "jam","Latn","JM", + "jbo","Latn","001", + "jbu","Latn","ZZ", + "jen","Latn","ZZ", + "jgk","Latn","ZZ", + "jgo","Latn","CM", + "yi","Hebr","001", + "jib","Latn","ZZ", + "jmc","Latn","TZ", + "jml","Deva","NP", + "jra","Latn","ZZ", + "jut","Latn","DK", + "jv","Latn","ID", + "jw","Latn","ID", + "kaa","Cyrl","UZ", + "kab","Latn","DZ", + "kac","Latn","MM", + "kad","Latn","ZZ", + "kai","Latn","ZZ", + "kaj","Latn","NG", + "kam","Latn","KE", + "kao","Latn","ML", + "kbd","Cyrl","RU", + "kbm","Latn","ZZ", + "kbp","Latn","ZZ", + "kbq","Latn","ZZ", + "kbx","Latn","ZZ", + "kby","Arab","NE", + "kcg","Latn","NG", + "kck","Latn","ZW", + "kcl","Latn","ZZ", + "kct","Latn","ZZ", + "kde","Latn","TZ", + "kdh","Arab","TG", + "kdl","Latn","ZZ", + "kdt","Thai","TH", + "kea","Latn","CV", + "ken","Latn","CM", + "kez","Latn","ZZ", + "kfo","Latn","CI", + "kfr","Deva","IN", + "kfy","Deva","IN", + "kg","Latn","CD", + "kge","Latn","ID", + "kgf","Latn","ZZ", + "kgp","Latn","BR", + "kha","Latn","IN", + "khb","Talu","CN", + "khn","Deva","IN", + "khq","Latn","ML", + "khs","Latn","ZZ", + "kht","Mymr","IN", + "khw","Arab","PK", + "khz","Latn","ZZ", + "ki","Latn","KE", + "kij","Latn","ZZ", + "kiu","Latn","TR", + "kiw","Latn","ZZ", + "kj","Latn","NA", + "kjd","Latn","ZZ", + "kjg","Laoo","LA", + "kjs","Latn","ZZ", + "kjy","Latn","ZZ", + "kk","Cyrl","KZ", + "kk","Arab","AF", + "kk","Arab","CN", + "kk","Arab","IR", + "kk","Arab","MN", + "kkc","Latn","ZZ", + "kkj","Latn","CM", + "kl","Latn","GL", + "kln","Latn","KE", + "klq","Latn","ZZ", + "klt","Latn","ZZ", + "klx","Latn","ZZ", + "km","Khmr","KH", + "kmb","Latn","AO", + "kmh","Latn","ZZ", + "kmo","Latn","ZZ", + "ku","Latn","TR", + "ku","Arab","LB", + "ku","Arab","IQ", + "kms","Latn","ZZ", + "kmu","Latn","ZZ", + "kmw","Latn","ZZ", + "kn","Knda","IN", + 
"kr","Latn","ZZ", + "knf","Latn","GW", + "kok","Deva","IN", + "knp","Latn","ZZ", + "ko","Kore","KR", + "koi","Cyrl","RU", + "kol","Latn","ZZ", + "kos","Latn","FM", + "koz","Latn","ZZ", + "kpe","Latn","LR", + "kpf","Latn","ZZ", + "kpo","Latn","ZZ", + "kpr","Latn","ZZ", + "kv","Cyrl","RU", + "kpx","Latn","ZZ", + "kqb","Latn","ZZ", + "kqf","Latn","ZZ", + "kqs","Latn","ZZ", + "kqy","Ethi","ZZ", + "krc","Cyrl","RU", + "kri","Latn","SL", + "krj","Latn","PH", + "krl","Latn","RU", + "krs","Latn","ZZ", + "kru","Deva","IN", + "ks","Arab","IN", + "ksb","Latn","TZ", + "ksd","Latn","ZZ", + "ksf","Latn","CM", + "ksh","Latn","DE", + "ksj","Latn","ZZ", + "ksr","Latn","ZZ", + "ktb","Ethi","ZZ", + "ktm","Latn","ZZ", + "kto","Latn","ZZ", + "kub","Latn","ZZ", + "kud","Latn","ZZ", + "kue","Latn","ZZ", + "kuj","Latn","ZZ", + "kum","Cyrl","RU", + "kun","Latn","ZZ", + "kup","Latn","ZZ", + "kus","Latn","ZZ", + "kvg","Latn","ZZ", + "kvr","Latn","ID", + "kvx","Arab","PK", + "kw","Latn","GB", + "kwj","Latn","ZZ", + "kwo","Latn","ZZ", + "yam","Latn","ZZ", + "kxa","Latn","ZZ", + "kxc","Ethi","ZZ", + "tvd","Latn","ZZ", + "kxm","Thai","TH", + "kxp","Arab","PK", + "kxw","Latn","ZZ", + "kxz","Latn","ZZ", + "ky","Cyrl","KG", + "ky","Arab","CN", + "ky","Latn","TR", + "kye","Latn","ZZ", + "kyx","Latn","ZZ", + "kzr","Latn","ZZ", + "la","Latn","VA", + "lab","Lina","GR", + "lad","Hebr","IL", + "lag","Latn","TZ", + "lah","Arab","PK", + "laj","Latn","UG", + "las","Latn","ZZ", + "lb","Latn","LU", + "lbe","Cyrl","RU", + "lbu","Latn","ZZ", + "lbw","Latn","ID", + "lcm","Latn","ZZ", + "lcp","Thai","CN", + "ldb","Latn","ZZ", + "led","Latn","ZZ", + "lee","Latn","ZZ", + "lem","Latn","ZZ", + "lep","Lepc","IN", + "leq","Latn","ZZ", + "leu","Latn","ZZ", + "lez","Cyrl","RU", + "lg","Latn","UG", + "lgg","Latn","ZZ", + "li","Latn","NL", + "lia","Latn","ZZ", + "lid","Latn","ZZ", + "lif","Deva","NP", + "lif","Limb","IN", + "lig","Latn","ZZ", + "lih","Latn","ZZ", + "lij","Latn","IT", + "lis","Lisu","CN", + 
"ljp","Latn","ID", + "lki","Arab","IR", + "lkt","Latn","US", + "lle","Latn","ZZ", + "lln","Latn","ZZ", + "lmn","Telu","IN", + "lmo","Latn","IT", + "lmp","Latn","ZZ", + "ln","Latn","CD", + "lns","Latn","ZZ", + "lnu","Latn","ZZ", + "lo","Laoo","LA", + "loj","Latn","ZZ", + "lok","Latn","ZZ", + "lol","Latn","CD", + "lor","Latn","ZZ", + "los","Latn","ZZ", + "loz","Latn","ZM", + "lrc","Arab","IR", + "lt","Latn","LT", + "ltg","Latn","LV", + "lu","Latn","CD", + "lua","Latn","CD", + "luo","Latn","KE", + "luz","Arab","IR", + "lv","Latn","LV", + "lwl","Thai","TH", + "lzh","Hans","CN", + "lzz","Latn","TR", + "mk","Cyrl","MK", + "mad","Latn","ID", + "maf","Latn","CM", + "mag","Deva","IN", + "mai","Deva","IN", + "mak","Latn","ID", + "mi","Latn","NZ", + "mas","Latn","KE", + "maw","Latn","ZZ", + "ms","Latn","MY", + "ms","Arab","CC", + "ms","Arab","ID", + "maz","Latn","MX", + "mbh","Latn","ZZ", + "mbo","Latn","ZZ", + "mbq","Latn","ZZ", + "mbu","Latn","ZZ", + "mbw","Latn","ZZ", + "mci","Latn","ZZ", + "mcp","Latn","ZZ", + "mcq","Latn","ZZ", + "mcr","Latn","ZZ", + "mcu","Latn","ZZ", + "mda","Latn","ZZ", + "mde","Arab","ZZ", + "mdf","Cyrl","RU", + "mdh","Latn","PH", + "mdj","Latn","ZZ", + "mdr","Latn","ID", + "mdx","Ethi","ZZ", + "med","Latn","ZZ", + "mee","Latn","ZZ", + "mek","Latn","ZZ", + "men","Latn","SL", + "mer","Latn","KE", + "met","Latn","ZZ", + "meu","Latn","ZZ", + "mfa","Arab","TH", + "mfe","Latn","MU", + "mfn","Latn","ZZ", + "mfo","Latn","ZZ", + "mfq","Latn","ZZ", + "mg","Latn","MG", + "mgh","Latn","MZ", + "mgl","Latn","ZZ", + "mgo","Latn","CM", + "mgp","Deva","NP", + "mgy","Latn","TZ", + "mh","Latn","MH", + "mhi","Latn","ZZ", + "mhl","Latn","ZZ", + "mif","Latn","ZZ", + "min","Latn","ID", + "mis","Hatr","IQ", + "mis","Medf","NG", + "miw","Latn","ZZ", + "mki","Arab","ZZ", + "mkl","Latn","ZZ", + "mkp","Latn","ZZ", + "mkw","Latn","ZZ", + "ml","Mlym","IN", + "mle","Latn","ZZ", + "mlp","Latn","ZZ", + "mls","Latn","SD", + "mmo","Latn","ZZ", + "mmu","Latn","ZZ", + 
"mmx","Latn","ZZ", + "mna","Latn","ZZ", + "mnf","Latn","ZZ", + "mni","Beng","IN", + "mnw","Mymr","MM", + "ro","Latn","RO", + "moa","Latn","ZZ", + "moe","Latn","CA", + "moh","Latn","CA", + "mos","Latn","BF", + "mox","Latn","ZZ", + "mpp","Latn","ZZ", + "mps","Latn","ZZ", + "mpt","Latn","ZZ", + "mpx","Latn","ZZ", + "mql","Latn","ZZ", + "mr","Deva","IN", + "mrd","Deva","NP", + "mrj","Cyrl","RU", + "mro","Mroo","BD", + "mt","Latn","MT", + "mtc","Latn","ZZ", + "mtf","Latn","ZZ", + "mti","Latn","ZZ", + "mtr","Deva","IN", + "mua","Latn","CM", + "raj","Deva","IN", + "mur","Latn","ZZ", + "mus","Latn","US", + "mva","Latn","ZZ", + "mvn","Latn","ZZ", + "mvy","Arab","PK", + "mwk","Latn","ML", + "mwv","Latn","ID", + "mww","Hmnp","US", + "mxc","Latn","ZW", + "mxm","Latn","ZZ", + "myk","Latn","ZZ", + "mym","Ethi","ZZ", + "myv","Cyrl","RU", + "myw","Latn","ZZ", + "myx","Latn","UG", + "myz","Mand","IR", + "mzk","Latn","ZZ", + "mzm","Latn","ZZ", + "mzn","Arab","IR", + "mzp","Latn","ZZ", + "mzw","Latn","ZZ", + "mzz","Latn","ZZ", + "na","Latn","NR", + "nac","Latn","ZZ", + "naf","Latn","ZZ", + "nak","Latn","ZZ", + "nan","Hans","CN", + "nap","Latn","IT", + "naq","Latn","NA", + "nas","Latn","ZZ", + "nb","Latn","NO", + "nca","Latn","ZZ", + "nce","Latn","ZZ", + "ncf","Latn","ZZ", + "nch","Latn","MX", + "nco","Latn","ZZ", + "ncu","Latn","ZZ", + "nd","Latn","ZW", + "ndc","Latn","MZ", + "nds","Latn","DE", + "ne","Deva","NP", + "neb","Latn","ZZ", + "new","Deva","NP", + "nex","Latn","ZZ", + "nfr","Latn","ZZ", + "ng","Latn","NA", + "nga","Latn","ZZ", + "ngb","Latn","ZZ", + "ngl","Latn","MZ", + "nhb","Latn","ZZ", + "nhe","Latn","MX", + "nhw","Latn","MX", + "nif","Latn","ZZ", + "nii","Latn","ZZ", + "nij","Latn","ID", + "nin","Latn","ZZ", + "niu","Latn","NU", + "niy","Latn","ZZ", + "niz","Latn","ZZ", + "njo","Latn","IN", + "nkg","Latn","ZZ", + "nko","Latn","ZZ", + "nmg","Latn","CM", + "nmz","Latn","ZZ", + "nn","Latn","NO", + "nnf","Latn","ZZ", + "nnh","Latn","CM", + "nnk","Latn","ZZ", + 
"nnm","Latn","ZZ", + "nnp","Wcho","IN", + "no","Latn","NO", + "nod","Lana","TH", + "noe","Deva","IN", + "non","Runr","SE", + "nop","Latn","ZZ", + "nou","Latn","ZZ", + "nqo","Nkoo","GN", + "nr","Latn","ZA", + "nrb","Latn","ZZ", + "nsk","Cans","CA", + "nsn","Latn","ZZ", + "nso","Latn","ZA", + "nss","Latn","ZZ", + "ntm","Latn","ZZ", + "ntr","Latn","ZZ", + "nui","Latn","ZZ", + "nup","Latn","ZZ", + "nus","Latn","SS", + "nuv","Latn","ZZ", + "nux","Latn","ZZ", + "nv","Latn","US", + "nwb","Latn","ZZ", + "nxq","Latn","CN", + "nxr","Latn","ZZ", + "ny","Latn","MW", + "nym","Latn","TZ", + "nyn","Latn","UG", + "nzi","Latn","GH", + "oc","Latn","FR", + "ogc","Latn","ZZ", + "okr","Latn","ZZ", + "okv","Latn","ZZ", + "ong","Latn","ZZ", + "onn","Latn","ZZ", + "ons","Latn","ZZ", + "opm","Latn","ZZ", + "or","Orya","IN", + "oro","Latn","ZZ", + "oru","Arab","ZZ", + "os","Cyrl","GE", + "osa","Osge","US", + "ota","Arab","ZZ", + "otk","Orkh","MN", + "ozm","Latn","ZZ", + "pa","Guru","IN", + "pa","Arab","PK", + "pag","Latn","PH", + "pal","Phli","IR", + "pal","Phlp","CN", + "pam","Latn","PH", + "pap","Latn","AW", + "pau","Latn","PW", + "pbi","Latn","ZZ", + "ps","Arab","AF", + "pcd","Latn","FR", + "pcm","Latn","NG", + "pdc","Latn","US", + "pdt","Latn","CA", + "ped","Latn","ZZ", + "peo","Xpeo","IR", + "pex","Latn","ZZ", + "pfl","Latn","DE", + "phl","Arab","ZZ", + "phn","Phnx","LB", + "pil","Latn","ZZ", + "pip","Latn","ZZ", + "pka","Brah","IN", + "pko","Latn","KE", + "pl","Latn","PL", + "pla","Latn","ZZ", + "pms","Latn","IT", + "png","Latn","ZZ", + "pnn","Latn","ZZ", + "pnt","Grek","GR", + "pon","Latn","FM", + "ppo","Latn","ZZ", + "pra","Khar","PK", + "prd","Arab","IR", + "prg","Latn","001", + "pss","Latn","ZZ", + "pt","Latn","BR", + "ptp","Latn","ZZ", + "puu","Latn","GA", + "pwa","Latn","ZZ", + "qu","Latn","PE", + "quc","Latn","GT", + "qug","Latn","EC", + "rai","Latn","ZZ", + "rao","Latn","ZZ", + "rcf","Latn","RE", + "rej","Latn","ID", + "rel","Latn","ZZ", + "res","Latn","ZZ", + 
"rgn","Latn","IT", + "rhg","Arab","MM", + "ria","Latn","IN", + "rif","Tfng","MA", + "rif","Latn","NL", + "rjs","Deva","NP", + "rkt","Beng","BD", + "rm","Latn","CH", + "rmf","Latn","FI", + "rmo","Latn","CH", + "rmt","Arab","IR", + "rmu","Latn","SE", + "rn","Latn","BI", + "rna","Latn","ZZ", + "rng","Latn","MZ", + "rob","Latn","ID", + "rof","Latn","TZ", + "roo","Latn","ZZ", + "rro","Latn","ZZ", + "rtm","Latn","FJ", + "ru","Cyrl","RU", + "rue","Cyrl","UA", + "rug","Latn","SB", + "rw","Latn","RW", + "rwk","Latn","TZ", + "rwo","Latn","ZZ", + "ryu","Kana","JP", + "sa","Deva","IN", + "saf","Latn","GH", + "sah","Cyrl","RU", + "saq","Latn","KE", + "sas","Latn","ID", + "sat","Latn","IN", + "sav","Latn","SN", + "saz","Saur","IN", + "sba","Latn","ZZ", + "sbe","Latn","ZZ", + "sbp","Latn","TZ", + "sc","Latn","IT", + "sr","Cyrl","RS", + "sr","Latn","RU", + "sr","Latn","ME", + "sr","Latn","RO", + "sr","Latn","TR", + "sck","Deva","IN", + "scl","Arab","ZZ", + "scn","Latn","IT", + "sco","Latn","GB", + "scs","Latn","CA", + "sd","Arab","PK", + "sd","Deva","IN", + "sd","Khoj","IN", + "sd","Sind","IN", + "sdc","Latn","IT", + "sdh","Arab","IR", + "se","Latn","NO", + "sef","Latn","CI", + "seh","Latn","MZ", + "sei","Latn","MX", + "ses","Latn","ML", + "sg","Latn","CF", + "sga","Ogam","IE", + "sgs","Latn","LT", + "sgw","Ethi","ZZ", + "sgz","Latn","ZZ", + "shi","Tfng","MA", + "shk","Latn","ZZ", + "shn","Mymr","MM", + "shu","Arab","ZZ", + "si","Sinh","LK", + "sid","Latn","ET", + "sig","Latn","ZZ", + "sil","Latn","ZZ", + "sim","Latn","ZZ", + "sjr","Latn","ZZ", + "sk","Latn","SK", + "skc","Latn","ZZ", + "skr","Arab","PK", + "sks","Latn","ZZ", + "sl","Latn","SI", + "sld","Latn","ZZ", + "sli","Latn","PL", + "sll","Latn","ZZ", + "sly","Latn","ID", + "sm","Latn","WS", + "sma","Latn","SE", + "smj","Latn","SE", + "smn","Latn","FI", + "smp","Samr","IL", + "smq","Latn","ZZ", + "sms","Latn","FI", + "sn","Latn","ZW", + "snc","Latn","ZZ", + "snk","Latn","ML", + "snp","Latn","ZZ", + "snx","Latn","ZZ", + 
"sny","Latn","ZZ", + "so","Latn","SO", + "sog","Sogd","UZ", + "sok","Latn","ZZ", + "soq","Latn","ZZ", + "sou","Thai","TH", + "soy","Latn","ZZ", + "spd","Latn","ZZ", + "spl","Latn","ZZ", + "sps","Latn","ZZ", + "srb","Sora","IN", + "srn","Latn","SR", + "srr","Latn","SN", + "ss","Latn","ZA", + "ssd","Latn","ZZ", + "ssg","Latn","ZZ", + "ssy","Latn","ER", + "st","Latn","ZA", + "stk","Latn","ZZ", + "stq","Latn","DE", + "su","Latn","ID", + "sua","Latn","ZZ", + "sue","Latn","ZZ", + "suk","Latn","TZ", + "sur","Latn","ZZ", + "sus","Latn","GN", + "sv","Latn","SE", + "sw","Latn","TZ", + "swb","Arab","YT", + "swc","Latn","CD", + "swg","Latn","DE", + "swp","Latn","ZZ", + "swv","Deva","IN", + "sxn","Latn","ID", + "sxw","Latn","ZZ", + "syl","Beng","BD", + "szl","Latn","PL", + "ta","Taml","IN", + "taj","Deva","NP", + "tal","Latn","ZZ", + "tan","Latn","ZZ", + "taq","Latn","ZZ", + "tbc","Latn","ZZ", + "tbd","Latn","ZZ", + "tbf","Latn","ZZ", + "tbg","Latn","ZZ", + "tbo","Latn","ZZ", + "tbw","Latn","PH", + "tbz","Latn","ZZ", + "tci","Latn","ZZ", + "tcy","Knda","IN", + "tdd","Tale","CN", + "tdg","Deva","NP", + "tdh","Deva","NP", + "te","Telu","IN", + "ted","Latn","ZZ", + "tem","Latn","SL", + "teo","Latn","UG", + "tet","Latn","TL", + "tfi","Latn","ZZ", + "tg","Cyrl","TJ", + "tg","Arab","PK", + "tgc","Latn","ZZ", + "tgo","Latn","ZZ", + "tgu","Latn","ZZ", + "th","Thai","TH", + "thl","Deva","NP", + "thq","Deva","NP", + "thr","Deva","NP", + "ti","Ethi","ET", + "tif","Latn","ZZ", + "tig","Ethi","ER", + "tik","Latn","ZZ", + "tim","Latn","ZZ", + "tio","Latn","ZZ", + "tiv","Latn","NG", + "tk","Latn","TM", + "tkl","Latn","TK", + "tkr","Latn","AZ", + "tkt","Deva","NP", + "tl","Latn","PH", + "tlf","Latn","ZZ", + "tlx","Latn","ZZ", + "tly","Latn","AZ", + "tmh","Latn","NE", + "tmy","Latn","ZZ", + "tn","Latn","ZA", + "tnh","Latn","ZZ", + "to","Latn","TO", + "tof","Latn","ZZ", + "tog","Latn","MW", + "toq","Latn","ZZ", + "tpi","Latn","PG", + "tpm","Latn","ZZ", + "tpz","Latn","ZZ", + "tqo","Latn","ZZ", + 
"tr","Latn","TR", + "tru","Latn","TR", + "trv","Latn","TW", + "trw","Arab","ZZ", + "ts","Latn","ZA", + "tsd","Grek","GR", + "tsf","Deva","NP", + "tsg","Latn","PH", + "tsj","Tibt","BT", + "tsw","Latn","ZZ", + "tt","Cyrl","RU", + "ttd","Latn","ZZ", + "tte","Latn","ZZ", + "ttj","Latn","UG", + "ttr","Latn","ZZ", + "tts","Thai","TH", + "ttt","Latn","AZ", + "tuh","Latn","ZZ", + "tul","Latn","ZZ", + "tum","Latn","MW", + "tuq","Latn","ZZ", + "tvl","Latn","TV", + "tvu","Latn","ZZ", + "twh","Latn","ZZ", + "twq","Latn","NE", + "txg","Tang","CN", + "ty","Latn","PF", + "tya","Latn","ZZ", + "tyv","Cyrl","RU", + "tzm","Latn","MA", + "ubu","Latn","ZZ", + "udm","Cyrl","RU", + "ug","Arab","CN", + "ug","Cyrl","KZ", + "ug","Cyrl","MN", + "uga","Ugar","SY", + "uk","Cyrl","UA", + "uli","Latn","FM", + "umb","Latn","AO", + "en","Latn","NG", + "en","Latn","AU", + "es","Latn","MX", + "sw","Latn","CD", + "en","Latn","ZA", + "es","Latn","CU", + "en","Latn","PG", + "en","Latn","GU", + "uz","Latn","UZ", + "ar","Arab","SA", + "en","Latn","GB", + "es","Latn","419", + "pap","Latn","CW", + "ar","Arab","SD", + "ar","Arab","YE", + "ca","Latn","AD", + "ar","Arab","AE", + "fa","Arab","AF", + "pt","Latn","AO", + "und","Latn","AQ", + "es","Latn","AR", + "sm","Latn","AS", + "de","Latn","AT", + "nl","Latn","AW", + "sv","Latn","AX", + "nl","Latn","BE", + "fr","Latn","BF", + "ar","Arab","BH", + "fr","Latn","BJ", + "fr","Latn","BL", + "ms","Latn","BN", + "es","Latn","BO", + "pap","Latn","BQ", + "und","Latn","BV", + "fr","Latn","CF", + "fr","Latn","CG", + "de","Latn","CH", + "fr","Latn","CI", + "es","Latn","CL", + "fr","Latn","CM", + "es","Latn","CO", + "und","Latn","CP", + "es","Latn","CR", + "pt","Latn","CV", + "el","Grek","CY", + "aa","Latn","DJ", + "es","Latn","DO", + "ar","Arab","DZ", + "es","Latn","EA", + "es","Latn","EC", + "ar","Arab","EH", + "ti","Ethi","ER", + "de","Latn","EZ", + "fr","Latn","GA", + "fr","Latn","GF", + "fr","Latn","GN", + "fr","Latn","GP", + "es","Latn","GQ", + "und","Latn","GS", + 
"es","Latn","GT", + "pt","Latn","GW", + "und","Latn","HM", + "es","Latn","HN", + "es","Latn","IC", + "ar","Arab","IQ", + "ar","Arab","JO", + "sw","Latn","KE", + "ar","Arab","KM", + "ko","Kore","KP", + "ar","Arab","KW", + "ru","Cyrl","KZ", + "ar","Arab","LB", + "de","Latn","LI", + "st","Latn","LS", + "fr","Latn","LU", + "ar","Arab","LY", + "ar","Arab","MA", + "fr","Latn","MC", + "ro","Latn","MD", + "fr","Latn","MF", + "fr","Latn","MQ", + "ar","Arab","MR", + "pt","Latn","MZ", + "af","Latn","NA", + "fr","Latn","NC", + "ha","Latn","NE", + "es","Latn","NI", + "ar","Arab","OM", + "es","Latn","PA", + "es","Latn","PE", + "fr","Latn","PF", + "ur","Arab","PK", + "fr","Latn","PM", + "es","Latn","PR", + "ar","Arab","PS", + "pt","Latn","PT", + "ar","Arab","QA", + "en","Latn","DG", + "fr","Latn","RE", + "fr","Latn","SC", + "nb","Latn","SJ", + "it","Latn","SM", + "fr","Latn","SN", + "nl","Latn","SR", + "pt","Latn","ST", + "es","Latn","SV", + "ar","Arab","SY", + "fr","Latn","TD", + "fr","Latn","TF", + "fr","Latn","TG", + "pt","Latn","TL", + "ar","Arab","TN", + "sw","Latn","UG", + "es","Latn","UY", + "it","Latn","VA", + "vi","Latn","VN", + "es","Latn","VE", + "fr","Latn","WF", + "sq","Latn","XK", + "fr","Latn","YT", + "lez","Aghb","RU", + "ks","Arab","GB", + "ur","Arab","IN", + "ur","Arab","MU", + "ha","Arab","NG", + "fa","Arab","TJ", + "az","Arab","TR", + "ban","Bali","ID", + "bbc","Batk","ID", + "sa","Bhks","IN", + "fr","Brai","FR", + "bug","Bugi","ID", + "bku","Buhd","PH", + "xcr","Cari","TR", + "mk","Cyrl","AL", + "sr","Cyrl","BA", + "mk","Cyrl","GR", + "uk","Cyrl","MD", + "bg","Cyrl","RO", + "uk","Cyrl","SK", + "kbd","Cyrl","TR", + "sr","Cyrl","XK", + "ne","Deva","BT", + "hif","Deva","FJ", + "bho","Deva","MU", + "doi","Dogr","IN", + "fr","Dupl","FR", + "sq","Elba","AL", + "arc","Elym","IR", + "wsg","Gong","IN", + "sa","Gran","IN", + "ko","Hang","KR", + "zh","Hani","CN", + "hnn","Hano","PH", + "yi","Hebr","CA", + "yi","Hebr","GB", + "yi","Hebr","SE", + "yi","Hebr","UA", + 
"yi","Hebr","US", + "ja","Hira","JP", + "hu","Hung","HU", + "ko","Jamo","KR", + "jv","Java","ID", + "ja","Kana","JP", + "bho","Kthi","IN", + "en","Latn","ET", + "tk","Latn","AF", + "ku","Latn","AM", + "za","Latn","CN", + "tr","Latn","CY", + "fr","Latn","DZ", + "ku","Latn","GE", + "tk","Latn","IR", + "fr","Latn","KM", + "fr","Latn","MA", + "sq","Latn","MK", + "pt","Latn","MO", + "fr","Latn","MR", + "fr","Latn","SY", + "fr","Latn","TN", + "pl","Latn","UA", + "xlc","Lyci","TR", + "xld","Lydi","TR", + "hi","Mahj","IN", + "mak","Maka","ID", + "xmn","Mani","CN", + "bo","Marc","CN", + "men","Mend","SL", + "xmr","Merc","SD", + "xmr","Mero","SD", + "mr","Modi","IN", + "mni","Mtei","IN", + "skr","Mult","PK", + "mnw","Mymr","TH", + "sa","Nand","IN", + "xna","Narb","SA", + "new","Newa","NP", + "zhx","Nshu","CN", + "sat","Olck","IN", + "so","Osma","SO", + "kv","Perm","RU", + "lzh","Phag","CN", + "xpr","Prti","IR", + "rej","Rjng","ID", + "rhg","Rohg","MM", + "xsa","Sarb","YE", + "sa","Shrd","IN", + "sa","Sidd","IN", + "sog","Sogo","UZ", + "su","Sund","ID", + "syl","Sylo","BD", + "tbw","Tagb","PH", + "doi","Takr","IN", + "zgh","Tfng","MA", + "fil","Tglg","PH", + "kdt","Thai","KH", + "kdt","Thai","LA", + "mai","Tirh","IN", + "vai","Vaii","LR", + "hoc","Wara","IN", + "cmg","Zanb","MN", + "unr","Beng","IN", + "unr","Deva","NP", + "unx","Beng","IN", + "uri","Latn","ZZ", + "urt","Latn","ZZ", + "urw","Latn","ZZ", + "usa","Latn","ZZ", + "utr","Latn","ZZ", + "uvh","Latn","ZZ", + "uvl","Latn","ZZ", + "uz","Arab","AF", + "uz","Cyrl","CN", + "vag","Latn","ZZ", + "van","Latn","ZZ", + "ve","Latn","ZA", + "vec","Latn","IT", + "vep","Latn","RU", + "vic","Latn","SX", + "viv","Latn","ZZ", + "vls","Latn","BE", + "vmf","Latn","DE", + "vmw","Latn","MZ", + "vo","Latn","001", + "vot","Latn","RU", + "vro","Latn","EE", + "vun","Latn","TZ", + "vut","Latn","ZZ", + "wa","Latn","BE", + "wae","Latn","CH", + "waj","Latn","ZZ", + "wal","Ethi","ET", + "wan","Latn","ZZ", + "war","Latn","PH", + "wbp","Latn","AU", 
+ "wbq","Telu","IN", + "wbr","Deva","IN", + "wci","Latn","ZZ", + "wer","Latn","ZZ", + "wgi","Latn","ZZ", + "whg","Latn","ZZ", + "wib","Latn","ZZ", + "wiu","Latn","ZZ", + "wiv","Latn","ZZ", + "wja","Latn","ZZ", + "wji","Latn","ZZ", + "wls","Latn","WF", + "wmo","Latn","ZZ", + "wnc","Latn","ZZ", + "wni","Arab","KM", + "wnu","Latn","ZZ", + "wo","Latn","SN", + "wob","Latn","ZZ", + "wos","Latn","ZZ", + "wrs","Latn","ZZ", + "wsk","Latn","ZZ", + "wtm","Deva","IN", + "wuu","Hans","CN", + "wuv","Latn","ZZ", + "wwa","Latn","ZZ", + "xav","Latn","BR", + "xbi","Latn","ZZ", + "xes","Latn","ZZ", + "xh","Latn","ZA", + "xla","Latn","ZZ", + "xmf","Geor","GE", + "xnr","Deva","IN", + "xog","Latn","UG", + "xon","Latn","ZZ", + "xrb","Latn","ZZ", + "xsi","Latn","ZZ", + "xsm","Latn","ZZ", + "xsr","Deva","NP", + "xwe","Latn","ZZ", + "yao","Latn","MZ", + "yap","Latn","FM", + "yas","Latn","ZZ", + "yat","Latn","ZZ", + "yav","Latn","CM", + "yay","Latn","ZZ", + "yaz","Latn","ZZ", + "yba","Latn","ZZ", + "ybb","Latn","CM", + "yby","Latn","ZZ", + "yer","Latn","ZZ", + "ygr","Latn","ZZ", + "ygw","Latn","ZZ", + "yko","Latn","ZZ", + "yle","Latn","ZZ", + "ylg","Latn","ZZ", + "yll","Latn","ZZ", + "yml","Latn","ZZ", + "yo","Latn","NG", + "yon","Latn","ZZ", + "yrb","Latn","ZZ", + "yre","Latn","ZZ", + "yrl","Latn","BR", + "yss","Latn","ZZ", + "yua","Latn","MX", + "yue","Hant","HK", + "yue","Hans","CN", + "yuj","Latn","ZZ", + "yut","Latn","ZZ", + "yuw","Latn","ZZ", + "zag","Latn","SD", + "zdj","Arab","KM", + "zea","Latn","NL", + "zia","Latn","ZZ", + "zlm","Latn","TG", + "zmi","Latn","MY", + "zne","Latn","ZZ", + "zu","Latn","ZA", + } // lsrs + } // likely + match{ + trie:bin{ // BytesTrie: 1269 bytes +00186dc27f73c16e778077a25f78aaa2 +79a25e7a01e82af51165ee35117ae820 +012a854811616e01f398f41248616e01 +f347f42007b314b32ab426b536b605b3 +c45ab329b429b62910b5292a2bb026b1 +22b205b3c446b329b429b62905b3c43c +b329b429b6291348616ef43f12ef66f2 +3501e92aef1165ee351165ee34012a85 +48166562f24c6174ee35736074a29275 
+02e74af2a275fa1172f534012a854c16 +6174ee437972ec35117ae8350bef30f4 +0af428f5a45ef71165ee35ef72f16ef2 +107301e829f220022a8543344c166174 +ee437972ec2b167972ec4c6174ee2be8 +24e82ee93eee1165ee350262a68a6822 +7310f2291165ee34012a855316696ee8 +4c6174ee35734ae1a62ee41165ee3401 +2a8541167261e24c6174ee3512f961e1 +2909e921e93ceba82aeea41aefa417f4 +1172f5351165ee34012a8545167468e9 +4c6174ee356ca86a75a8a1e12ee54ce7 +1172f5351165ee34012a855416616dec +4c6174ee351165ee34012a855416656c +f54c6174ee35705d704671a2537202ed +30eea2c2f71166f2351164e5350363a8 +5be156f374f41170f4a28007b3c217b3 +a61bb4a618b5a615b601b129b6291165 +ee34012a8547167572f54c6174ee3511 +65ee34012a8541167261e24c6174ee35 +12f565f3356d5a6ea2626f02e3a800ed +a264f21165ee34012a854f167279e14c +6174ee3509ee21eea2d7f2a43bf34af4 +a244f91165ee34012a854d16796df24c +6174ee351169e4356652e7a6c2e96eeb +4eec1165ee34012a854d166c79ed4c61 +74ee3512e565ee351162e73506e52ae5 +32ee50ef5af91165ee351165ee34012a +8544166576e14c6174ee35106e01e235 +ef350164446e10e223733e7944e20164 +2c6e01ee35ef2310e13112ef65ee3512 +ee65ee3567c0f46a946a8e6b946c06e2 +1ee250e7a292eea646ef1165ee34012a +854c16616fef4c6174ee351164e5296f +2e7534e11169f43512fa65ee3512e166 +f23512f669e43507ed31ed32ee50f56e +f91172f5351165ee34012a854b16686d +f24c6174ee351165ee34012a854b166e +64e14c6174ee351174f2357234e13ae7 +a4ddeb1172f53512e965ee351165ee34 +012a854716656ff24c6174ee35673a68 +7c6902e126e722f31165ee3506e40de4 +40eca26aeea267f51168e935612e7334 +e11165ee3512e165ee3512f764e52904 +6154e1a28cf254f4a485f91172f53401 +2a854116726dee437972ec3512f765ee +350162287310e82910f329649e643e65 +4a6601ef2af9116eec351164e13511e1 +6e01e231ef3105f343f32ef496f51165 +f3351165f3a28007b30cb342b43eb53a +b601b129b6292a2bb026b14eb204b029 +b229b329b429b52904b029b229b329b4 +29b52901b129b6291166e935e526ee2a +ef1165ee351165eea28007b317b362b4 +30b55ab601b229b62904b027b127b327 +b427b5272a2bb030b12cb201b229b629 +04b029b129b329b427b529617662a29c +6305720f7230efa2b6f91165ee3512f3 
+66f235653068386b12e261f23513e266 +69ec3512f265ee3507eb5beb4ef252f9 +a250fa1172f534012a854c166174ee43 +7972ec351165ee351161f2a28007b313 +b33cb426b522b605b422b429b529b629 +10b3292a2bb026b122b205b40fb429b5 +29b62905b406b429b529b629b029b129 +b2291165f3356334e13ae2a244e6116e +ec3512e865ee35127373f92905ee24ee +3af258f30168287310e82910f2291165 +ee34012a854216656ee74c6174ee3511 +66f235652ee534e81168e93512ed65ee +351172f535 + } // trie + regionToPartitions:bin{ // 1677 bytes +000008090a00020000000b0009000201 +090001010a000a000000000000000a0c +00000001010000000100000000000000 +000000000000010100000b0000000b00 +00000000000000000000000000000000 +00000000000000000000000000000000 +00000000000000000000000000000000 +00000000000000000000000000000000 +0000000000000000000000000000000c +010001000000000d0100000dade00 +00000007000000000002010100020002 +00020007000000000100000000000001 +00000000000000000000010000000000 +00000000000000010000000000000000 +00000000000000000001000000000000 +00000000000000000000000001010000 +00000000000000000000000001010000 +00000001000000000000000000000001 +00000000010000000001000000 + } // regionToPartitions + partitions{".","0","1","2","3","4","5","6","0123456","03","16","02","05","04","012346"} + paradigms{ + "pt","Latn","BR", + "pt","Latn","PT", + "es","Latn","ES", + "es","Latn","419", + "en","Latn","US", + "en","Latn","GB", + } + distances:intvector{80,50,4,3} + } // match +} diff --git a/icu4c/source/tools/genrb/genrb.cpp b/icu4c/source/tools/genrb/genrb.cpp index 885f3039bf6..120db6844f2 100644 --- a/icu4c/source/tools/genrb/genrb.cpp +++ b/icu4c/source/tools/genrb/genrb.cpp @@ -205,10 +205,10 @@ main(int argc, "\t-c or --copyright include copyright notice\n"); fprintf(stderr, "\t-e or --encoding encoding of source files\n" - "\t-d of --destdir destination directory, followed by the path, defaults to %s\n" - "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n" + "\t-d or --destdir destination directory, followed by the 
path, defaults to '%s'\n" + "\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n" "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" - "\t followed by path, defaults to %s\n", + "\t followed by path, defaults to '%s'\n", u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory()); fprintf(stderr, "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n" diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index 884d5d56660..18a8c76dbc5 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV } } -static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) +static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, + int32_t &stringLength, UErrorCode *status) { struct UString *tokenValue; char *result; - uint32_t count; expect(state, TOK_STRING, &tokenValue, comment, line, status); @@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin return NULL; } - count = u_strlen(tokenValue->fChars); - if(!uprv_isInvariantUString(tokenValue->fChars, count)) { + if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) { *status = U_INVALID_FORMAT_ERROR; error(*line, "invariant characters required for table keys, binary data, etc."); return NULL; } - result = static_cast<char *>(uprv_malloc(count+1)); + result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1)); if (result == NULL) { @@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin return NULL; } - u_UCharsToChars(tokenValue->fChars, result, count+1); + u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1); + stringLength = tokenValue->fLength; return result;
} @@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US int32_t value; UBool readToken = FALSE; char *stopstring; - uint32_t len; struct UString memberComments; IntVectorResource *result = intvector_open(state->bundle, tag, comment, status); @@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US return result; } - string = getInvariantString(state, NULL, NULL, status); + int32_t stringLength; + string = getInvariantString(state, NULL, NULL, stringLength, status); if (U_FAILURE(*status)) { @@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US /* For handling illegal char in the Intvector */ value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ - len=(uint32_t)(stopstring-string); + int32_t len = (int32_t)(stopstring-string); - if(len==uprv_strlen(string)) + if(len==stringLength) { result->add(value, *status); uprv_free(string); @@ -1454,7 +1454,8 @@ static struct SResource * parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) { uint32_t line; - LocalMemory<char> string(getInvariantString(state, &line, NULL, status)); + int32_t stringLength; + LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status)); if (string.isNull() || U_FAILURE(*status)) { return NULL; @@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri printf(" binary %s at line %i \n", (tag == NULL) ?
"(null)" : tag, (int)startline); } - uint32_t count = (uint32_t)uprv_strlen(string.getAlias()); - if (count > 0){ - if((count % 2)==0){ - LocalMemory<uint8_t> value; - if (value.allocateInsteadAndCopy(count) == NULL) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - char toConv[3] = {'\0', '\0', '\0'}; - for (uint32_t i = 0; i < count; i += 2) - { - toConv[0] = string[i]; - toConv[1] = string[i + 1]; + LocalMemory<uint8_t> value; + int32_t count = 0; + if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL) + { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } - char *stopstring; - value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); - uint32_t len=(uint32_t)(stopstring-toConv); + char toConv[3] = {'\0', '\0', '\0'}; + for (int32_t i = 0; i < stringLength;) + { + // Skip spaces (which may have been line endings). + char c0 = string[i++]; + if (c0 == ' ') { continue; } + if (i == stringLength) { + *status=U_INVALID_CHAR_FOUND; + error(line, "Encountered invalid binary value (odd number of hex digits)"); + return NULL; + } + toConv[0] = c0; + toConv[1] = string[i++]; - if(len!=2) - { - *status=U_INVALID_CHAR_FOUND; - return NULL; - } - } + char *stopstring; + value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); + uint32_t len=(uint32_t)(stopstring-toConv); - return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status); - } - else + if(len!=2) { - *status = U_INVALID_CHAR_FOUND; - error(line, "Encountered invalid binary value (length is odd)"); + *status=U_INVALID_CHAR_FOUND; + error(line, "Encountered invalid binary value (not all pairs of hex digits)"); return NULL; } } - else - { + + if (count == 0) { warning(startline, "Encountered empty binary value"); return bin_open(state->bundle, tag, 0, NULL, "", comment, status); + } else { + return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status); } } @@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag,
uint32_t startline, const struct UStr int32_t value; char *string; char *stopstring; - uint32_t len; - string = getInvariantString(state, NULL, NULL, status); + int32_t stringLength; + string = getInvariantString(state, NULL, NULL, stringLength, status); if (string == NULL || U_FAILURE(*status)) { @@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); } - if (uprv_strlen(string) <= 0) + if (stringLength == 0) { warning(startline, "Encountered empty integer. Default value is 0."); } @@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr /* Allow integer support for hexdecimal, octal digit and decimal*/ /* and handle illegal char in the integer*/ value = uprv_strtoul(string, &stopstring, 0); - len=(uint32_t)(stopstring-string); - if(len==uprv_strlen(string)) + int32_t len = (int32_t)(stopstring-string); + if(len==stringLength) { result = int_open(state->bundle, tag, value, comment, status); } @@ -1567,7 +1567,8 @@ static struct SResource * parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) { uint32_t line; - LocalMemory filename(getInvariantString(state, &line, NULL, status)); + int32_t stringLength; + LocalMemory filename(getInvariantString(state, &line, NULL, stringLength, status)); if (U_FAILURE(*status)) { return NULL; @@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr UCHARBUF *ucbuf; char *fullname = NULL; - int32_t count = 0; const char* cp = NULL; const UChar* uBuffer = NULL; - filename = getInvariantString(state, &line, NULL, status); - count = (int32_t)uprv_strlen(filename); + int32_t stringLength; + filename = getInvariantString(state, &line, NULL, stringLength, status); if (U_FAILURE(*status)) { @@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, 
uint32_t startline, const struct UStr printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); } - fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); + fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2); /* test for NULL */ if(fullname == NULL) { diff --git a/icu4j/build.xml b/icu4j/build.xml index 781a6d0229e..a8dfaa5c409 100644 --- a/icu4j/build.xml +++ b/icu4j/build.xml @@ -368,6 +368,7 @@ + @@ -1201,7 +1202,7 @@ - + @@ -1249,7 +1250,7 @@ - + diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java index 317f5444ebc..d1dc775d183 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LSR.java @@ -4,18 +4,18 @@ package com.ibm.icu.impl.locale; import java.util.Objects; -final class LSR { - static final int REGION_INDEX_LIMIT = 1001 + 26 * 26; +public final class LSR { + public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26; - static final boolean DEBUG_OUTPUT = false; + public static final boolean DEBUG_OUTPUT = false; - final String language; - final String script; - final String region; + public final String language; + public final String script; + public final String region; /** Index for region, negative if ill-formed. @see indexForRegion */ final int regionIndex; - LSR(String language, String script, String region) { + public LSR(String language, String script, String region) { this.language = language; this.script = script; this.region = region; @@ -27,7 +27,7 @@ final class LSR { * Do not rely on a particular region->index mapping; it may change. * Returns 0 for ill-formed strings. 
*/ - static final int indexForRegion(String region) { + public static final int indexForRegion(String region) { if (region.length() == 2) { int a = region.charAt(0) - 'A'; if (a < 0 || 25 < a) { return 0; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java index 56735a8b5cd..8fe0fe8042a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java @@ -2,12 +2,20 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.locale; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.Map; +import java.util.MissingResourceException; import java.util.Set; import java.util.TreeMap; -import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.UResource; import com.ibm.icu.util.BytesTrie; +import com.ibm.icu.util.LocaleMatcher.FavorSubtag; import com.ibm.icu.util.ULocale; /** @@ -16,17 +24,17 @@ import com.ibm.icu.util.ULocale; */ public class LocaleDistance { /** Distance value bit flag, set by the builder. */ - static final int DISTANCE_SKIP_SCRIPT = 0x80; + public static final int DISTANCE_SKIP_SCRIPT = 0x80; /** Distance value bit flag, set by trieNext(). */ private static final int DISTANCE_IS_FINAL = 0x100; private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT; // Indexes into array of distances. 
- static final int IX_DEF_LANG_DISTANCE = 0; - static final int IX_DEF_SCRIPT_DISTANCE = 1; - static final int IX_DEF_REGION_DISTANCE = 2; - static final int IX_MIN_REGION_DISTANCE = 3; - static final int IX_LIMIT = 4; + public static final int IX_DEF_LANG_DISTANCE = 0; + public static final int IX_DEF_SCRIPT_DISTANCE = 1; + public static final int IX_DEF_REGION_DISTANCE = 2; + public static final int IX_MIN_REGION_DISTANCE = 3; + public static final int IX_LIMIT = 4; private static final int ABOVE_THRESHOLD = 100; private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; @@ -54,22 +62,100 @@ public class LocaleDistance { private final int minRegionDistance; private final int defaultDemotionPerDesiredLocale; - // TODO: Load prebuilt data from a resource bundle - // to avoid the dependency on the builder code. // VisibleForTesting - public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build(); - - LocaleDistance(BytesTrie trie, - byte[] regionToPartitionsIndex, String[] partitionArrays, - Set paradigmLSRs, int[] distances) { - this.trie = trie; - this.regionToPartitionsIndex = regionToPartitionsIndex; - this.partitionArrays = partitionArrays; - this.paradigmLSRs = paradigmLSRs; - defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE]; - defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE]; - defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE]; - this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE]; + public static final class Data { + public byte[] trie; + public byte[] regionToPartitionsIndex; + public String[] partitionArrays; + public Set paradigmLSRs; + public int[] distances; + + public Data(byte[] trie, + byte[] regionToPartitionsIndex, String[] partitionArrays, + Set paradigmLSRs, int[] distances) { + this.trie = trie; + this.regionToPartitionsIndex = regionToPartitionsIndex; + this.partitionArrays = partitionArrays; + this.paradigmLSRs = paradigmLSRs; + this.distances = distances; + } + + private static UResource.Value 
getValue(UResource.Table table, + String key, UResource.Value value) { + if (!table.findValue(key, value)) { + throw new MissingResourceException( + "langInfo.res missing data", "", "match/" + key); + } + return value; + } + + // VisibleForTesting + public static Data load() throws MissingResourceException { + ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance( + ICUData.ICU_BASE_NAME, "langInfo", + ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT); + UResource.Value value = langInfo.getValueWithFallback("match"); + UResource.Table matchTable = value.getTable(); + + ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary(); + byte[] trie = new byte[buffer.remaining()]; + buffer.get(trie); + + buffer = getValue(matchTable, "regionToPartitions", value).getBinary(); + byte[] regionToPartitions = new byte[buffer.remaining()]; + buffer.get(regionToPartitions); + if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) { + throw new MissingResourceException( + "langInfo.res binary data too short", "", "match/regionToPartitions"); + } + + String[] partitions = getValue(matchTable, "partitions", value).getStringArray(); + + Set paradigmLSRs; + if (matchTable.findValue("paradigms", value)) { + String[] paradigms = value.getStringArray(); + paradigmLSRs = new HashSet<>(paradigms.length / 3); + for (int i = 0; i < paradigms.length; i += 3) { + paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2])); + } + } else { + paradigmLSRs = Collections.emptySet(); + } + + int[] distances = getValue(matchTable, "distances", value).getIntVector(); + if (distances.length < IX_LIMIT) { + throw new MissingResourceException( + "langInfo.res intvector too short", "", "match/distances"); + } + + return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances); + } + + @Override + public boolean equals(Object other) { + if (this == other) { return true; } + if (!getClass().equals(other.getClass())) { return 
false; } + Data od = (Data)other; + return Arrays.equals(trie, od.trie) && + Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) && + Arrays.equals(partitionArrays, od.partitionArrays) && + paradigmLSRs.equals(od.paradigmLSRs) && + Arrays.equals(distances, od.distances); + } + } + + // VisibleForTesting + public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load()); + + private LocaleDistance(Data data) { + this.trie = new BytesTrie(data.trie, 0); + this.regionToPartitionsIndex = data.regionToPartitionsIndex; + this.partitionArrays = data.partitionArrays; + this.paradigmLSRs = data.paradigmLSRs; + defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE]; + defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE]; + defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE]; + this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE]; LSR en = new LSR("en", "Latn", "US"); LSR enGB = new LSR("en", "Latn", "GB"); @@ -102,7 +188,7 @@ public class LocaleDistance { * (negative if none has a distance below the threshold), * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. */ - int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs, + public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs, int threshold, FavorSubtag favorSubtag) { BytesTrie iter = new BytesTrie(trie); // Look up the desired language only once for all supported LSRs. 
@@ -335,7 +421,7 @@ public class LocaleDistance { return partitionArrays[pIndex]; } - boolean isParadigmLSR(LSR lsr) { + public boolean isParadigmLSR(LSR lsr) { return paradigmLSRs.contains(lsr); } @@ -348,7 +434,7 @@ public class LocaleDistance { return defaultRegionDistance; } - int getDefaultDemotionPerDesiredLocale() { + public int getDefaultDemotionPerDesiredLocale() { return defaultDemotionPerDesiredLocale; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java index 0873b6d3241..3b63705e6a3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java @@ -2,10 +2,18 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.locale; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.MissingResourceException; import java.util.TreeMap; +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.UResource; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.ULocale; @@ -14,30 +22,93 @@ public final class XLikelySubtags { private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK - static final int SKIP_SCRIPT = 1; + public static final int SKIP_SCRIPT = 1; private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; - // TODO: Load prebuilt data from a resource bundle - // to avoid the dependency on the builder code. 
// VisibleForTesting - public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build()); - - static final class Data { - private final Map languageAliases; - private final Map regionAliases; - private final BytesTrie trie; - private final LSR[] lsrs; - - Data(Map languageAliases, Map regionAliases, - BytesTrie trie, LSR[] lsrs) { + public static final class Data { + public final Map languageAliases; + public final Map regionAliases; + public final byte[] trie; + public final LSR[] lsrs; + + public Data(Map languageAliases, Map regionAliases, + byte[] trie, LSR[] lsrs) { this.languageAliases = languageAliases; this.regionAliases = regionAliases; this.trie = trie; this.lsrs = lsrs; } + + private static UResource.Value getValue(UResource.Table table, + String key, UResource.Value value) { + if (!table.findValue(key, value)) { + throw new MissingResourceException( + "langInfo.res missing data", "", "likely/" + key); + } + return value; + } + + // VisibleForTesting + public static Data load() throws MissingResourceException { + ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance( + ICUData.ICU_BASE_NAME, "langInfo", + ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT); + UResource.Value value = langInfo.getValueWithFallback("likely"); + UResource.Table likelyTable = value.getTable(); + + Map languageAliases; + if (likelyTable.findValue("languageAliases", value)) { + String[] pairs = value.getStringArray(); + languageAliases = new HashMap<>(pairs.length / 2); + for (int i = 0; i < pairs.length; i += 2) { + languageAliases.put(pairs[i], pairs[i + 1]); + } + } else { + languageAliases = Collections.emptyMap(); + } + + Map regionAliases; + if (likelyTable.findValue("regionAliases", value)) { + String[] pairs = value.getStringArray(); + regionAliases = new HashMap<>(pairs.length / 2); + for (int i = 0; i < pairs.length; i += 2) { + regionAliases.put(pairs[i], pairs[i + 1]); + } + } else { + regionAliases = 
Collections.emptyMap(); + } + + ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary(); + byte[] trie = new byte[buffer.remaining()]; + buffer.get(trie); + + String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray(); + LSR[] lsrs = new LSR[lsrSubtags.length / 3]; + for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) { + lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]); + } + + return new Data(languageAliases, regionAliases, trie, lsrs); + } + + @Override + public boolean equals(Object other) { + if (this == other) { return true; } + if (!getClass().equals(other.getClass())) { return false; } + Data od = (Data)other; + return + languageAliases.equals(od.languageAliases) && + regionAliases.equals(od.regionAliases) && + Arrays.equals(trie, od.trie) && + Arrays.equals(lsrs, od.lsrs); + } } + // VisibleForTesting + public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load()); + private final Map languageAliases; private final Map regionAliases; @@ -54,7 +125,7 @@ public final class XLikelySubtags { private XLikelySubtags(XLikelySubtags.Data data) { languageAliases = data.languageAliases; regionAliases = data.regionAliases; - trie = data.trie; + trie = new BytesTrie(data.trie, 0); lsrs = data.lsrs; // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**"). @@ -85,6 +156,23 @@ public final class XLikelySubtags { } } + /** + * Implementation of LocaleMatcher.canonicalize(ULocale). + */ + public ULocale canonicalize(ULocale locale) { + String lang = locale.getLanguage(); + String lang2 = languageAliases.get(lang); + String region = locale.getCountry(); + String region2 = regionAliases.get(region); + if (lang2 != null || region2 != null) { + return new ULocale( + lang2 == null ? lang : lang2, + locale.getScript(), + region2 == null ? 
region : region2); + } + return locale; + } + private static String getCanonical(Map aliases, String alias) { String canonical = aliases.get(alias); return canonical == null ? alias : canonical; @@ -101,7 +189,7 @@ public final class XLikelySubtags { locale.getVariant()); } - LSR makeMaximizedLsrFrom(Locale locale) { + public LSR makeMaximizedLsrFrom(Locale locale) { String tag = locale.toLanguageTag(); if (tag.startsWith("x-")) { // Private use language tag x-subtag-subtag... diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java deleted file mode 100644 index f7ffeb22e77..00000000000 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLocaleMatcher.java +++ /dev/null @@ -1,900 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.locale; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; - -import com.ibm.icu.util.LocalePriorityList; -import com.ibm.icu.util.ULocale; - -/** - * Immutable class that picks the best match between a user's desired locales and - * and application's supported locales. - * - *

If there are multiple supported locales with the same (language, script, region) - * likely subtags, then the current implementation returns the first of those locales. - * It ignores variant subtags (except for pseudolocale variants) and extensions. - * This may change in future versions. - * - *

For example, the current implementation does not distinguish between - * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. - * - *

If you prefer one equivalent locale over another, then provide only the preferred one, - * or place it earlier in the list of supported locales. - * - *

Otherwise, the order of supported locales may have no effect on the best-match results. - * The current implementation compares each desired locale with supported locales - * in the following order: - * 1. Default locale, if supported; - * 2. CLDR "paradigm locales" like en-GB and es-419; - * 3. other supported locales. - * This may change in future versions. - * - *

TODO: Migration notes. - * - * @author markdavis - */ -public final class XLocaleMatcher { - private static final LSR UND_LSR = new LSR("und","",""); - private static final ULocale UND_ULOCALE = new ULocale("und"); - private static final Locale UND_LOCALE = new Locale("und"); - - // Activates debugging output to stderr with details of GetBestMatch. - private static final boolean TRACE_MATCHER = false; - - private static abstract class LsrIterator implements Iterator { - int bestDesiredIndex = -1; - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - public abstract void rememberCurrent(int desiredIndex); - } - - /** - * Builder option for whether the language subtag or the script subtag is most important. - * - * @see Builder#setFavorSubtag(FavorSubtag) - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public enum FavorSubtag { - /** - * Language differences are most important, then script differences, then region differences. - * (This is the default behavior.) - * - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - LANGUAGE, - /** - * Makes script differences matter relatively more than language differences. - * - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - SCRIPT - } - - /** - * Builder option for whether all desired locales are treated equally or - * earlier ones are preferred. - * - * @see Builder#setDemotionPerDesiredLocale(Demotion) - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public enum Demotion { - /** - * All desired locales are treated equally. - * - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - NONE, - /** - * Earlier desired locales are preferred. - * - *

From each desired locale to the next, - * the distance to any supported locale is increased by an additional amount - * which is at least as large as most region mismatches. - * A later desired locale has to have a better match with some supported locale - * due to more than merely having the same region subtag. - * - *

For example: Supported={en, sv} desired=[en-GB, sv] - * yields Result(en-GB, en) because - * with the demotion of sv its perfect match is no better than - * the region distance between the earlier desired locale en-GB and en=en-US. - * - *

Notes: - *

    - *
  • In some cases, language and/or script differences can be as small as - * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) - *
  • It is possible for certain region differences to be larger than usual, - * and larger than the demotion. - * (As of CLDR 35 there is no such case, but - * this is possible in future versions of the data.) - *
- * - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - REGION - } - - /** - * Data for the best-matching pair of a desired and a supported locale. - * - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public static final class Result { - private final ULocale desiredULocale; - private final ULocale supportedULocale; - private final Locale desiredLocale; - private final Locale supportedLocale; - private final int desiredIndex; - private final int supportedIndex; - - private Result(ULocale udesired, ULocale usupported, - Locale desired, Locale supported, - int desIndex, int suppIndex) { - desiredULocale = udesired; - supportedULocale = usupported; - desiredLocale = desired; - supportedLocale = supported; - desiredIndex = desIndex; - supportedIndex = suppIndex; - } - - /** - * Returns the best-matching desired locale. - * null if the list of desired locales is empty or if none matched well enough. - * - * @return the best-matching desired locale, or null. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public ULocale getDesiredULocale() { - return desiredULocale == null && desiredLocale != null ? - ULocale.forLocale(desiredLocale) : desiredULocale; - } - /** - * Returns the best-matching desired locale. - * null if the list of desired locales is empty or if none matched well enough. - * - * @return the best-matching desired locale, or null. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Locale getDesiredLocale() { - return desiredLocale == null && desiredULocale != null ? - desiredULocale.toLocale() : desiredLocale; - } - - /** - * Returns the best-matching supported locale. - * If none matched well enough, this is the default locale. - * The default locale is null if the list of supported locales is empty and - * no explicit default locale is set. 
- * - * @return the best-matching supported locale, or null. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public ULocale getSupportedULocale() { return supportedULocale; } - /** - * Returns the best-matching supported locale. - * If none matched well enough, this is the default locale. - * The default locale is null if the list of supported locales is empty and - * no explicit default locale is set. - * - * @return the best-matching supported locale, or null. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Locale getSupportedLocale() { return supportedLocale; } - - /** - * Returns the index of the best-matching desired locale in the input Iterable order. - * -1 if the list of desired locales is empty or if none matched well enough. - * - * @return the index of the best-matching desired locale, or -1. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public int getDesiredIndex() { return desiredIndex; } - - /** - * Returns the index of the best-matching supported locale in the constructor’s or builder’s input order - * (“set” Collection plus “added” locales). - * If the matcher was built from a locale list string, then the iteration order is that - * of a LocalePriorityList built from the same string. - * -1 if the list of supported locales is empty or if none matched well enough. - * - * @return the index of the best-matching supported locale, or -1. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public int getSupportedIndex() { return supportedIndex; } - - /** - * Takes the best-matching supported locale and adds relevant fields of the - * best-matching desired locale, such as the -t- and -u- extensions. - * May replace some fields of the supported locale. - * The result is the locale that should be used for date and number formatting, collation, etc. 
- * - *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn - * - * @return the service locale, combining the best-matching desired and supported locales. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public ULocale makeServiceULocale() { - ULocale bestDesired = getDesiredULocale(); - ULocale serviceLocale = supportedULocale; - if (!serviceLocale.equals(bestDesired) && bestDesired != null) { - ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale); - - // Copy the region from bestDesired, if there is one. - // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty. - String region = bestDesired.getCountry(); - if (!region.isEmpty()) { - b.setRegion(region); - } - - // Copy the variants from bestDesired, if there are any. - // Note that this will override any serviceLocale variants. - // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster). - // TODO: Why replace? Why not append? - String variants = bestDesired.getVariant(); - if (!variants.isEmpty()) { - b.setVariant(variants); - } - - // Copy the extensions from bestDesired, if there are any. - // Note that this will override any serviceLocale extensions. - // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native" - // (replacing calendar). - // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale?? - // (Unsure about this one.) - for (char extensionKey : bestDesired.getExtensionKeys()) { - b.setExtension(extensionKey, bestDesired.getExtension(extensionKey)); - } - serviceLocale = b.build(); - } - return serviceLocale; - } - - /** - * Takes the best-matching supported locale and adds relevant fields of the - * best-matching desired locale, such as the -t- and -u- extensions. - * May replace some fields of the supported locale. - * The result is the locale that should be used for date and number formatting, collation, etc. - * - *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn - * - * @return the service locale, combining the best-matching desired and supported locales. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Locale makeServiceLocale() { - return makeServiceULocale().toLocale(); - } - } - - private final int thresholdDistance; - private final int demotionPerDesiredLocale; - private final FavorSubtag favorSubtag; - - // These are in input order. - private final ULocale[] supportedULocales; - private final Locale[] supportedLocales; - // These are in preference order: 1. Default locale 2. paradigm locales 3. others. - private final Map supportedLsrToIndex; - // Array versions of the supportedLsrToIndex keys and values. - // The distance lookup loops over the supportedLsrs and returns the index of the best match. - private final LSR[] supportedLsrs; - private final int[] supportedIndexes; - private final ULocale defaultULocale; - private final Locale defaultLocale; - private final int defaultLocaleIndex; - - /** - * LocaleMatcher Builder. - * - * @see XLocaleMatcher#builder() - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public static class Builder { - private List supportedLocales; - private int thresholdDistance = -1; - private Demotion demotion; - private ULocale defaultLocale; - private FavorSubtag favor; - - /** - * Parses the string like {@link LocalePriorityList} does and - * sets the supported locales accordingly. - * Clears any previously set/added supported locales first. - * - * @param locales the languagePriorityList to set - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. 
- */ - public Builder setSupportedLocales(String locales) { - return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales()); - } - - /** - * Copies the supported locales, preserving iteration order. - * Clears any previously set/added supported locales first. - * Duplicates are allowed, and are not removed. - * - * @param locales the list of locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setSupportedULocales(Collection locales) { - supportedLocales = new ArrayList<>(locales); - return this; - } - - /** - * Copies the supported locales, preserving iteration order. - * Clears any previously set/added supported locales first. - * Duplicates are allowed, and are not removed. - * - * @param locales the list of locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setSupportedLocales(Collection locales) { - supportedLocales = new ArrayList<>(locales.size()); - for (Locale locale : locales) { - supportedLocales.add(ULocale.forLocale(locale)); - } - return this; - } - - /** - * Adds another supported locale. - * Duplicates are allowed, and are not removed. - * - * @param locale the list of locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder addSupportedULocale(ULocale locale) { - if (supportedLocales == null) { - supportedLocales = new ArrayList<>(); - } - supportedLocales.add(locale); - return this; - } - - /** - * Adds another supported locale. - * Duplicates are allowed, and are not removed. - * - * @param locale the list of locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. 
- */ - public Builder addSupportedLocale(Locale locale) { - return addSupportedULocale(ULocale.forLocale(locale)); - } - - /** - * Sets the default locale; if null, or if it is not set explicitly, - * then the first supported locale is used as the default locale. - * - * @param defaultLocale the default locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setDefaultULocale(ULocale defaultLocale) { - this.defaultLocale = defaultLocale; - return this; - } - - /** - * Sets the default locale; if null, or if it is not set explicitly, - * then the first supported locale is used as the default locale. - * - * @param defaultLocale the default locale - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setDefaultLocale(Locale defaultLocale) { - this.defaultLocale = ULocale.forLocale(defaultLocale); - return this; - } - - /** - * If SCRIPT, then the language differences are smaller than script differences. - * This is used in situations (such as maps) where - * it is better to fall back to the same script than a similar language. - * - * @param subtag the subtag to favor - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setFavorSubtag(FavorSubtag subtag) { - this.favor = subtag; - return this; - } - - /** - * Option for whether all desired locales are treated equally or - * earlier ones are preferred (this is the default). - * - * @param demotion the demotion per desired locale to set. - * @return this Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public Builder setDemotionPerDesiredLocale(Demotion demotion) { - this.demotion = demotion; - return this; - } - - /** - * Internal only! 
- * - * @param thresholdDistance the thresholdDistance to set, with -1 = default - * @return this Builder object - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public Builder internalSetThresholdDistance(int thresholdDistance) { - if (thresholdDistance > 100) { - thresholdDistance = 100; - } - this.thresholdDistance = thresholdDistance; - return this; - } - - /** - * Builds and returns a new locale matcher. - * This builder can continue to be used. - * - * @return new XLocaleMatcher. - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. - */ - public XLocaleMatcher build() { - return new XLocaleMatcher(this); - } - - @Override - public String toString() { - StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder"); - if (!supportedLocales.isEmpty()) { - s.append(" supported={").append(supportedLocales.toString()).append('}'); - } - if (defaultLocale != null) { - s.append(" default=").append(defaultLocale.toString()); - } - if (favor != null) { - s.append(" distance=").append(favor.toString()); - } - if (thresholdDistance >= 0) { - s.append(String.format(" threshold=%d", thresholdDistance)); - } - if (demotion != null) { - s.append(" demotion=").append(demotion.toString()); - } - return s.append('}').toString(); - } - } - - /** - * Returns a builder used in chaining parameters for building a LocaleMatcher. - * - * @return a new Builder object - * @draft ICU 65 - * @provisional This API might change or be removed in a future release. 
- */ - public static Builder builder() { - return new Builder(); - } - - /** Convenience method */ - public XLocaleMatcher(String supportedLocales) { - this(builder().setSupportedLocales(supportedLocales)); - } - /** Convenience method */ - public XLocaleMatcher(LocalePriorityList supportedLocales) { - this(builder().setSupportedULocales(supportedLocales.getULocales())); - } - - private XLocaleMatcher(Builder builder) { - thresholdDistance = builder.thresholdDistance < 0 ? - LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance; - // Store the supported locales in input order, - // so that when different types are used (e.g., java.util.Locale) - // we can return those by parallel index. - int supportedLocalesLength = builder.supportedLocales.size(); - supportedULocales = new ULocale[supportedLocalesLength]; - supportedLocales = new Locale[supportedLocalesLength]; - // Supported LRSs in input order. - LSR lsrs[] = new LSR[supportedLocalesLength]; - // Also find the first supported locale whose LSR is - // the same as that for the default locale. - ULocale udef = builder.defaultLocale; - Locale def = null; - LSR defLSR = null; - int idef = -1; - if (udef != null) { - def = udef.toLocale(); - defLSR = getMaximalLsrOrUnd(udef); - } - int i = 0; - for (ULocale locale : builder.supportedLocales) { - supportedULocales[i] = locale; - supportedLocales[i] = locale.toLocale(); - LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale); - if (idef < 0 && defLSR != null && lsr.equals(defLSR)) { - idef = i; - } - ++i; - } - - // We need an unordered map from LSR to first supported locale with that LSR, - // and an ordered list of (LSR, Indexes). - // We use a LinkedHashMap for both, - // and insert the supported locales in the following order: - // 1. Default locale, if it is supported. - // 2. Priority locales in builder order. - // 3. Remaining locales in builder order. 
- supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); - Map otherLsrToIndex = null; - if (idef >= 0) { - supportedLsrToIndex.put(defLSR, idef); - } - i = 0; - for (ULocale locale : supportedULocales) { - if (i == idef) { continue; } - LSR lsr = lsrs[i]; - if (defLSR == null) { - assert i == 0; - udef = locale; - def = supportedLocales[0]; - defLSR = lsr; - idef = 0; - supportedLsrToIndex.put(lsr, 0); - } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) { - putIfAbsent(supportedLsrToIndex, lsr, i); - } else { - if (otherLsrToIndex == null) { - otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); - } - putIfAbsent(otherLsrToIndex, lsr, i); - } - ++i; - } - if (otherLsrToIndex != null) { - supportedLsrToIndex.putAll(otherLsrToIndex); - } - int numSuppLsrs = supportedLsrToIndex.size(); - supportedLsrs = new LSR[numSuppLsrs]; - supportedIndexes = new int[numSuppLsrs]; - i = 0; - for (Map.Entry entry : supportedLsrToIndex.entrySet()) { - supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()] - supportedIndexes[i++] = entry.getValue(); - } - - defaultULocale = udef; - defaultLocale = def; - defaultLocaleIndex = idef; - demotionPerDesiredLocale = - builder.demotion == Demotion.NONE ? 
0 : - LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION - favorSubtag = builder.favor; - } - - private static final void putIfAbsent(Map lsrToIndex, LSR lsr, int i) { - Integer index = lsrToIndex.get(lsr); - if (index == null) { - lsrToIndex.put(lsr, i); - } - } - - private static final LSR getMaximalLsrOrUnd(ULocale locale) { - if (locale.equals(UND_ULOCALE)) { - return UND_LSR; - } else { - return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); - } - } - - private static final LSR getMaximalLsrOrUnd(Locale locale) { - if (locale.equals(UND_LOCALE)) { - return UND_LSR; - } else { - return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); - } - } - - private static final class ULocaleLsrIterator extends LsrIterator { - private Iterator locales; - private ULocale current, remembered; - - ULocaleLsrIterator(Iterator locales) { - this.locales = locales; - } - - @Override - public boolean hasNext() { - return locales.hasNext(); - } - - @Override - public LSR next() { - current = locales.next(); - return getMaximalLsrOrUnd(current); - } - - @Override - public void rememberCurrent(int desiredIndex) { - bestDesiredIndex = desiredIndex; - remembered = current; - } - } - - private static final class LocaleLsrIterator extends LsrIterator { - private Iterator locales; - private Locale current, remembered; - - LocaleLsrIterator(Iterator locales) { - this.locales = locales; - } - - @Override - public boolean hasNext() { - return locales.hasNext(); - } - - @Override - public LSR next() { - current = locales.next(); - return getMaximalLsrOrUnd(current); - } - - @Override - public void rememberCurrent(int desiredIndex) { - bestDesiredIndex = desiredIndex; - remembered = current; - } - } - - public ULocale getBestMatch(ULocale desiredLocale) { - LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); - int suppIndex = getBestSuppIndex(desiredLSR, null); - return suppIndex >= 0 ? 
supportedULocales[suppIndex] : defaultULocale; - } - - public ULocale getBestMatch(Iterable desiredLocales) { - Iterator desiredIter = desiredLocales.iterator(); - if (!desiredIter.hasNext()) { - return defaultULocale; - } - ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); - LSR desiredLSR = lsrIter.next(); - int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); - return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale; - } - - public ULocale getBestMatch(String desiredLocaleList) { - return getBestMatch(LocalePriorityList.add(desiredLocaleList).build()); - } - - public Locale getBestLocale(Locale desiredLocale) { - LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); - int suppIndex = getBestSuppIndex(desiredLSR, null); - return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; - } - - public Locale getBestLocale(Iterable desiredLocales) { - Iterator desiredIter = desiredLocales.iterator(); - if (!desiredIter.hasNext()) { - return defaultLocale; - } - LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); - LSR desiredLSR = lsrIter.next(); - int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); - return suppIndex >= 0 ? 
supportedLocales[suppIndex] : defaultLocale; - } - - private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) { - if (suppIndex < 0) { - return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); - } else if (desiredLocale != null) { - return new Result(desiredLocale, supportedULocales[suppIndex], - null, supportedLocales[suppIndex], 0, suppIndex); - } else { - return new Result(lsrIter.remembered, supportedULocales[suppIndex], - null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex); - } - } - - private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) { - if (suppIndex < 0) { - return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); - } else if (desiredLocale != null) { - return new Result(null, supportedULocales[suppIndex], - desiredLocale, supportedLocales[suppIndex], 0, suppIndex); - } else { - return new Result(null, supportedULocales[suppIndex], - lsrIter.remembered, supportedLocales[suppIndex], - lsrIter.bestDesiredIndex, suppIndex); - } - } - - public Result getBestMatchResult(ULocale desiredLocale) { - LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); - int suppIndex = getBestSuppIndex(desiredLSR, null); - return makeResult(desiredLocale, null, suppIndex); - } - - /** - * Returns the best match between the desired and supported locales. - * - * @param desiredLocales Typically a user's languages, in order of preference (descending). - * @return the best-matching pair of a desired and a supported locale. 
- */ - public Result getBestMatchResult(Iterable desiredLocales) { - Iterator desiredIter = desiredLocales.iterator(); - if (!desiredIter.hasNext()) { - return makeResult(UND_ULOCALE, null, -1); - } - ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); - LSR desiredLSR = lsrIter.next(); - int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); - return makeResult(null, lsrIter, suppIndex); - } - - public Result getBestLocaleResult(Locale desiredLocale) { - LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); - int suppIndex = getBestSuppIndex(desiredLSR, null); - return makeResult(desiredLocale, null, suppIndex); - } - - public Result getBestLocaleResult(Iterable desiredLocales) { - Iterator desiredIter = desiredLocales.iterator(); - if (!desiredIter.hasNext()) { - return makeResult(UND_LOCALE, null, -1); - } - LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); - LSR desiredLSR = lsrIter.next(); - int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); - return makeResult(null, lsrIter, suppIndex); - } - - /** - * @param desiredLSR The first desired locale's LSR. - * @param remainingIter Remaining desired LSRs, null or empty if none. - * @return the index of the best-matching supported locale, or -1 if there is no good match. - */ - private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) { - int desiredIndex = 0; - int bestSupportedLsrIndex = -1; - for (int bestDistance = thresholdDistance;;) { - // Quick check for exact maximized LSR. 
- Integer index = supportedLsrToIndex.get(desiredLSR); - if (index != null) { - int suppIndex = index; - if (TRACE_MATCHER) { - System.err.printf("Returning %s: desiredLSR=supportedLSR\n", - supportedULocales[suppIndex]); - } - if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } - return suppIndex; - } - int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( - desiredLSR, supportedLsrs, bestDistance, favorSubtag); - if (bestIndexAndDistance >= 0) { - bestDistance = bestIndexAndDistance & 0xff; - if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } - bestSupportedLsrIndex = bestIndexAndDistance >> 8; - } - if ((bestDistance -= demotionPerDesiredLocale) <= 0) { - break; - } - if (remainingIter == null || !remainingIter.hasNext()) { - break; - } - desiredLSR = remainingIter.next(); - } - if (bestSupportedLsrIndex < 0) { - if (TRACE_MATCHER) { - System.err.printf("Returning default %s: no good match\n", defaultULocale); - } - return -1; - } - int suppIndex = supportedIndexes[bestSupportedLsrIndex]; - if (TRACE_MATCHER) { - System.err.printf("Returning %s: best matching supported locale\n", - supportedULocales[suppIndex]); - } - return suppIndex; - } - - @Override - public String toString() { - StringBuilder s = new StringBuilder().append("{XLocaleMatcher"); - if (supportedULocales.length > 0) { - s.append(" supported={").append(supportedULocales[0].toString()); - for (int i = 1; i < supportedULocales.length; ++i) { - s.append(", ").append(supportedULocales[i].toString()); - } - s.append('}'); - } - s.append(" default=").append(Objects.toString(defaultULocale)); - if (favorSubtag != null) { - s.append(" distance=").append(favorSubtag.toString()); - } - if (thresholdDistance >= 0) { - s.append(String.format(" threshold=%d", thresholdDistance)); - } - s.append(String.format(" demotion=%d", demotionPerDesiredLocale)); - return s.append('}').toString(); - } - - /** - * Returns a fraction between 0 and 1, 
where 1 means that the languages are a - * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0. - *
Note that - * the precise values may change over time; no code should be made dependent - * on the values remaining constant. - * @param desired Desired locale - * @param desiredMax Maximized locale (using likely subtags) - * @param supported Supported locale - * @param supportedMax Maximized locale (using likely subtags) - * @return value between 0 and 1, inclusive. - * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales. - */ - @Deprecated - public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { - // Returns the inverse of the distance: That is, 1-distance(desired, supported). - int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance( - XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired), - new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) }, - thresholdDistance, favorSubtag) & 0xff; - return (100 - distance) / 100.0; - } - - /** - * Canonicalize a locale (language). Note that for now, it is canonicalizing - * according to CLDR conventions (he vs iw, etc), since that is what is needed - * for likelySubtags. - * @param ulocale language/locale code - * @return ULocale with remapped subtags. 
- * @stable ICU 4.4 - */ - public ULocale canonicalize(ULocale ulocale) { - // TODO - return null; - } -} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java index c1bf6af2f92..1f3bf81052c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java @@ -8,43 +8,52 @@ */ package com.ibm.icu.util; -import java.util.HashMap; -import java.util.HashSet; +import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; +import java.util.List; +import java.util.Locale; import java.util.Map; -import java.util.Map.Entry; import java.util.Objects; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import com.ibm.icu.impl.ICUData; -import com.ibm.icu.impl.ICUResourceBundle; -import com.ibm.icu.impl.Relation; -import com.ibm.icu.impl.Row; -import com.ibm.icu.impl.Row.R3; -import com.ibm.icu.impl.locale.XLocaleMatcher; -import com.ibm.icu.impl.locale.XLocaleMatcher.Builder; -import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; + +import com.ibm.icu.impl.locale.LSR; +import com.ibm.icu.impl.locale.LocaleDistance; +import com.ibm.icu.impl.locale.XLikelySubtags; /** - * Provides a way to match the languages (locales) supported by a product to the - * languages (locales) acceptable to a user, and get the best match. For - * example: + * Immutable class that picks the best match between a user's desired locales and + * and application's supported locales. * + *

Example: *

- * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
- *
- * // afterwards:
- * matcher.getBestMatch("en-US").toLanguageTag() => "en"
+ * LocaleMatcher matcher = LocaleMatcher.builder().setSupportedLocales("fr, en-GB, en").build();
+ * Locale bestSupported = matcher.getBestLocale(Locale.US);  // "en"
  * 
* - * It takes into account when languages are close to one another, such as fil - * and tl, and when language regional variants are close, like en-GB and en-AU. - * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test - * file. + *

A matcher takes into account when languages are close to one another, + * such as Danish and Norwegian, + * and when regional variants are close, like en-GB and en-AU as opposed to en-US. + * + *

If there are multiple supported locales with the same (language, script, region) + * likely subtags, then the current implementation returns the first of those locales. + * It ignores variant subtags (except for pseudolocale variants) and extensions. + * This may change in future versions. + * + *

For example, the current implementation does not distinguish between + * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. + * + *

If you prefer one equivalent locale over another, then provide only the preferred one, + * or place it earlier in the list of supported locales. + * + *

Otherwise, the order of supported locales may have no effect on the best-match results. + * The current implementation compares each desired locale with supported locales + * in the following order: + * 1. Default locale, if supported; + * 2. CLDR "paradigm locales" like en-GB and es-419; + * 3. other supported locales. + * This may change in future versions. + * *

All classes implementing this interface should be immutable. Often a * product will just need one static instance, built with the languages * that it supports. However, it may want multiple instances with different @@ -54,880 +63,958 @@ import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; * @stable ICU 4.4 */ public class LocaleMatcher { + private static final LSR UND_LSR = new LSR("und","",""); + private static final ULocale UND_ULOCALE = new ULocale("und"); + private static final Locale UND_LOCALE = new Locale("und"); - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public static final boolean DEBUG = false; - - private static final ULocale UNKNOWN_LOCALE = new ULocale("und"); - - /** - * Threshold for falling back to the default (first) language. May make this - * a parameter in the future. - */ - private static final double DEFAULT_THRESHOLD = 0.5; + // Activates debugging output to stderr with details of GetBestMatch. + private static final boolean TRACE_MATCHER = false; - /** - * The default language, in case the threshold is not met. - */ - private final ULocale defaultLanguage; + private static abstract class LsrIterator implements Iterator { + int bestDesiredIndex = -1; - /** - * The default language, in case the threshold is not met. - */ - private final double threshold; + @Override + public void remove() { + throw new UnsupportedOperationException(); + } - /** - * Create a new language matcher. The highest-weighted language is the - * default. That means that if no other language is matches closer than a given - * threshold, that default language is chosen. Typically the default is English, - * but it could be different based on additional information, such as the domain - * of the page. 
- * - * @param languagePriorityList weighted list - * @stable ICU 4.4 - */ - public LocaleMatcher(LocalePriorityList languagePriorityList) { - this(languagePriorityList, defaultWritten); + public abstract void rememberCurrent(int desiredIndex); } /** - * Create a new language matcher from a String form. The highest-weighted - * language is the default. + * Builder option for whether the language subtag or the script subtag is most important. * - * @param languagePriorityListString String form of LanguagePriorityList - * @stable ICU 4.4 - */ - public LocaleMatcher(String languagePriorityListString) { - this(LocalePriorityList.add(languagePriorityListString).build()); - } - - /** - * Internal testing function; may expose API later. - * @param languagePriorityList LocalePriorityList to match - * @param matcherData Internal matching data - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) { - this(languagePriorityList, matcherData, DEFAULT_THRESHOLD); - } - - /** - * Internal testing function; may expose API later. - * @param languagePriorityList LocalePriorityList to match - * @param matcherData Internal matching data - * @internal - * @deprecated This API is ICU internal only. + * @see Builder#setFavorSubtag(FavorSubtag) + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - @Deprecated - public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) { - this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze(); - this.languagePriorityList = languagePriorityList; - for (final ULocale language : languagePriorityList) { - add(language, languagePriorityList.getWeight(language)); - } - processMapping(); - Iterator it = languagePriorityList.iterator(); - defaultLanguage = it.hasNext() ? 
it.next() : null; - this.threshold = threshold; - } - - - /** - * Returns a fraction between 0 and 1, where 1 means that the languages are a - * perfect match, and 0 means that they are completely different. Note that - * the precise values may change over time; no code should be made dependent - * on the values remaining constant. - * @param desired Desired locale - * @param desiredMax Maximized locale (using likely subtags) - * @param supported Supported locale - * @param supportedMax Maximized locale (using likely subtags) - * @return value between 0 and 1, inclusive. - * @stable ICU 4.4 - */ - public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { - return matcherData.match(desired, desiredMax, supported, supportedMax); + public enum FavorSubtag { + /** + * Language differences are most important, then script differences, then region differences. + * (This is the default behavior.) + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + LANGUAGE, + /** + * Makes script differences matter relatively more than language differences. + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + SCRIPT } - /** - * Canonicalize a locale (language). Note that for now, it is canonicalizing - * according to CLDR conventions (he vs iw, etc), since that is what is needed - * for likelySubtags. - * @param ulocale language/locale code - * @return ULocale with remapped subtags. - * @stable ICU 4.4 + * Builder option for whether all desired locales are treated equally or + * earlier ones are preferred. + * + * @see Builder#setDemotionPerDesiredLocale(Demotion) + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - public ULocale canonicalize(ULocale ulocale) { - // TODO Get the data from CLDR, use Java conventions. 
- String lang = ulocale.getLanguage(); - String lang2 = canonicalMap.get(lang); - String script = ulocale.getScript(); - String script2 = canonicalMap.get(script); - String region = ulocale.getCountry(); - String region2 = canonicalMap.get(region); - if (lang2 != null || script2 != null || region2 != null) { - return new ULocale( - lang2 == null ? lang : lang2, - script2 == null ? script : script2, - region2 == null ? region : region2 - ); - } - return ulocale; + public enum Demotion { + /** + * All desired locales are treated equally. + * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + NONE, + /** + * Earlier desired locales are preferred. + * + *

From each desired locale to the next, + * the distance to any supported locale is increased by an additional amount + * which is at least as large as most region mismatches. + * A later desired locale has to have a better match with some supported locale + * due to more than merely having the same region subtag. + * + *

For example: Supported={en, sv} desired=[en-GB, sv] + * yields Result(en-GB, en) because + * with the demotion of sv its perfect match is no better than + * the region distance between the earlier desired locale en-GB and en=en-US. + * + *

Notes: + *

    + *
  • In some cases, language and/or script differences can be as small as + * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) + *
  • It is possible for certain region differences to be larger than usual, + * and larger than the demotion. + * (As of CLDR 35 there is no such case, but + * this is possible in future versions of the data.) + *
+ * + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + REGION } /** - * Get the best match for a LanguagePriorityList + * Data for the best-matching pair of a desired and a supported locale. * - * @param languageList list to match - * @return best matching language code - * @stable ICU 4.4 + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - public ULocale getBestMatch(LocalePriorityList languageList) { - double bestWeight = 0; - ULocale bestTableMatch = null; - double penalty = 0; - OutputDouble matchWeight = new OutputDouble(); - for (final ULocale language : languageList) { - final ULocale matchLocale = getBestMatchInternal(language, matchWeight); - final double weight = matchWeight.value * languageList.getWeight(language) - penalty; - if (weight > bestWeight) { - bestWeight = weight; - bestTableMatch = matchLocale; - } - penalty += 0.07000001; - } - if (bestWeight < threshold) { - bestTableMatch = defaultLanguage; + public static final class Result { + private final ULocale desiredULocale; + private final ULocale supportedULocale; + private final Locale desiredLocale; + private final Locale supportedLocale; + private final int desiredIndex; + private final int supportedIndex; + + private Result(ULocale udesired, ULocale usupported, + Locale desired, Locale supported, + int desIndex, int suppIndex) { + desiredULocale = udesired; + supportedULocale = usupported; + desiredLocale = desired; + supportedLocale = supported; + desiredIndex = desIndex; + supportedIndex = suppIndex; } - return bestTableMatch; - } - /** - * Convenience method: Get the best match for a LanguagePriorityList - * - * @param languageList String form of language priority list - * @return best matching language code - * @stable ICU 4.4 - */ - public ULocale getBestMatch(String languageList) { - return getBestMatch(LocalePriorityList.add(languageList).build()); - } + /** + * Returns the best-matching 
desired locale. + * null if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale getDesiredULocale() { + return desiredULocale == null && desiredLocale != null ? + ULocale.forLocale(desiredLocale) : desiredULocale; + } + /** + * Returns the best-matching desired locale. + * null if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale getDesiredLocale() { + return desiredLocale == null && desiredULocale != null ? + desiredULocale.toLocale() : desiredLocale; + } - /** - * Get the best match for an individual language code. - * - * @param ulocale locale/language code to match - * @return best matching language code - * @stable ICU 4.4 - */ - public ULocale getBestMatch(ULocale ulocale) { - return getBestMatchInternal(ulocale, null); - } + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is null if the list of supported locales is empty and + * no explicit default locale is set. + * + * @return the best-matching supported locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale getSupportedULocale() { return supportedULocale; } + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is null if the list of supported locales is empty and + * no explicit default locale is set. + * + * @return the best-matching supported locale, or null. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
+ */ + public Locale getSupportedLocale() { return supportedLocale; } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public ULocale getBestMatch(ULocale... ulocales) { - return getBestMatch(LocalePriorityList.add(ulocales).build()); - } + /** + * Returns the index of the best-matching desired locale in the input Iterable order. + * -1 if the list of desired locales is empty or if none matched well enough. + * + * @return the index of the best-matching desired locale, or -1. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public int getDesiredIndex() { return desiredIndex; } - /** - * {@inheritDoc} - * @stable ICU 4.4 - */ - @Override - public String toString() { - return "{" + defaultLanguage + ", " - + localeToMaxLocaleAndWeight + "}"; - } - // ================= Privates ===================== + /** + * Returns the index of the best-matching supported locale in the + * constructor’s or builder’s input order (“set” Collection plus “added” locales). + * If the matcher was built from a locale list string, then the iteration order is that + * of a LocalePriorityList built from the same string. + * -1 if the list of supported locales is empty or if none matched well enough. + * + * @return the index of the best-matching supported locale, or -1. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public int getSupportedIndex() { return supportedIndex; } - /** - * Get the best match for an individual language code. 
- * - * @param languageCode - * @return best matching language code and weight (as per - * {@link #match(ULocale, ULocale)}) - */ - private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) { - languageCode = canonicalize(languageCode); - final ULocale maximized = addLikelySubtags(languageCode); - if (DEBUG) { - System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized); - } - double bestWeight = 0; - ULocale bestTableMatch = null; - String baseLanguage = maximized.getLanguage(); - Set> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage); - if (searchTable != null) { // we preprocessed the table so as to filter by language - if (DEBUG) System.out.println("\tSearching: " + searchTable); - for (final R3 tableKeyValue : searchTable) { - ULocale tableKey = tableKeyValue.get0(); - ULocale maxLocale = tableKeyValue.get1(); - Double matchedWeight = tableKeyValue.get2(); - final double match = match(languageCode, maximized, tableKey, maxLocale); - if (DEBUG) { - System.out.println("\t" + tableKeyValue + ";\t" + match + "\n"); - } - final double weight = match * matchedWeight; - if (weight > bestWeight) { - bestWeight = weight; - bestTableMatch = tableKey; - if (weight > 0.999d) { // bail on good enough match. - break; - } + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for date and number formatting, collation, etc. + * + *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn + * + * @return the service locale, combining the best-matching desired and supported locales. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public ULocale makeServiceULocale() { + ULocale bestDesired = getDesiredULocale(); + ULocale serviceLocale = supportedULocale; + if (!serviceLocale.equals(bestDesired) && bestDesired != null) { + ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale); + + // Copy the region from bestDesired, if there is one. + String region = bestDesired.getCountry(); + if (!region.isEmpty()) { + b.setRegion(region); } - } - } - if (bestWeight < threshold) { - bestTableMatch = defaultLanguage; - } - if (outputWeight != null) { - outputWeight.value = bestWeight; // only return the weight when needed - } - return bestTableMatch; - } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - private static class OutputDouble { // TODO, move to where OutputInt is - double value; - } - - private void add(ULocale language, Double weight) { - language = canonicalize(language); - R3 row = Row.of(language, addLikelySubtags(language), weight); - row.freeze(); - localeToMaxLocaleAndWeight.add(row); - } + // Copy the variants from bestDesired, if there are any. + // Note that this will override any serviceLocale variants. + // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster). + String variants = bestDesired.getVariant(); + if (!variants.isEmpty()) { + b.setVariant(variants); + } - /** - * We preprocess the data to get just the possible matches for each desired base language. 
- */ - private void processMapping() { - for (Entry> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) { - String desired = desiredToMatchingLanguages.getKey(); - Set supported = desiredToMatchingLanguages.getValue(); - for (R3 localeToMaxAndWeight : localeToMaxLocaleAndWeight) { - final ULocale key = localeToMaxAndWeight.get0(); - String lang = key.getLanguage(); - if (supported.contains(lang)) { - addFiltered(desired, localeToMaxAndWeight); + // Copy the extensions from bestDesired, if there are any. + // Note that this will override any serviceLocale extensions. + // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native" + // (replacing calendar). + for (char extensionKey : bestDesired.getExtensionKeys()) { + b.setExtension(extensionKey, bestDesired.getExtension(extensionKey)); } + serviceLocale = b.build(); } + return serviceLocale; } - // now put in the values directly, since languages always map to themselves - for (R3 localeToMaxAndWeight : localeToMaxLocaleAndWeight) { - final ULocale key = localeToMaxAndWeight.get0(); - String lang = key.getLanguage(); - addFiltered(lang, localeToMaxAndWeight); - } - } - private void addFiltered(String desired, R3 localeToMaxAndWeight) { - Set> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired); - if (map == null) { - desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<>()); - } - map.add(localeToMaxAndWeight); - if (DEBUG) { - System.out.println(desired + ", " + localeToMaxAndWeight); + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for + * date and number formatting, collation, etc. + * + *

Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn + * + * @return the service locale, combining the best-matching desired and supported locales. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale makeServiceLocale() { + return makeServiceULocale().toLocale(); } } - Set> localeToMaxLocaleAndWeight = new LinkedHashSet<>(); - Map>> desiredLanguageToPossibleLocalesToMaxLocaleToData - = new LinkedHashMap<>(); - - // =============== Special Mapping Information ============== + private final int thresholdDistance; + private final int demotionPerDesiredLocale; + private final FavorSubtag favorSubtag; + + // These are in input order. + private final ULocale[] supportedULocales; + private final Locale[] supportedLocales; + // These are in preference order: 1. Default locale 2. paradigm locales 3. others. + private final Map supportedLsrToIndex; + // Array versions of the supportedLsrToIndex keys and values. + // The distance lookup loops over the supportedLsrs and returns the index of the best match. + private final LSR[] supportedLsrs; + private final int[] supportedIndexes; + private final ULocale defaultULocale; + private final Locale defaultLocale; + private final int defaultLocaleIndex; /** - * We need to add another method to addLikelySubtags that doesn't return - * null, but instead substitutes Zzzz and ZZ if unknown. There are also - * a few cases where addLikelySubtags needs to have expanded data, to handle - * all deprecated codes. - * @param languageCode - * @return "fixed" addLikelySubtags + * LocaleMatcher Builder. + * + * @see LocaleMatcher#builder() + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - private ULocale addLikelySubtags(ULocale languageCode) { - // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined - // language would normally match English. 
But that would produce the counterintuitive results - // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and - // getBestMatch("en", LocaleMatcher("it,und")) would be "und". - // - // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults) - // so that max("und")="und". That produces the following, more desirable results: - if (languageCode.equals(UNKNOWN_LOCALE)) { - return UNKNOWN_LOCALE; - } - final ULocale result = ULocale.addLikelySubtags(languageCode); - // should have method on getLikelySubtags for this - if (result == null || result.equals(languageCode)) { - final String language = languageCode.getLanguage(); - final String script = languageCode.getScript(); - final String region = languageCode.getCountry(); - return new ULocale((language.length()==0 ? "und" - : language) - + "_" - + (script.length()==0 ? "Zzzz" : script) - + "_" - + (region.length()==0 ? "ZZ" : region)); - } - return result; - } + public static class Builder { + private List supportedLocales; + private int thresholdDistance = -1; + private Demotion demotion; + private ULocale defaultLocale; + private FavorSubtag favor; - private static class LocalePatternMatcher { - // a value of null means a wildcard; matches any. - private String lang; - private String script; - private String region; - private Level level; - static Pattern pattern = Pattern.compile( - "([a-z]{1,8}|\\*)" - + "(?:[_-]([A-Z][a-z]{3}|\\*))?" - + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?"); - - public LocalePatternMatcher(String toMatch) { - Matcher matcher = pattern.matcher(toMatch); - if (!matcher.matches()) { - throw new IllegalArgumentException("Bad pattern: " + toMatch); - } - lang = matcher.group(1); - script = matcher.group(2); - region = matcher.group(3); - level = region != null ? Level.region : script != null ? 
Level.script : Level.language; + private Builder() {} - if (lang.equals("*")) { - lang = null; - } - if (script != null && script.equals("*")) { - script = null; - } - if (region != null && region.equals("*")) { - region = null; - } - } - - boolean matches(ULocale ulocale) { - if (lang != null && !lang.equals(ulocale.getLanguage())) { - return false; - } - if (script != null && !script.equals(ulocale.getScript())) { - return false; - } - if (region != null && !region.equals(ulocale.getCountry())) { - return false; - } - return true; + /** + * Parses the string like {@link LocalePriorityList} does and + * sets the supported locales accordingly. + * Clears any previously set/added supported locales first. + * + * @param locales the string of locales to set, to be parsed like LocalePriorityList does + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setSupportedLocales(String locales) { + return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales()); } - public Level getLevel() { - return level; + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * @param locales the list of locales + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setSupportedULocales(Collection locales) { + supportedLocales = new ArrayList<>(locales); + return this; } - public String getLanguage() { - return (lang == null ? "*" : lang); + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. 
+ * + * @param locales the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setSupportedLocales(Collection locales) { + supportedLocales = new ArrayList<>(locales.size()); + for (Locale locale : locales) { + supportedLocales.add(ULocale.forLocale(locale)); + } + return this; } - public String getScript() { - return (script == null ? "*" : script); + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder addSupportedULocale(ULocale locale) { + if (supportedLocales == null) { + supportedLocales = new ArrayList<>(); + } + supportedLocales.add(locale); + return this; } - public String getRegion() { - return (region == null ? "*" : region); + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale the list of locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder addSupportedLocale(Locale locale) { + return addSupportedULocale(ULocale.forLocale(locale)); } - @Override - public String toString() { - String result = getLanguage(); - if (level != Level.language) { - result += "-" + getScript(); - if (level != Level.script) { - result += "-" + getRegion(); - } - } - return result; + /** + * Sets the default locale; if null, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * + * @param defaultLocale the default locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
+ */ + public Builder setDefaultULocale(ULocale defaultLocale) { + this.defaultLocale = defaultLocale; + return this; } - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) + /** + * Sets the default locale; if null, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * + * @param defaultLocale the default locale + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } - if (obj == null || !(obj instanceof LocalePatternMatcher)) { - return false; - } - LocalePatternMatcher other = (LocalePatternMatcher) obj; - return Objects.equals(level, other.level) - && Objects.equals(lang, other.lang) - && Objects.equals(script, other.script) - && Objects.equals(region, other.region); + public Builder setDefaultLocale(Locale defaultLocale) { + this.defaultLocale = ULocale.forLocale(defaultLocale); + return this; } - /* (non-Javadoc) - * @see java.lang.Object#hashCode() + /** + * If SCRIPT, then the language differences are smaller than script differences. + * This is used in situations (such as maps) where + * it is better to fall back to the same script than a similar language. + * + * @param subtag the subtag to favor + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - @Override - public int hashCode() { - return level.ordinal() - ^ (lang == null ? 0 : lang.hashCode()) - ^ (script == null ? 0 : script.hashCode()) - ^ (region == null ? 
0 : region.hashCode()); + public Builder setFavorSubtag(FavorSubtag subtag) { + this.favor = subtag; + return this; } - } - enum Level { - language(0.99), - script(0.2), - region(0.04); - - final double worst; - - Level(double d) { - worst = d; + /** + * Option for whether all desired locales are treated equally or + * earlier ones are preferred (this is the default). + * + * @param demotion the demotion per desired locale to set. + * @return this Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Builder setDemotionPerDesiredLocale(Demotion demotion) { + this.demotion = demotion; + return this; } - } - private static class ScoreData implements Freezable { - @SuppressWarnings("unused") - private static final double maxUnequal_changeD_sameS = 0.5; - - @SuppressWarnings("unused") - private static final double maxUnequal_changeEqual = 0.75; - - LinkedHashSet> scores = new LinkedHashSet<>(); - final Level level; - - public ScoreData(Level level) { - this.level = level; - } - - void addDataToScores(String desired, String supported, R3 data) { - // Map>> lang_result = scores.get(desired); - // if (lang_result == null) { - // scores.put(desired, lang_result = new HashMap()); - // } - // Set> result = lang_result.get(supported); - // if (result == null) { - // lang_result.put(supported, result = new LinkedHashSet()); - // } - // result.add(data); - boolean added = scores.add(data); - if (!added) { - throw new ICUException("trying to add duplicate data: " + data); + /** + * Internal only! + * + * @param thresholdDistance the thresholdDistance to set, with -1 = default + * @return this Builder object + * @internal + * @deprecated This API is ICU internal only. 
+ */ + @Deprecated + public Builder internalSetThresholdDistance(int thresholdDistance) { + if (thresholdDistance > 100) { + thresholdDistance = 100; } + this.thresholdDistance = thresholdDistance; + return this; } - double getScore(ULocale dMax, String desiredRaw, String desiredMax, - ULocale sMax, String supportedRaw, String supportedMax) { - double distance = 0; - if (!desiredMax.equals(supportedMax)) { - distance = getRawScore(dMax, sMax); - } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal - distance += 0.001; - } - return distance; + /** + * Builds and returns a new locale matcher. + * This builder can continue to be used. + * + * @return new LocaleMatcher. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public LocaleMatcher build() { + return new LocaleMatcher(this); } - private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) { - if (DEBUG) { - System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale); + /** + * {@inheritDoc} + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. 
+ */ + @Override + public String toString() { + StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder"); + if (!supportedLocales.isEmpty()) { + s.append(" supported={").append(supportedLocales.toString()).append('}'); } - for (R3 datum : scores) { // : result - if (datum.get0().matches(desiredLocale) - && datum.get1().matches(supportedLocale)) { - if (DEBUG) { - System.out.println("\t\t\t\tFOUND\t" + datum); - } - return datum.get2(); - } + if (defaultLocale != null) { + s.append(" default=").append(defaultLocale.toString()); } - if (DEBUG) { - System.out.println("\t\t\t\tNOTFOUND\t" + level.worst); + if (favor != null) { + s.append(" distance=").append(favor.toString()); } - return level.worst; - } - - @Override - public String toString() { - StringBuilder result = new StringBuilder().append(level); - for (R3 score : scores) { - result.append("\n\t\t").append(score); + if (thresholdDistance >= 0) { + s.append(String.format(" threshold=%d", thresholdDistance)); + } + if (demotion != null) { + s.append(" demotion=").append(demotion.toString()); } - return result.toString(); + return s.append('}').toString(); } + } + /** + * Returns a builder used in chaining parameters for building a LocaleMatcher. + * + * @return a new Builder object + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public static Builder builder() { + return new Builder(); + } - @Override - @SuppressWarnings("unchecked") - public ScoreData cloneAsThawed() { - try { - ScoreData result = (ScoreData) clone(); - result.scores = (LinkedHashSet>) result.scores.clone(); - result.frozen = false; - return result; - } catch (CloneNotSupportedException e) { - throw new ICUCloneNotSupportedException(e); // will never happen - } + /** + * Copies the supported locales, preserving iteration order, and constructs a LocaleMatcher. + * The first locale is used as the default locale for when there is no good match. 
+ * + * @param supportedLocales list of locales + * @stable ICU 4.4 + */ + public LocaleMatcher(LocalePriorityList supportedLocales) { + this(builder().setSupportedULocales(supportedLocales.getULocales())); + } - } + /** + * Parses the string like {@link LocalePriorityList} does and + * constructs a LocaleMatcher for the supported locales parsed from the string. + * The first one (in LocalePriorityList iteration order) is used as the default locale for + * when there is no good match. + * + * @param supportedLocales the string of locales to set, + * to be parsed like LocalePriorityList does + * @stable ICU 4.4 + */ + public LocaleMatcher(String supportedLocales) { + this(builder().setSupportedLocales(supportedLocales)); + } - private volatile boolean frozen = false; + private LocaleMatcher(Builder builder) { + thresholdDistance = builder.thresholdDistance < 0 ? + LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance; + // Store the supported locales in input order, + // so that when different types are used (e.g., java.util.Locale) + // we can return those by parallel index. + int supportedLocalesLength = builder.supportedLocales.size(); + supportedULocales = new ULocale[supportedLocalesLength]; + supportedLocales = new Locale[supportedLocalesLength]; + // Supported LRSs in input order. + LSR lsrs[] = new LSR[supportedLocalesLength]; + // Also find the first supported locale whose LSR is + // the same as that for the default locale. 
+ ULocale udef = builder.defaultLocale; + Locale def = null; + LSR defLSR = null; + int idef = -1; + if (udef != null) { + def = udef.toLocale(); + defLSR = getMaximalLsrOrUnd(udef); + } + int i = 0; + for (ULocale locale : builder.supportedLocales) { + supportedULocales[i] = locale; + supportedLocales[i] = locale.toLocale(); + LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale); + if (idef < 0 && defLSR != null && lsr.equals(defLSR)) { + idef = i; + } + ++i; + } + + // We need an unordered map from LSR to first supported locale with that LSR, + // and an ordered list of (LSR, Indexes). + // We use a LinkedHashMap for both, + // and insert the supported locales in the following order: + // 1. Default locale, if it is supported. + // 2. Priority locales in builder order. + // 3. Remaining locales in builder order. + supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); + Map otherLsrToIndex = null; + if (idef >= 0) { + supportedLsrToIndex.put(defLSR, idef); + } + i = 0; + for (ULocale locale : supportedULocales) { + if (i == idef) { continue; } + LSR lsr = lsrs[i]; + if (defLSR == null) { + assert i == 0; + udef = locale; + def = supportedLocales[0]; + defLSR = lsr; + idef = 0; + supportedLsrToIndex.put(lsr, 0); + } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) { + putIfAbsent(supportedLsrToIndex, lsr, i); + } else { + if (otherLsrToIndex == null) { + otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength); + } + putIfAbsent(otherLsrToIndex, lsr, i); + } + ++i; + } + if (otherLsrToIndex != null) { + supportedLsrToIndex.putAll(otherLsrToIndex); + } + int numSuppLsrs = supportedLsrToIndex.size(); + supportedLsrs = new LSR[numSuppLsrs]; + supportedIndexes = new int[numSuppLsrs]; + i = 0; + for (Map.Entry entry : supportedLsrToIndex.entrySet()) { + supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()] + supportedIndexes[i++] = entry.getValue(); + } + + defaultULocale = udef; + defaultLocale = def; + defaultLocaleIndex 
= idef; + demotionPerDesiredLocale = + builder.demotion == Demotion.NONE ? 0 : + LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION + favorSubtag = builder.favor; + } - @Override - public ScoreData freeze() { - return this; + private static final void putIfAbsent(Map lsrToIndex, LSR lsr, int i) { + Integer index = lsrToIndex.get(lsr); + if (index == null) { + lsrToIndex.put(lsr, i); } + } - @Override - public boolean isFrozen() { - return frozen; + private static final LSR getMaximalLsrOrUnd(ULocale locale) { + if (locale.equals(UND_ULOCALE)) { + return UND_LSR; + } else { + return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); } + } - public Relation getMatchingLanguages() { - Relation desiredToSupported = Relation.of(new LinkedHashMap>(), HashSet.class); - for (R3 item : scores) { - LocalePatternMatcher desired = item.get0(); - LocalePatternMatcher supported = item.get1(); - if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance - desiredToSupported.put(desired.lang, supported.lang); - } - } - desiredToSupported.freeze(); - return desiredToSupported; + private static final LSR getMaximalLsrOrUnd(Locale locale) { + if (locale.equals(UND_LOCALE)) { + return UND_LSR; + } else { + return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); } } - /** - * Only for testing and use by tools. Interface may change!! - * @internal - * @deprecated This API is ICU internal only. 
- */ - @Deprecated - public static class LanguageMatcherData implements Freezable { - private ScoreData languageScores = new ScoreData(Level.language); - private ScoreData scriptScores = new ScoreData(Level.script); - private ScoreData regionScores = new ScoreData(Level.region); - private Relation matchingLanguages; - private volatile boolean frozen = false; - + private static final class ULocaleLsrIterator extends LsrIterator { + private Iterator locales; + private ULocale current, remembered; - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public LanguageMatcherData() { + ULocaleLsrIterator(Iterator locales) { + this.locales = locales; } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public Relation matchingLanguages() { - return matchingLanguages; + @Override + public boolean hasNext() { + return locales.hasNext(); } - /** - * @internal - * @deprecated This API is ICU internal only. - */ @Override - @Deprecated - public String toString() { - return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores; + public LSR next() { + current = locales.next(); + return getMaximalLsrOrUnd(current); } - /** - * @internal - * @deprecated This API is ICU internal only. 
- */ - @Deprecated - public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) { - double diff = 0; - diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage()); - if (diff > 0.999d) { // with no language match, we bail - return 0.0d; - } - diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript()); - diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry()); - - if (!a.getVariant().equals(b.getVariant())) { - diff += 0.01; - } - if (diff < 0.0d) { - diff = 0.0d; - } else if (diff > 1.0d) { - diff = 1.0d; - } - if (DEBUG) { - System.out.println("\t\t\tTotal Distance\t" + diff); - } - return 1.0 - diff; + @Override + public void rememberCurrent(int desiredIndex) { + bestDesiredIndex = desiredIndex; + remembered = current; } + } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) { - return addDistance(desired, supported, percent, false, comment); - } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) { - return addDistance(desired, supported, percent, oneway, null); - } - - private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) { - if (DEBUG) { - System.out.println("\t" - + (comment == null ? "" : "\t")); - // // .addDistance("nn", "nb", 4, true) - // System.out.println(".addDistance(\"" + desired + "\"" + - // ", \"" + supported + "\"" + - // ", " + percent + "" - // + (oneway ? "" : ", true") - // + (comment == null ? 
"" : ", \"" + comment + "\"") - // + ")" - // ); + private static final class LocaleLsrIterator extends LsrIterator { + private Iterator locales; + private Locale current, remembered; - } - double score = 1-percent/100.0; // convert from percentage - LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired); - Level desiredLen = desiredMatcher.getLevel(); - LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported); - Level supportedLen = supportedMatcher.getLevel(); - if (desiredLen != supportedLen) { - throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported); - } - R3 data = Row.of(desiredMatcher, supportedMatcher, score); - R3 data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score); - boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher); - switch (desiredLen) { - case language: - String dlanguage = desiredMatcher.getLanguage(); - String slanguage = supportedMatcher.getLanguage(); - languageScores.addDataToScores(dlanguage, slanguage, data); - if (!oneway && !desiredEqualsSupported) { - languageScores.addDataToScores(slanguage, dlanguage, data2); - } - break; - case script: - String dscript = desiredMatcher.getScript(); - String sscript = supportedMatcher.getScript(); - scriptScores.addDataToScores(dscript, sscript, data); - if (!oneway && !desiredEqualsSupported) { - scriptScores.addDataToScores(sscript, dscript, data2); - } - break; - case region: - String dregion = desiredMatcher.getRegion(); - String sregion = supportedMatcher.getRegion(); - regionScores.addDataToScores(dregion, sregion, data); - if (!oneway && !desiredEqualsSupported) { - regionScores.addDataToScores(sregion, dregion, data2); - } - break; - } - return this; + LocaleLsrIterator(Iterator locales) { + this.locales = locales; } - /** - * {@inheritDoc} - * @internal - * @deprecated This API is ICU internal only. 
- */ @Override - @Deprecated - public LanguageMatcherData cloneAsThawed() { - LanguageMatcherData result; - try { - result = (LanguageMatcherData) clone(); - result.languageScores = languageScores.cloneAsThawed(); - result.scriptScores = scriptScores.cloneAsThawed(); - result.regionScores = regionScores.cloneAsThawed(); - result.frozen = false; - return result; - } catch (CloneNotSupportedException e) { - throw new ICUCloneNotSupportedException(e); // will never happen - } + public boolean hasNext() { + return locales.hasNext(); } - /** - * {@inheritDoc} - * @internal - * @deprecated This API is ICU internal only. - */ @Override - @Deprecated - public LanguageMatcherData freeze() { - languageScores.freeze(); - regionScores.freeze(); - scriptScores.freeze(); - matchingLanguages = languageScores.getMatchingLanguages(); - frozen = true; - return this; + public LSR next() { + current = locales.next(); + return getMaximalLsrOrUnd(current); } - /** - * {@inheritDoc} - * @internal - * @deprecated This API is ICU internal only. - */ @Override - @Deprecated - public boolean isFrozen() { - return frozen; + public void rememberCurrent(int desiredIndex) { + bestDesiredIndex = desiredIndex; + remembered = current; } } - LanguageMatcherData matcherData; - LocalePriorityList languagePriorityList; + /** + * Returns the supported locale which best matches the desired locale. + * + * @param desiredLocale Typically a user's language. + * @return the best-matching supported locale. + * @stable ICU 4.4 + */ + public ULocale getBestMatch(ULocale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale; + } - private static final LanguageMatcherData defaultWritten; + /** + * Returns the supported locale which best matches one of the desired locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). 
+ * (In ICU 4.4..63 this parameter had type LocalePriorityList.) + * @return the best-matching supported locale. + * @stable ICU 4.4 + */ + public ULocale getBestMatch(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return defaultULocale; + } + ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale; + } - private static HashMap canonicalMap = new HashMap<>(); + /** + * Parses the string like {@link LocalePriorityList} does and + * returns the supported locale which best matches one of the desired locales. + * + * @param desiredLocaleList Typically a user's languages, in order of preference (descending), + * as a string which is to be parsed like LocalePriorityList does. + * @return the best-matching supported locale. + * @stable ICU 4.4 + */ + public ULocale getBestMatch(String desiredLocaleList) { + return getBestMatch(LocalePriorityList.add(desiredLocaleList).build()); + } + /** + * Returns the supported locale which best matches the desired locale. + * + * @param desiredLocale Typically a user's language. + * @return the best-matching supported locale. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale getBestLocale(Locale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; + } - static { - canonicalMap.put("iw", "he"); - canonicalMap.put("mo", "ro"); - canonicalMap.put("tl", "fil"); + /** + * Returns the supported locale which best matches one of the desired locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @return the best-matching supported locale. 
+ * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Locale getBestLocale(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return defaultLocale; + } + LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; + } - ICUResourceBundle suppData = getICUSupplementalData(); - ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching"); - ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written"); - defaultWritten = new LanguageMatcherData(); + private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) { + if (suppIndex < 0) { + return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); + } else if (desiredLocale != null) { + return new Result(desiredLocale, supportedULocales[suppIndex], + null, supportedLocales[suppIndex], 0, suppIndex); + } else { + return new Result(lsrIter.remembered, supportedULocales[suppIndex], + null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex); + } + } - for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) { - ICUResourceBundle item = (ICUResourceBundle) iter.next(); - /* - "*_*_*", - "*_*_*", - "96", - */ - // - boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3)); - defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway); + private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) { + if (suppIndex < 0) { + return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex); + } else if (desiredLocale != null) { + return new Result(null, supportedULocales[suppIndex], + desiredLocale, 
supportedLocales[suppIndex], 0, suppIndex); + } else { + return new Result(null, supportedULocales[suppIndex], + lsrIter.remembered, supportedLocales[suppIndex], + lsrIter.bestDesiredIndex, suppIndex); } - defaultWritten.freeze(); } /** - * @internal - * @deprecated This API is ICU internal only. + * Returns the best match between the desired locale and the supported locales. + * + * @param desiredLocale Typically a user's language. + * @return the best-matching pair of the desired and a supported locale. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Result getBestMatchResult(ULocale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return makeResult(desiredLocale, null, suppIndex); + } + + /** + * Returns the best match between the desired and supported locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @return the best-matching pair of a desired and a supported locale. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - @Deprecated - public static ICUResourceBundle getICUSupplementalData() { - ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance( - ICUData.ICU_BASE_NAME, - "supplementalData", - ICUResourceBundle.ICU_DATA_CLASS_LOADER); - return suppData; + public Result getBestMatchResult(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return makeResult(UND_ULOCALE, null, -1); + } + ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return makeResult(null, lsrIter, suppIndex); } /** - * @internal - * @deprecated This API is ICU internal only. + * Returns the best match between the desired locale and the supported locales. 
+ * + * @param desiredLocale Typically a user's language. + * @return the best-matching pair of the desired and a supported locale. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. */ - @Deprecated - public static double match(ULocale a, ULocale b) { - final LocaleMatcher matcher = new LocaleMatcher(""); - return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b)); + public Result getBestLocaleResult(Locale desiredLocale) { + LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale); + int suppIndex = getBestSuppIndex(desiredLSR, null); + return makeResult(desiredLocale, null, suppIndex); } - transient XLocaleMatcher xLocaleMatcher = null; - transient ULocale xDefaultLanguage = null; - transient boolean xFavorScript = false; + /** + * Returns the best match between the desired and supported locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @return the best-matching pair of a desired and a supported locale. + * @draft ICU 65 + * @provisional This API might change or be removed in a future release. + */ + public Result getBestLocaleResult(Iterable desiredLocales) { + Iterator desiredIter = desiredLocales.iterator(); + if (!desiredIter.hasNext()) { + return makeResult(UND_LOCALE, null, -1); + } + LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter); + LSR desiredLSR = lsrIter.next(); + int suppIndex = getBestSuppIndex(desiredLSR, lsrIter); + return makeResult(null, lsrIter, suppIndex); + } - private synchronized XLocaleMatcher getLocaleMatcher() { - if (xLocaleMatcher == null) { - Builder builder = XLocaleMatcher.builder(); - builder.setSupportedULocales(languagePriorityList.getULocales()); - if (xDefaultLanguage != null) { - builder.setDefaultULocale(xDefaultLanguage); + /** + * @param desiredLSR The first desired locale's LSR. + * @param remainingIter Remaining desired LSRs, null or empty if none. 
+ * @return the index of the best-matching supported locale, or -1 if there is no good match. + */ + private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) { + int desiredIndex = 0; + int bestSupportedLsrIndex = -1; + for (int bestDistance = thresholdDistance;;) { + // Quick check for exact maximized LSR. + Integer index = supportedLsrToIndex.get(desiredLSR); + if (index != null) { + int suppIndex = index; + if (TRACE_MATCHER) { + System.err.printf("Returning %s: desiredLSR=supportedLSR\n", + supportedULocales[suppIndex]); + } + if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } + return suppIndex; } - if (xFavorScript) { - builder.setFavorSubtag(FavorSubtag.SCRIPT); + int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance( + desiredLSR, supportedLsrs, bestDistance, favorSubtag); + if (bestIndexAndDistance >= 0) { + bestDistance = bestIndexAndDistance & 0xff; + if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); } + bestSupportedLsrIndex = bestIndexAndDistance >> 8; } - xLocaleMatcher = builder.build(); + if ((bestDistance -= demotionPerDesiredLocale) <= 0) { + break; + } + if (remainingIter == null || !remainingIter.hasNext()) { + break; + } + desiredLSR = remainingIter.next(); + } + if (bestSupportedLsrIndex < 0) { + if (TRACE_MATCHER) { + System.err.printf("Returning default %s: no good match\n", defaultULocale); + } + return -1; } - return xLocaleMatcher; + int suppIndex = supportedIndexes[bestSupportedLsrIndex]; + if (TRACE_MATCHER) { + System.err.printf("Returning %s: best matching supported locale\n", + supportedULocales[suppIndex]); + } + return suppIndex; } /** - * Get the best match between the desired languages and supported languages - * This supports the new CLDR syntax to provide for better matches within - * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US) - * and also matching between regions and macroregions, such 
as comparing es-419 to es-AR). - * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first. - * @param outputBestDesired The one of the desired languages that matched best. - * Set to null if the best match was not below the threshold distance. - * @return best-match supported language - * @internal - * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. + * Returns a fraction between 0 and 1, where 1 means that the languages are a + * perfect match, and 0 means that they are completely different. + * + *

This is mostly an implementation detail, and the precise values may change over time. + * The implementation may use either the maximized forms or the other ones, or both. + * The implementation may or may not rely on the forms to be consistent with each other. + * + *

Callers should construct and use a matcher rather than match pairs of locales directly. + * + * @param desired Desired locale. + * @param desiredMax Maximized locale (using likely subtags). + * @param supported Supported locale. + * @param supportedMax Maximized locale (using likely subtags). + * @return value between 0 and 1, inclusive. + * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales. */ @Deprecated - public ULocale getBestMatch(LinkedHashSet desiredLanguages, Output outputBestDesired) { - if (outputBestDesired == null) { - return getLocaleMatcher().getBestMatch(desiredLanguages); - } else { - XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages); - outputBestDesired.value = result.getDesiredULocale(); - return result.getSupportedULocale(); - } + public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { + // Returns the inverse of the distance: That is, 1-distance(desired, supported). + int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance( + XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired), + new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) }, + thresholdDistance, favorSubtag) & 0xff; + return (100 - distance) / 100.0; } /** - * Set the default language, with null = default = first supported language - * @param defaultLanguage Language to use in case the threshold for distance is exceeded. - * @return this, for chaining - * @internal - * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. + * Partially canonicalizes a locale (language). Note that for now, it is canonicalizing + * according to CLDR conventions (he vs iw, etc), since that is what is needed + * for likelySubtags. + * + *

Currently, this is a much simpler canonicalization than what the ULocale class does: + * The language/script/region subtags are each mapped separately, ignoring the other subtags. + * If none of these change, then the input locale is returned. + * Otherwise a new ULocale with only those subtags is returned, removing variants and extensions. + * + * @param locale language/locale code + * @return ULocale with remapped subtags. + * @stable ICU 4.4 */ - @Deprecated - public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) { - this.xDefaultLanguage = defaultLanguage; - xLocaleMatcher = null; - return this; + public ULocale canonicalize(ULocale locale) { + return XLikelySubtags.INSTANCE.canonicalize(locale); } /** - * If true, then the language differences are smaller than than script differences. - * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language. - * @param favorScript Set to true to treat script as most important. - * @return this, for chaining. - * @internal - * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release. 
+ * {@inheritDoc} + * @stable ICU 4.4 */ - @Deprecated - public synchronized LocaleMatcher setFavorScript(boolean favorScript) { - this.xFavorScript = favorScript; - xLocaleMatcher = null; - return this; + @Override + public String toString() { + StringBuilder s = new StringBuilder().append("{LocaleMatcher"); + if (supportedULocales.length > 0) { + s.append(" supported={").append(supportedULocales[0].toString()); + for (int i = 1; i < supportedULocales.length; ++i) { + s.append(", ").append(supportedULocales[i].toString()); + } + s.append('}'); + } + s.append(" default=").append(Objects.toString(defaultULocale)); + if (favorSubtag != null) { + s.append(" distance=").append(favorSubtag.toString()); + } + if (thresholdDistance >= 0) { + s.append(String.format(" threshold=%d", thresholdDistance)); + } + s.append(String.format(" demotion=%d", demotionPerDesiredLocale)); + return s.append('}').toString(); } } diff --git a/icu4j/main/shared/build/common-targets.xml b/icu4j/main/shared/build/common-targets.xml index 8ee10ed141f..6f85a99de8d 100644 --- a/icu4j/main/shared/build/common-targets.xml +++ b/icu4j/main/shared/build/common-targets.xml @@ -261,9 +261,10 @@ + - + @@ -349,11 +350,9 @@ - - - + diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index 36c0d08dd80..7134fe12d94 100644 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd004f5d8064e047cef4f7d31326b39b7fc43fba685fab2f0d23c154f4dbc637 -size 12818511 +oid sha256:b21585ec768edea7b099bd6a97b0a4130b53966a63e6a10de2f31b22f8b59fbd +size 12840921 diff --git a/icu4j/main/tests/core/.classpath b/icu4j/main/tests/core/.classpath index e02ee9d41d0..60473b0ad8e 100644 --- a/icu4j/main/tests/core/.classpath +++ b/icu4j/main/tests/core/.classpath @@ -18,5 +18,6 @@ + diff --git a/icu4j/main/tests/core/.project b/icu4j/main/tests/core/.project index 951d4b2cfaa..32e60a3eca3 100644 --- 
a/icu4j/main/tests/core/.project +++ b/icu4j/main/tests/core/.project @@ -9,6 +9,7 @@ icu4j-regiondata icu4j-shared icu4j-test-framework + icu4j-tools diff --git a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableChecker.java similarity index 90% rename from icu4j/tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java rename to icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableChecker.java index 3aa55210a00..b6b66a636ca 100644 --- a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/serializable/SerializableChecker.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableChecker.java @@ -8,7 +8,7 @@ * */ -package com.ibm.icu.dev.tool.serializable; +package com.ibm.icu.dev.test.serializable; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; -import com.ibm.icu.dev.test.serializable.SerializableTestUtility; import com.ibm.icu.impl.URLHandler; /** @@ -31,32 +30,32 @@ import com.ibm.icu.impl.URLHandler; * and lists all those classes that implement Serializable. It also checks * to make sure that those classes have the serialVersionUID * field define. - * + * */ public class SerializableChecker implements URLHandler.URLVisitor { private static Class serializable; //private static Class throwable; - + private String path = null; - + //private boolean write; - + public SerializableChecker(String path) { this.path = path; - + if (path != null) { File dir = new File(path); - + if (!dir.exists()) { dir.mkdirs(); } } } - + static { - try { + try { serializable = Class.forName("java.io.Serializable"); //throwable = Class.forName("java.lang.Throwable"); } catch (Exception e) { @@ -64,45 +63,43 @@ public class SerializableChecker implements URLHandler.URLVisitor System.out.println("Woops! 
Can't get class info for Serializable and Throwable."); } } - + private void writeFile(String className, byte bytes[]) { File file = new File(path + File.separator + className + ".dat"); - FileOutputStream stream; - - try { - stream = new FileOutputStream(file); - + + try (FileOutputStream stream = new FileOutputStream(file)) { stream.write(bytes); stream.close(); } catch (Exception e) { System.out.print(" - can't write file!"); } } - + + @Override public void visit(String str) { int ix = str.lastIndexOf(".class"); - + if (ix >= 0) { String className = "com.ibm.icu" + str.substring(0, ix).replace('/', '.'); - + // Skip things in com.ibm.icu.dev; they're not relevant. if (className.startsWith("com.ibm.icu.dev.")) { return; } - + try { Class c = Class.forName(className); int m = c.getModifiers(); - + if (serializable.isAssignableFrom(c) /*&& (! throwable.isAssignableFrom(c) || c.getDeclaredFields().length > 0)*/) { //Field uid; - + System.out.print(className + " (" + Modifier.toString(m) + ") - "); - - if(!Modifier.isInterface(m)){ + + if(!Modifier.isInterface(m)){ try { /* uid = */ c.getDeclaredField("serialVersionUID"); @@ -110,18 +107,18 @@ public class SerializableChecker implements URLHandler.URLVisitor System.out.print("no serialVersionUID - "); } } - + if (Modifier.isPublic(m)) { SerializableTestUtility.Handler handler = SerializableTestUtility.getHandler(className); - + if (!Modifier.isInterface(m) && handler != null) { Object objectsOut[] = handler.getTestObjects(); Object objectsIn[]; boolean passed = true; - + ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); ObjectOutputStream out = new ObjectOutputStream(byteOut); - + try { out.writeObject(objectsOut); out.close(); @@ -130,14 +127,14 @@ public class SerializableChecker implements URLHandler.URLVisitor System.out.println("Eror writing test objects:" + e.toString()); return; } - + if (path != null) { writeFile(className, byteOut.toByteArray()); } - + ByteArrayInputStream byteIn = new 
ByteArrayInputStream(byteOut.toByteArray()); ObjectInputStream in = new ObjectInputStream(byteIn); - + try { objectsIn = (Object[]) in.readObject(); in.close(); @@ -153,7 +150,7 @@ public class SerializableChecker implements URLHandler.URLVisitor System.out.println("Object " + i + " failed behavior test."); } } - + if (passed) { System.out.print("test passed."); } @@ -164,7 +161,7 @@ public class SerializableChecker implements URLHandler.URLVisitor } } } - + System.out.println(); } } catch (Exception e) { @@ -177,10 +174,10 @@ public class SerializableChecker implements URLHandler.URLVisitor { List argList = Arrays.asList(args); String path = null; - + for (Iterator it = argList.iterator(); it.hasNext(); /*anything?*/) { String arg = (String) it.next(); - + if (arg.equals("-w")) { if (it.hasNext()) { path = (String) it.next(); @@ -188,15 +185,15 @@ public class SerializableChecker implements URLHandler.URLVisitor System.out.println("Missing directory name on -w command."); } } else { - - + + try { //URL jarURL = new URL("jar:file:/dev/eclipse/workspace/icu4j/icu4j.jar!/com/ibm/icu"); //URL fileURL = new URL("file:/dev/eclipse/workspace/icu4j/classes/com/ibm/icu"); URL url = new URL(arg); URLHandler handler = URLHandler.get(url); SerializableChecker checker = new SerializableChecker(path); - + System.out.println("Checking classes from " + arg + ":"); handler.guide(checker, true, false); } catch (Exception e) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDistanceTest.java similarity index 92% rename from icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java rename to icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDistanceTest.java index cb32b1fbce5..39388fcbf01 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleDistanceTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleDistanceTest.java 
@@ -12,9 +12,10 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder; import com.ibm.icu.impl.locale.LocaleDistance; -import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; import com.ibm.icu.util.LocaleMatcher; +import com.ibm.icu.util.LocaleMatcher.FavorSubtag; import com.ibm.icu.util.Output; import com.ibm.icu.util.ULocale; @@ -25,13 +26,13 @@ import com.ibm.icu.util.ULocale; * @author markdavis */ @RunWith(JUnit4.class) -public class XLocaleDistanceTest extends TestFmwk { +public class LocaleDistanceTest extends TestFmwk { private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed private LocaleDistance localeDistance = LocaleDistance.INSTANCE; DataDrivenTestHelper tfh = new MyTestFileHandler() .setFramework(this) - .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt"); + .load(LocaleDistanceTest.class, "data/localeDistanceTest.txt"); static class Arguments { final ULocale desired; @@ -47,6 +48,13 @@ public class XLocaleDistanceTest extends TestFmwk { } } + @Test + public void testLoadedDataSameAsBuiltFromScratch() { + LocaleDistance.Data built = LocaleDistanceBuilder.build(); + LocaleDistance.Data loaded = LocaleDistance.Data.load(); + assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded); + } + @SuppressWarnings("unused") @Ignore("Disabled because of Linux; need to investigate.") @Test diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java deleted file mode 100644 index ababb6d1155..00000000000 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherShim.java +++ /dev/null @@ -1,22 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html#License -/* - ******************************************************************************* - * Copyright (C) 2015, Google, Inc., International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package com.ibm.icu.dev.test.util; - -import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData; - -/** - * @author markdavis - * - */ -public class LocaleMatcherShim { - public static LanguageMatcherData load() { - // In CLDR, has different value - return null; - } -} diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java index e80c7f5196e..32c361fb0f4 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java @@ -9,55 +9,47 @@ package com.ibm.icu.dev.test.util; -import java.util.Arrays; -import java.util.LinkedHashSet; +import java.io.BufferedReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.junit.Test; import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder; +import com.ibm.icu.impl.locale.XCldrStub.FileUtilities; +import com.ibm.icu.impl.locale.XLikelySubtags; import com.ibm.icu.util.LocaleMatcher; -import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData; +import com.ibm.icu.util.LocaleMatcher.FavorSubtag; import com.ibm.icu.util.LocalePriorityList; -import com.ibm.icu.util.Output; import com.ibm.icu.util.ULocale; +import junitparams.JUnitParamsRunner; +import junitparams.Parameters; + /** * Test the LocaleMatcher. 
* * @author markdavis */ -@SuppressWarnings("deprecation") -@RunWith(JUnit4.class) +@RunWith(JUnitParamsRunner.class) public class LocaleMatcherTest extends TestFmwk { - - private static final ULocale ZH_MO = new ULocale("zh_MO"); private static final ULocale ZH_HK = new ULocale("zh_HK"); - static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load(); private LocaleMatcher newLocaleMatcher(LocalePriorityList build) { - return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA); - } - - private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) { - return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data); - } - - private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) { - return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d); + return new LocaleMatcher(build); } private LocaleMatcher newLocaleMatcher(String string) { - return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA); + return new LocaleMatcher(LocalePriorityList.add(string).build()); } - // public LocaleMatcher(LocalePriorityList languagePriorityList, - // LocaleMatcherData matcherData, double threshold) - @Test public void testParentLocales() { assertCloser("es_AR", "es_419", "es_ES"); @@ -87,32 +79,6 @@ public class LocaleMatcherTest extends TestFmwk { assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a)); } - // public void testParentLocales() { - // // find all the regions that have a closer relation because of an explicit parent - // Set explicitParents = new HashSet<>(INFO.getExplicitParents()); - // explicitParents.remove("root"); - // Set otherParents = new HashSet<>(INFO.getExplicitParents()); - // for (String locale : explicitParents) { - // while (true) { - // locale = LocaleIDParser.getParent(locale); - // if (locale == null || locale.equals("root")) { - // break; - // 
} - // otherParents.add(locale); - // } - // } - // otherParents.remove("root"); - // - // for (String locale : CONFIG.getCldrFactory().getAvailable()) { - // String parentId = LocaleIDParser.getParent(locale); - // String parentIdSimple = LocaleIDParser.getSimpleParent(locale); - // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) { - // continue; - // } - // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple); - // } - // } - @Test public void testChinese() { LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw"); @@ -139,31 +105,10 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void testFallbacks() { LocalePriorityList lpl = LocalePriorityList.add("en, hi").build(); - final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09); + final LocaleMatcher matcher = newLocaleMatcher(lpl); assertEquals("hi", matcher.getBestMatch("sa").toString()); } - @Test - public void testOverrideData() { - double threshold = 0.05; - LanguageMatcherData localeMatcherData = new LanguageMatcherData() - .addDistance("br", "fr", 10, true) - .addDistance("es", "cy", 10, true); - logln(localeMatcherData.toString()); - - final LocaleMatcher matcher = newLocaleMatcher( - LocalePriorityList - .add(ULocale.ENGLISH) - .add(ULocale.FRENCH) - .add(ULocale.UK) - .build(), localeMatcherData, threshold); - logln(matcher.toString()); - - assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br"))); - assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one - // way - } - @Test public void testBasics() { final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK) @@ -184,7 +129,7 @@ public class LocaleMatcherTest extends TestFmwk { assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh")); assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN")); assertEquals(new ULocale("zh_TW"), 
matcher.getBestMatch("zh_Hant_HK")); - assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT")); + assertEquals(new ULocale("iw"), matcher.getBestMatch("iw_IT")); } @Test @@ -219,20 +164,8 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void TestLocaleMatcherCoverage() { // Add tests for better code coverage - LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null); + LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build()); logln(matcher.toString()); - - LanguageMatcherData data = new LanguageMatcherData(); - - LanguageMatcherData clone = data.cloneAsThawed(); - - if (clone.equals(data)) { - errln("Error cloneAsThawed() is equal."); - } - - if (data.isFrozen()) { - errln("Error LocaleMatcherData is frozen!"); - } } private void assertEquals(Object expected, Object string) { @@ -251,17 +184,19 @@ public class LocaleMatcherTest extends TestFmwk { static final ULocale ENGLISH_CANADA = new ULocale("en_CA"); + private static double match(ULocale a, ULocale b) { + final LocaleMatcher matcher = new LocaleMatcher(""); + return matcher.match(a, null, b, null); + } + @Test public void testMatch_exact() { - assertEquals(1.0, - LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA)); + assertEquals(1.0, match(ENGLISH_CANADA, ENGLISH_CANADA)); } @Test public void testMatch_none() { - double match = LocaleMatcher.match( - new ULocale("ar_MK"), - ENGLISH_CANADA); + double match = match(new ULocale("ar_MK"), ENGLISH_CANADA); assertTrue("Actual < 0: " + match, 0 <= match); assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match); } @@ -270,13 +205,12 @@ public class LocaleMatcherTest extends TestFmwk { public void testMatch_matchOnMazimized() { ULocale undTw = new ULocale("und_TW"); ULocale zhHant = new ULocale("zh_Hant"); - double matchZh = LocaleMatcher.match(undTw, new ULocale("zh")); - double matchZhHant = LocaleMatcher.match(undTw, zhHant); + double matchZh = match(undTw, 
new ULocale("zh")); + double matchZhHant = match(undTw, zhHant); assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant + ") than to zh (" + matchZh + ")", matchZh < matchZhHant); - double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"), - zhHant); + double matchEnHantTw = match(new ULocale("en_Hant_TW"), zhHant); assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant + ") than to en_Hant_TW (" + matchEnHantTw + ")", matchEnHantTw < matchZhHant); @@ -397,16 +331,9 @@ public class LocaleMatcherTest extends TestFmwk { assertEquals("it", matcher.getBestMatch("en").toString()); } - // public void testGetBestMatch_emptyList() { - // final LocaleMatcher matcher = newLocaleMatcher( - // new LocalePriorityList(new HashMap())); - // assertNull(matcher.getBestMatch(ULocale.ENGLISH)); - // } - @Test public void testGetBestMatch_googlePseudoLocales() { // Google pseudo locales are primarily based on variant subtags. - // See http://sites/intl_eng/pseudo_locales. // (See below for the region code based fall back options.) final LocaleMatcher matcher = newLocaleMatcher( "fr, pt"); @@ -475,19 +402,25 @@ public class LocaleMatcherTest extends TestFmwk { check2(sorted); } + private static final ULocale posix = new ULocale("en_US_POSIX"); + /** * @param sorted */ private void check2(Set sorted) { - // TODO Auto-generated method stub logln("Checking: " + sorted); LocaleMatcher matcher = newLocaleMatcher( LocalePriorityList.add( sorted.toArray(new ULocale[sorted.size()])) - .build()); + .build()); for (ULocale loc : sorted) { - String stringLoc = loc.toString(); - assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString()); + // The result may not be the exact same locale, but it must be equivalent. + // Variants and extensions are ignored. 
+ if (loc.equals(posix)) { continue; } + ULocale max = ULocale.addLikelySubtags(loc); + ULocale best = matcher.getBestMatch(loc); + ULocale maxBest = ULocale.addLikelySubtags(best); + assertEquals(loc.toString(), max, maxBest); } } @@ -502,29 +435,8 @@ public class LocaleMatcherTest extends TestFmwk { } - // public void testComputeDistance_monkeyTest() { - // RegionCode[] codes = RegionCode.values(); - // Random random = new Random(); - // for (int i = 0; i < 1000; ++i) { - // RegionCode x = codes[random.nextInt(codes.length)]; - // RegionCode y = codes[random.nextInt(codes.length)]; - // double d = LocaleMatcher.getRegionDistance(x, y, null, null); - // if (x == RegionCode.ZZ || y == RegionCode.ZZ) { - // assertEquals(LocaleMatcher.REGION_DISTANCE, d); - // } else if (x == y) { - // assertEquals(0.0, d); - // } else { - // assertTrue(d > 0); - // assertTrue(d <= LocaleMatcher.REGION_DISTANCE); - // } - // } - // } - @Test public void testGetBestMatchForList_matchOnMaximized2() { -// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) { -// return; -// } final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX"); // ja-JP matches ja on likely subtags, and it's listed first, thus it wins over // thus it wins over the second preference en-GB. 
@@ -537,9 +449,6 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void testGetBestMatchForList_closeEnoughMatchOnMaximized() { -// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) { -// return; -// } final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja"); assertEquals("de", matcher.getBestMatch("de-CH, fr").toString()); assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString()); @@ -547,23 +456,20 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void testGetBestMatchForPortuguese() { - -// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) { -// return; -// } - final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419"); final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419"); // Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2. final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419"); - // European user who prefers Spanish over Brazillian Portuguese as a fallback. + // European user who prefers Spanish over Brazilian Portuguese as a fallback. assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString()); assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString()); - assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString()); + // The earlier pt_PT vs. pt_BR region mismatch is as good as the later es perfect match + // because of the demotion per desired locale. + assertEquals("pt_BR", withoutPT.getBestMatch("pt_PT, es, pt").toString()); - // Brazillian user who prefers South American Spanish over European Portuguese as a fallback. + // Brazilian user who prefers South American Spanish over European Portuguese as a fallback. // The asymmetry between this case and above is because it's "pt_PT" that's missing between the // matchers as "pt_BR" is a much more common language. 
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString()); @@ -578,9 +484,6 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void testVariantWithScriptMatch() { -// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) { -// return; -// } final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv"); assertEquals("en", matcher.getBestMatch("en-GB").toString()); assertEquals("en", matcher.getBestMatch("en-GB, sv").toString()); @@ -588,80 +491,415 @@ public class LocaleMatcherTest extends TestFmwk { @Test public void testVariantWithScriptMatch2() { -// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) { -// return; -// } final LocaleMatcher matcher = newLocaleMatcher("en, sv"); assertEquals("en", matcher.getBestMatch("en-GB, sv").toString()); } @Test - public void testPerf() { - if (LANGUAGE_MATCHER_DATA == null) { - return; // skip except when testing data + public void Test8288() { + final LocaleMatcher matcher = newLocaleMatcher("it, en"); + assertEquals("it", matcher.getBestMatch("und").toString()); + assertEquals("en", matcher.getBestMatch("und, en").toString()); + } + + @Test + public void testDemotion() { + LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build(); + LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build(); + LocaleMatcher noDemotion = LocaleMatcher.builder(). + setSupportedULocales(supported.getULocales()). + setDemotionPerDesiredLocale(LocaleMatcher.Demotion.NONE).build(); + assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired)); + + LocaleMatcher regionDemotion = LocaleMatcher.builder(). + setSupportedULocales(supported.getULocales()). 
+ setDemotionPerDesiredLocale(LocaleMatcher.Demotion.REGION).build(); + assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired)); + } + + private static final class PerfCase { + ULocale desired; + ULocale expectedShort; + ULocale expectedLong; + ULocale expectedVeryLong; + + PerfCase(String des, String expShort, String expLong, String expVeryLong) { + desired = new ULocale(des); + expectedShort = new ULocale(expShort); + expectedLong = new ULocale(expLong); + expectedVeryLong = new ULocale(expVeryLong); } - final String desired = "sv, en"; - - final LocaleMatcher matcherShort = newLocaleMatcher(desired); - final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu"); - final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, 
en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, 
ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA"); - - //LocaleMatcher.DEBUG = true; - ULocale expected = new ULocale("sv"); - assertEquals(expected, matcherShort.getBestMatch(desired)); - 
assertEquals(expected, matcherLong.getBestMatch(desired)); - assertEquals(expected, matcherVeryLong.getBestMatch(desired)); - //LocaleMatcher.DEBUG = false; - - for (int i = 0; i < 2; ++i) { - int iterations = i == 0 ? 1000 : 100000; - boolean showMessage = i != 0; - long timeShort = timeLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations, 0); - @SuppressWarnings("unused") - long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort); - @SuppressWarnings("unused") - long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort); + } + + private static final int WARM_UP_ITERATIONS = 1000; + private static final int BENCHMARK_ITERATIONS = 20000; + + @Test + public void testPerf() { + final String shortList = "en, sv"; + final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + + "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + + "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + + "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + + "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + + "zh-CN, zh-TW, zu"; + final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " + + "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " + + "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " + + "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " + + "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " + + "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " + + "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " + + "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " + + "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, 
ckb_IQ, ckb_IR, cs, cs_CZ, " + + "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " + + "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " + + // removed en_001 to avoid exact match + "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " + + "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " + + "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " + + "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " + + "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " + + "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " + + "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " + + "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " + + "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " + + "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " + + "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " + + "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " + + "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " + + "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " + + "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " + + "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " + + "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " + + "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " + + "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " + + "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " + + "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " + + 
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " + + "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " + + "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " + + "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " + + "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " + + "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " + + "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " + + "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " + + "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " + + "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " + + "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " + + "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " + + "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " + + "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " + + "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " + + "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " + + "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " + + "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " + + "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " + + "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " + + "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " + + "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " + + "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " + + "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " + + 
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " + + "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " + + "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " + + "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " + + "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " + + "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " + + "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA"; + + final LocaleMatcher matcherShort = newLocaleMatcher(shortList); + final LocaleMatcher matcherLong = newLocaleMatcher(longList); + final LocaleMatcher matcherVeryLong = newLocaleMatcher(veryLongList); + + PerfCase[] pcs = new PerfCase[] { + // Exact match in all matchers. + new PerfCase("sv", "sv", "sv", "sv"), + // Common locale, exact match only in very long list. + new PerfCase("fr_CA", "en", "fr", "fr_CA"), + // Unusual locale, no exact match. + new PerfCase("de_CA", "en", "de", "de"), + // World English maps to several region partitions. + new PerfCase("en_001", "en", "en", "en"), + // Ancient language with interesting subtags. 
+ new PerfCase("egy_Copt_CY", "en", "af", "af") + }; + + for (PerfCase pc : pcs) { + final ULocale desired = pc.desired; + + assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired)); + assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired)); + assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired)); + + timeLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS); + timeLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS); + timeLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS); + long tns = timeLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS); + System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns); + long tnl = timeLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS); + System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl); + long tnv = timeLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS); + System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv); } + + maximizePerf(); } - private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher, - boolean showmessage, int iterations, long comparisonTime) { + private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) { long start = System.nanoTime(); for (int i = iterations; i > 0; --i) { matcher.getBestMatch(desired); } long delta = System.nanoTime() - start; - if (showmessage) warnln(title + (delta / iterations) + " nanos, " - + (comparisonTime > 0 ? 
(delta * 100 / comparisonTime - 100) + "% longer" : "")); - return delta; + return (delta / iterations); + } + + private void maximizePerf() { + final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + + "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + + "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + + "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + + "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + + "zh-CN, zh-TW, zu"; + LocalePriorityList list = LocalePriorityList.add(tags).build(); + int few = 1000; + long t = timeMaximize(list, few); // warm up + t = timeMaximize(list, few); // measure for scale + long targetTime = 100000000L; // 10^8 ns = 0.1s + int iterations = (int)((targetTime * few) / t); + t = timeMaximize(list, iterations); + int length = 0; + for (@SuppressWarnings("unused") ULocale locale : list) { ++length; } + System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " + + t + " ns / " + iterations + " iterations / " + length + " locales"); + } + + // returns total ns not per iteration + private static long timeMaximize(Iterable list, int iterations) { + long start = System.nanoTime(); + for (int i = iterations; i > 0; --i) { + for (ULocale locale : list) { + XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); + } + } + return System.nanoTime() - start; } @Test - public void Test8288() { - final LocaleMatcher matcher = newLocaleMatcher("it, en"); - assertEquals("it", matcher.getBestMatch("und").toString()); - assertEquals("en", matcher.getBestMatch("und, en").toString()); + public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() { + XLikelySubtags.Data built = LikelySubtagsBuilder.build(); + XLikelySubtags.Data loaded = XLikelySubtags.Data.load(); + assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded); } - @Test - public void TestTechPreview() { - final LocaleMatcher 
matcher = newLocaleMatcher("it, en, ru"); - ULocale und = new ULocale("und"); - ULocale bulgarian = new ULocale("bg"); - ULocale russian = new ULocale("ru"); + private static final class TestCase implements Cloneable { + private static final String ENDL = System.getProperties().getProperty("line.separator"); + + int lineNr = 0; + + String nameLine = ""; + String supportedLine = ""; + String defaultLine = ""; + String distanceLine = ""; + String thresholdLine = ""; + String matchLine = ""; + + String supported = ""; + String def = ""; + String favor = ""; + String threshold = ""; + String desired = ""; + String expMatch = ""; + String expDesired = ""; + String expCombined = ""; + + @Override + public TestCase clone() throws CloneNotSupportedException { + return (TestCase) super.clone(); + } + + void reset(String newNameLine) { + nameLine = newNameLine; + supportedLine = ""; + defaultLine = ""; + distanceLine = ""; + thresholdLine = ""; + + supported = ""; + def = ""; + favor = ""; + threshold = ""; + } + + String toInputsKey() { + return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired; + } + + private static void appendLine(StringBuilder sb, String line) { + if (!line.isEmpty()) { + sb.append(ENDL).append(line); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(nameLine); + appendLine(sb, supportedLine); + appendLine(sb, defaultLine); + appendLine(sb, distanceLine); + appendLine(sb, thresholdLine); + sb.append(ENDL).append("line ").append(lineNr).append(':'); + appendLine(sb, matchLine); + return sb.toString(); + } + } - Output outputBestDesired = new Output<>(); + private static String getSuffixAfterPrefix(String s, int limit, String prefix) { + if (prefix.length() <= limit && s.startsWith(prefix)) { + return s.substring(prefix.length(), limit); + } else { + return null; + } + } + + // UsedReflectively, not private to avoid unused-warning + static List readTestCases() throws Exception { + List tests = 
new ArrayList<>(); + Map uniqueTests = new HashMap<>(); + TestCase test = new TestCase(); + String filename = "data/localeMatcherTest.txt"; + try (BufferedReader in = FileUtilities.openFile(LocaleMatcherTest.class, filename)) { + String line; + while ((line = in.readLine()) != null) { + ++test.lineNr; + // Start of comment, or end of line, minus trailing spaces. + int limit = line.indexOf('#'); + if (limit < 0) { + limit = line.length(); + } + char c; + while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) { + --limit; + } + if (limit == 0) { // empty line + continue; + } + String suffix; + if (line.startsWith("** test: ")) { + test.reset(line); + } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) { + test.supportedLine = line; + test.supported = suffix; + } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) { + test.defaultLine = line; + test.def = suffix; + } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) { + test.distanceLine = line; + test.favor = suffix; + } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) { + test.thresholdLine = line; + test.threshold = suffix; + } else { + int matchSep = line.indexOf(">>"); + // >> before an inline comment, and followed by more than white space. 
+ if (0 <= matchSep && (matchSep + 2) < limit) { + test.matchLine = line; + test.desired = line.substring(0, matchSep).trim(); + test.expDesired = test.expCombined = ""; + int start = matchSep + 2; + int expLimit = line.indexOf('|', start); + if (expLimit < 0) { + test.expMatch = line.substring(start, limit).trim(); + } else { + test.expMatch = line.substring(start, expLimit).trim(); + start = expLimit + 1; + expLimit = line.indexOf('|', start); + if (expLimit < 0) { + test.expDesired = line.substring(start, limit).trim(); + } else { + test.expDesired = line.substring(start, expLimit).trim(); + test.expCombined = line.substring(expLimit + 1, limit).trim(); + } + } + String inputs = test.toInputsKey(); + Integer prevIndex = uniqueTests.get(inputs); + if (prevIndex == null) { + uniqueTests.put(inputs, tests.size()); + } else { + System.out.println("Locale matcher test case on line " + test.lineNr + + " is a duplicate of line " + tests.get(prevIndex).lineNr); + } + tests.add(test.clone()); + } else { + throw new IllegalArgumentException("test data syntax error on line " + + test.lineNr + "\n" + line); + } + } + } + } + System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size())); + return tests; + } - ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired); - assertEquals(ULocale.ITALIAN, best); - assertEquals(null, outputBestDesired.value); + private static ULocale getULocaleOrNull(String s) { + if (s.equals("null")) { + return null; + } else { + return new ULocale(s); + } + } - matcher.setDefaultLanguage(ULocale.JAPANESE); - best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired); - assertEquals(ULocale.JAPANESE, best); + @Test + @Parameters(method = "readTestCases") + public void dataDriven(TestCase test) { + LocaleMatcher matcher; + if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) { + matcher 
= new LocaleMatcher(test.supported); + } else { + LocaleMatcher.Builder builder = LocaleMatcher.builder(); + builder.setSupportedLocales(test.supported); + if (!test.def.isEmpty()) { + builder.setDefaultULocale(new ULocale(test.def)); + } + if (!test.favor.isEmpty()) { + FavorSubtag favor; + switch (test.favor) { + case "normal": + favor = FavorSubtag.LANGUAGE; + break; + case "script": + favor = FavorSubtag.SCRIPT; + break; + default: + throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor); + } + builder.setFavorSubtag(favor); + } + if (!test.threshold.isEmpty()) { + int threshold = Integer.valueOf(test.threshold); + builder.internalSetThresholdDistance(threshold); + } + matcher = builder.build(); + } - matcher.setFavorScript(true); - best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired); - assertEquals(russian, best); + ULocale expMatch = getULocaleOrNull(test.expMatch); + if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) { + ULocale bestSupported = matcher.getBestMatch(test.desired); + assertEquals("bestSupported", expMatch, bestSupported); + } else { + LocalePriorityList desired = LocalePriorityList.add(test.desired).build(); + LocaleMatcher.Result result = matcher.getBestMatchResult(desired); + assertEquals("bestSupported", expMatch, result.getSupportedULocale()); + if (!test.expDesired.isEmpty()) { + ULocale expDesired = getULocaleOrNull(test.expDesired); + assertEquals("bestDesired", expDesired, result.getDesiredULocale()); + } + if (!test.expCombined.isEmpty()) { + ULocale expCombined = getULocaleOrNull(test.expCombined); + ULocale combined = result.makeServiceULocale(); + assertEquals("combined", expCombined, combined); + } + } } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java deleted file mode 100644 index f06e8be01c7..00000000000 --- 
a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/XLocaleMatcherTest.java +++ /dev/null @@ -1,612 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.dev.test.util; - -import java.io.BufferedReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; - -import org.junit.Test; -import org.junit.runner.RunWith; - -import com.ibm.icu.dev.test.TestFmwk; -import com.ibm.icu.impl.locale.LocaleDistance; -import com.ibm.icu.impl.locale.XCldrStub.FileUtilities; -import com.ibm.icu.impl.locale.XLikelySubtags; -import com.ibm.icu.impl.locale.XLocaleMatcher; -import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag; -import com.ibm.icu.util.LocaleMatcher; -import com.ibm.icu.util.LocalePriorityList; -import com.ibm.icu.util.ULocale; - -import junitparams.JUnitParamsRunner; -import junitparams.Parameters; - -/** - * Test the XLocaleMatcher. - * - * @author markdavis - */ -@RunWith(JUnitParamsRunner.class) -public class XLocaleMatcherTest extends TestFmwk { - private static final int REGION_DISTANCE = 4; - - private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE; - - private XLocaleMatcher newXLocaleMatcher() { - return new XLocaleMatcher(""); - } - - private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) { - return new XLocaleMatcher(build); - } - - private XLocaleMatcher newXLocaleMatcher(String string) { - return new XLocaleMatcher(LocalePriorityList.add(string).build()); - } - - @SuppressWarnings("unused") - private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) { - return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()). 
- internalSetThresholdDistance(d).build(); - } - - // public void testParentLocales() { - // // find all the regions that have a closer relation because of an explicit parent - // Set explicitParents = new HashSet<>(INFO.getExplicitParents()); - // explicitParents.remove("root"); - // Set otherParents = new HashSet<>(INFO.getExplicitParents()); - // for (String locale : explicitParents) { - // while (true) { - // locale = LocaleIDParser.getParent(locale); - // if (locale == null || locale.equals("root")) { - // break; - // } - // otherParents.add(locale); - // } - // } - // otherParents.remove("root"); - // - // for (String locale : CONFIG.getCldrFactory().getAvailable()) { - // String parentId = LocaleIDParser.getParent(locale); - // String parentIdSimple = LocaleIDParser.getSimpleParent(locale); - // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) { - // continue; - // } - // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple); - // } - // } - - -// TBD reenable with override data -// public void testOverrideData() { -// double threshold = 0.05; -// XLocaleDistance XLocaleMatcherData = new XLocaleDistance() -// .addDistance("br", "fr", 10, true) -// .addDistance("es", "cy", 10, true); -// logln(XLocaleMatcherData.toString()); -// -// final XLocaleMatcher matcher = newXLocaleMatcher( -// LocalePriorityList -// .add(ULocale.ENGLISH) -// .add(ULocale.FRENCH) -// .add(ULocale.UK) -// .build(), XLocaleMatcherData, threshold); -// logln(matcher.toString()); -// -// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br"))); -// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one -// // way -// } - - - /** - * If all the base languages are the same, then each sublocale matches - * itself most closely - */ - @Test - public void testExactMatches() { - String lastBase = ""; - TreeSet sorted = new TreeSet<>(); - for (ULocale loc : 
ULocale.getAvailableLocales()) { - String language = loc.getLanguage(); - if (!lastBase.equals(language)) { - check(sorted); - sorted.clear(); - lastBase = language; - } - sorted.add(loc); - } - check(sorted); - } - - private void check(Set sorted) { - if (sorted.isEmpty()) { - return; - } - check2(sorted); - ULocale first = sorted.iterator().next(); - ULocale max = ULocale.addLikelySubtags(first); - sorted.add(max); - check2(sorted); - } - - private static final ULocale posix = new ULocale("en_US_POSIX"); - - /** - * @param sorted - */ - private void check2(Set sorted) { - logln("Checking: " + sorted); - XLocaleMatcher matcher = newXLocaleMatcher( - LocalePriorityList.add( - sorted.toArray(new ULocale[sorted.size()])) - .build()); - for (ULocale loc : sorted) { - // The result may not be the exact same locale, but it must be equivalent. - // Variants and extensions are ignored. - if (loc.equals(posix)) { continue; } - ULocale max = ULocale.addLikelySubtags(loc); - ULocale best = matcher.getBestMatch(loc); - ULocale maxBest = ULocale.addLikelySubtags(best); - assertEquals(loc.toString(), max, maxBest); - } - } - - @Test - public void testDemotion() { - LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build(); - LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build(); - XLocaleMatcher noDemotion = XLocaleMatcher.builder(). - setSupportedULocales(supported.getULocales()). - setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build(); - assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired)); - - XLocaleMatcher regionDemotion = XLocaleMatcher.builder(). - setSupportedULocales(supported.getULocales()). 
- setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build(); - assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired)); - } - - private static final class PerfCase { - ULocale desired; - ULocale expectedShort; - ULocale expectedLong; - ULocale expectedVeryLong; - - PerfCase(String des, String expShort, String expLong, String expVeryLong) { - desired = new ULocale(des); - expectedShort = new ULocale(expShort); - expectedLong = new ULocale(expLong); - expectedVeryLong = new ULocale(expVeryLong); - } - } - - private static final int WARM_UP_ITERATIONS = 1000; - private static final int BENCHMARK_ITERATIONS = 20000; - private static final int AVG_PCT_MEDIUM_NEW_OLD = 33; - private static final int AVG_PCT_LONG_NEW_OLD = 80; - - @Test - public void testPerf() { - if (LANGUAGE_MATCHER_DATA == null) { - return; // skip except when testing data - } - - final String shortList = "en, sv"; - final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + - "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + - "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + - "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + - "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + - "zh-CN, zh-TW, zu"; - final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " + - "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " + - "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " + - "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " + - "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " + - "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " + - "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " + - "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " + - "ca_FR, ca_IT, ce, ce_RU, 
cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " + - "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " + - "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " + - // removed en_001 to avoid exact match - "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " + - "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " + - "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " + - "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " + - "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " + - "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " + - "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " + - "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " + - "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " + - "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " + - "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " + - "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " + - "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " + - "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " + - "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " + - "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " + - "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " + - "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " + - "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " + - "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " + - "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, 
ha_NE, ha_NG, haw, haw_US, " + - "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " + - "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " + - "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " + - "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " + - "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " + - "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " + - "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " + - "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " + - "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " + - "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " + - "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " + - "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " + - "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " + - "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " + - "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " + - "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " + - "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " + - "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " + - "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " + - "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " + - "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " + - "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " + - "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " + - "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, 
ta_SG, te, te_IN, teo, " + - "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " + - "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " + - "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " + - "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " + - "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " + - "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " + - "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA"; - - final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList); - final XLocaleMatcher matcherLong = newXLocaleMatcher(longList); - final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList); - - final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList); - final LocaleMatcher matcherLongOld = new LocaleMatcher(longList); - final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList); - - long timeShortNew=0; - long timeMediumNew=0; - long timeLongNew=0; - - long timeShortOld=0; - long timeMediumOld=0; - long timeLongOld=0; - - PerfCase[] pcs = new PerfCase[] { - // Exact match in all matchers. - new PerfCase("sv", "sv", "sv", "sv"), - // Common locale, exact match only in very long list. - new PerfCase("fr_CA", "en", "fr", "fr_CA"), - // Unusual locale, no exact match. - new PerfCase("de_CA", "en", "de", "de"), - // World English maps to several region partitions. - new PerfCase("en_001", "en", "en", "en"), - // Ancient language with interesting subtags. 
- new PerfCase("egy_Copt_CY", "en", "af", "af") - }; - - for (PerfCase pc : pcs) { - final ULocale desired = pc.desired; - - assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired)); - assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired)); - assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired)); - - timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS); - timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS); - timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS); - long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS); - System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns); - timeShortNew += tns; - long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS); - System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl); - timeMediumNew += tnl; - long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS); - System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv); - timeLongNew += tnv; - - timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS); - timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS); - timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS); - long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS); - System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n", - desired, tos, (100 * tns) / tos); - timeShortOld += tos; - long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS); - System.out.format("Old Duration (med. 
supported):\t%s\t%d\tnanos new/old=%d%%\n", - desired, tol, (100 * tnl) / tol); - timeMediumOld += tol; - long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS); - System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n", - desired, tov, (100 * tnv) / tov); - timeLongOld += tov; - } - - assertTrue( - String.format("timeShortNew=%d < %d%% of timeShortOld=%d", - timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld), - timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD); - assertTrue( - String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d", - timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld), - timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD); - assertTrue( - String.format("timeLongNew=%d < %d%% of timeLongOld=%d", - timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld), - timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD); - - maximizePerf(); - } - - private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) { - long start = System.nanoTime(); - for (int i = iterations; i > 0; --i) { - matcher.getBestMatch(desired); - } - long delta = System.nanoTime() - start; - return (delta / iterations); - } - - private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) { - long start = System.nanoTime(); - for (int i = iterations; i > 0; --i) { - matcher.getBestMatch(desired); - } - long delta = System.nanoTime() - start; - return (delta / iterations); - } - - private void maximizePerf() { - final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " + - "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " + - "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " + - "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " + - "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " + - "zh-CN, zh-TW, zu"; - LocalePriorityList list = 
LocalePriorityList.add(tags).build(); - int few = 1000; - long t = timeMaximize(list, few); // warm up - t = timeMaximize(list, few); // measure for scale - long targetTime = 100000000L; // 10^8 ns = 0.1s - int iterations = (int)((targetTime * few) / t); - t = timeMaximize(list, iterations); - int length = 0; - for (@SuppressWarnings("unused") ULocale locale : list) { ++length; } - System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " + - t + " ns / " + iterations + " iterations / " + length + " locales"); - } - - // returns total ns not per iteration - private static long timeMaximize(Iterable list, int iterations) { - long start = System.nanoTime(); - for (int i = iterations; i > 0; --i) { - for (ULocale locale : list) { - XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale); - } - } - return System.nanoTime() - start; - } - - private static final class TestCase implements Cloneable { - private static final String ENDL = System.getProperties().getProperty("line.separator"); - - int lineNr = 0; - - String nameLine = ""; - String supportedLine = ""; - String defaultLine = ""; - String distanceLine = ""; - String thresholdLine = ""; - String matchLine = ""; - - String supported = ""; - String def = ""; - String favor = ""; - String threshold = ""; - String desired = ""; - String expMatch = ""; - String expDesired = ""; - String expCombined = ""; - - @Override - public TestCase clone() throws CloneNotSupportedException { - return (TestCase) super.clone(); - } - - void reset(String newNameLine) { - nameLine = newNameLine; - supportedLine = ""; - defaultLine = ""; - distanceLine = ""; - thresholdLine = ""; - - supported = ""; - def = ""; - favor = ""; - threshold = ""; - } - - String toInputsKey() { - return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired; - } - - private static void appendLine(StringBuilder sb, String line) { - if (!line.isEmpty()) { - sb.append(ENDL).append(line); - } - } - - @Override - public String 
toString() { - StringBuilder sb = new StringBuilder(nameLine); - appendLine(sb, supportedLine); - appendLine(sb, defaultLine); - appendLine(sb, distanceLine); - appendLine(sb, thresholdLine); - sb.append(ENDL).append("line ").append(lineNr).append(':'); - appendLine(sb, matchLine); - return sb.toString(); - } - } - - private static String getSuffixAfterPrefix(String s, int limit, String prefix) { - if (prefix.length() <= limit && s.startsWith(prefix)) { - return s.substring(prefix.length(), limit); - } else { - return null; - } - } - - // UsedReflectively, not private to avoid unused-warning - static List readTestCases() throws Exception { - List tests = new ArrayList<>(); - Map uniqueTests = new HashMap<>(); - TestCase test = new TestCase(); - String filename = "data/localeMatcherTest.txt"; - try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) { - String line; - while ((line = in.readLine()) != null) { - ++test.lineNr; - // Start of comment, or end of line, minus trailing spaces. - int limit = line.indexOf('#'); - if (limit < 0) { - limit = line.length(); - } - char c; - while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) { - --limit; - } - if (limit == 0) { // empty line - continue; - } - String suffix; - if (line.startsWith("** test: ")) { - test.reset(line); - } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) { - test.supportedLine = line; - test.supported = suffix; - } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) { - test.defaultLine = line; - test.def = suffix; - } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) { - test.distanceLine = line; - test.favor = suffix; - } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) { - test.thresholdLine = line; - test.threshold = suffix; - } else { - int matchSep = line.indexOf(">>"); - // >> before an inline comment, and followed by more than white space. 
- if (0 <= matchSep && (matchSep + 2) < limit) { - test.matchLine = line; - test.desired = line.substring(0, matchSep).trim(); - test.expDesired = test.expCombined = ""; - int start = matchSep + 2; - int expLimit = line.indexOf('|', start); - if (expLimit < 0) { - test.expMatch = line.substring(start, limit).trim(); - } else { - test.expMatch = line.substring(start, expLimit).trim(); - start = expLimit + 1; - expLimit = line.indexOf('|', start); - if (expLimit < 0) { - test.expDesired = line.substring(start, limit).trim(); - } else { - test.expDesired = line.substring(start, expLimit).trim(); - test.expCombined = line.substring(expLimit + 1, limit).trim(); - } - } - String inputs = test.toInputsKey(); - Integer prevIndex = uniqueTests.get(inputs); - if (prevIndex == null) { - uniqueTests.put(inputs, tests.size()); - } else { - System.out.println("Locale matcher test case on line " + test.lineNr - + " is a duplicate of line " + tests.get(prevIndex).lineNr); - } - tests.add(test.clone()); - } else { - throw new IllegalArgumentException("test data syntax error on line " - + test.lineNr + "\n" + line); - } - } - } - } - System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size())); - return tests; - } - - private static ULocale getULocaleOrNull(String s) { - if (s.equals("null")) { - return null; - } else { - return new ULocale(s); - } - } - - @Test - @Parameters(method = "readTestCases") - public void dataDriven(TestCase test) { - XLocaleMatcher matcher; - if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) { - matcher = new XLocaleMatcher(test.supported); - } else { - XLocaleMatcher.Builder builder = XLocaleMatcher.builder(); - builder.setSupportedLocales(test.supported); - if (!test.def.isEmpty()) { - builder.setDefaultULocale(new ULocale(test.def)); - } - if (!test.favor.isEmpty()) { - FavorSubtag favor; - switch (test.favor) { - case "normal": - favor = FavorSubtag.LANGUAGE; - break; - case 
"script": - favor = FavorSubtag.SCRIPT; - break; - default: - throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor); - } - builder.setFavorSubtag(favor); - } - if (!test.threshold.isEmpty()) { - int threshold = Integer.valueOf(test.threshold); - builder.internalSetThresholdDistance(threshold); - } - matcher = builder.build(); - } - - ULocale expMatch = getULocaleOrNull(test.expMatch); - if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) { - ULocale bestSupported = matcher.getBestMatch(test.desired); - assertEquals("bestSupported", expMatch, bestSupported); - } else { - LocalePriorityList desired = LocalePriorityList.add(test.desired).build(); - XLocaleMatcher.Result result = matcher.getBestMatchResult(desired); - assertEquals("bestSupported", expMatch, result.getSupportedULocale()); - if (!test.expDesired.isEmpty()) { - ULocale expDesired = getULocaleOrNull(test.expDesired); - assertEquals("bestDesired", expDesired, result.getDesiredULocale()); - } - if (!test.expCombined.isEmpty()) { - ULocale expCombined = getULocaleOrNull(test.expCombined); - ULocale combined = result.makeServiceULocale(); - assertEquals("combined", expCombined, combined); - } - } - } -} diff --git a/icu4j/tools/misc/.classpath b/icu4j/tools/misc/.classpath index 3ae236d9f9b..858b05b33e3 100644 --- a/icu4j/tools/misc/.classpath +++ b/icu4j/tools/misc/.classpath @@ -3,10 +3,8 @@ - - diff --git a/icu4j/tools/misc/.project b/icu4j/tools/misc/.project index 16a37bc03da..2fbf5974887 100644 --- a/icu4j/tools/misc/.project +++ b/icu4j/tools/misc/.project @@ -3,10 +3,6 @@ icu4j-tools - icu4j-core - icu4j-core-tests - icu4j-shared - icu4j-test-framework diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java similarity index 97% rename from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java rename to 
icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java index a6bdbf695be..813d6f8f81b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LikelySubtagsBuilder.java +++ b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java @@ -1,6 +1,6 @@ // © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.locale; +package com.ibm.icu.dev.tool.locale; import java.nio.ByteBuffer; import java.util.Collection; @@ -14,10 +14,11 @@ import java.util.TreeMap; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.UResource; +import com.ibm.icu.impl.locale.LSR; import com.ibm.icu.impl.locale.XCldrStub.HashMultimap; import com.ibm.icu.impl.locale.XCldrStub.Multimap; import com.ibm.icu.impl.locale.XCldrStub.Multimaps; -import com.ibm.icu.util.BytesTrie; +import com.ibm.icu.impl.locale.XLikelySubtags; import com.ibm.icu.util.BytesTrieBuilder; import com.ibm.icu.util.ICUException; @@ -25,7 +26,7 @@ import com.ibm.icu.util.ICUException; * Builds data for XLikelySubtags. * Reads source data from ICU resource bundles. 
*/ -class LikelySubtagsBuilder { +public class LikelySubtagsBuilder { private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT; private static ICUResourceBundle getSupplementalDataBundle(String name) { @@ -50,7 +51,7 @@ class LikelySubtagsBuilder { UResource.Key key = new UResource.Key(); for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) { String aliasFrom = key.toString(); - if (aliasFrom.contains("_")) { + if (aliasFrom.contains("_") || aliasFrom.contains("-")) { continue; // only simple aliasing } UResource.Table table = value.getTable(); @@ -113,7 +114,7 @@ class LikelySubtagsBuilder { } } - BytesTrie build() { + byte[] build() { ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL); // Allocate an array with just the necessary capacity, // so that we do not hold on to a larger array for a long time. @@ -122,11 +123,12 @@ class LikelySubtagsBuilder { if (DEBUG_OUTPUT) { System.out.println("likely subtags trie size: " + bytes.length + " bytes"); } - return new BytesTrie(bytes, 0); + return bytes; } } - static XLikelySubtags.Data build() { + // VisibleForTesting + public static XLikelySubtags.Data build() { AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language"); AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory"); @@ -202,7 +204,7 @@ class LikelySubtagsBuilder { } } } - BytesTrie trie = trieBuilder.build(); + byte[] trie = trieBuilder.build(); LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]); return new XLikelySubtags.Data( languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java similarity index 87% rename from icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java rename to icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java index 
83cbe4a3e74..327f714b221 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistanceBuilder.java +++ b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java @@ -1,8 +1,15 @@ // © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.locale; - +package com.ibm.icu.dev.tool.locale; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -17,11 +24,13 @@ import java.util.TreeSet; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.UResource; +import com.ibm.icu.impl.locale.LSR; +import com.ibm.icu.impl.locale.LocaleDistance; import com.ibm.icu.impl.locale.XCldrStub.Multimap; import com.ibm.icu.impl.locale.XCldrStub.Predicate; import com.ibm.icu.impl.locale.XCldrStub.Splitter; import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap; -import com.ibm.icu.util.BytesTrie; +import com.ibm.icu.impl.locale.XLikelySubtags; import com.ibm.icu.util.BytesTrieBuilder; import com.ibm.icu.util.Output; import com.ibm.icu.util.ULocale; @@ -153,7 +162,7 @@ public final class LocaleDistanceBuilder { } } - BytesTrie build() { + byte[] build() { ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL); // Allocate an array with just the necessary capacity, // so that we do not hold on to a larger array for a long time. 
@@ -162,7 +171,7 @@ public final class LocaleDistanceBuilder { if (DEBUG_OUTPUT) { System.out.println("distance trie size: " + bytes.length + " bytes"); } - return new BytesTrie(bytes, 0); + return bytes; } } @@ -468,7 +477,8 @@ public final class LocaleDistanceBuilder { return result; } - static LocaleDistance build() { + // VisibleForTesting + public static LocaleDistance.Data build() { // From CLDR supplementalData/languageMatching/languageMatches type="written_new"/ // and then paradigmLocales, matchVariable, and the last languageMatch items. ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData"); @@ -591,8 +601,8 @@ public final class LocaleDistanceBuilder { TrieBuilder trieBuilder = new TrieBuilder(); defaultDistanceTable.toTrie(trieBuilder); - BytesTrie trie = trieBuilder.build(); - return new LocaleDistance( + byte[] trie = trieBuilder.build(); + return new LocaleDistance.Data( trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs, distances); } @@ -845,4 +855,112 @@ public final class LocaleDistanceBuilder { } } } + + private static final String TXT_PATH = "/tmp"; + private static final String TXT_FILE_BASE_NAME = "langInfo"; + private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt"; + + private static PrintWriter openWriter() throws IOException { + File file = new File(TXT_PATH, TXT_FILE_NAME); + return new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(file), StandardCharsets.UTF_8), 4096)); + } + + private static void printManyHexBytes(PrintWriter out, byte[] bytes) { + for (int i = 0;; ++i) { + if (i == bytes.length) { + out.println(); + break; + } + if (i != 0 && (i & 0xf) == 0) { + out.println(); + } + out.format("%02x", bytes[i] & 0xff); + } + } + + public static final void main(String[] args) throws IOException { + XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build(); + LocaleDistance.Data distanceData = build(); + System.out.println("Writing 
LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME); + try (PrintWriter out = openWriter()) { + out.println("// © 2019 and later: Unicode, Inc. and others.\n" + + "// License & terms of use: http://www.unicode.org/copyright.html#License\n" + + "// Generated by ICU4J LocaleDistanceBuilder.\n" + + TXT_FILE_BASE_NAME + ":table(nofallback){"); + out.println(" likely{"); + out.println(" languageAliases{ // " + likelyData.languageAliases.size()); + for (Map.Entry entry : + new TreeMap<>(likelyData.languageAliases).entrySet()) { + out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\","); + } + out.println(" } // languageAliases"); + + out.println(" regionAliases{ // " + likelyData.regionAliases.size()); + for (Map.Entry entry : + new TreeMap<>(likelyData.regionAliases).entrySet()) { + out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\","); + } + out.println(" } // regionAliases"); + + out.println(" trie:bin{ // BytesTrie: " + likelyData.trie.length + " bytes"); + printManyHexBytes(out, likelyData.trie); + out.println(" } // trie"); + + out.println(" lsrs{ // " + likelyData.lsrs.length); + for (LSR lsr : likelyData.lsrs) { + out.println(" \"" + lsr.language + "\",\"" + + lsr.script + "\",\"" + lsr.region + "\","); + } + out.println(" } // lsrs"); + out.println(" } // likely"); + + out.println(" match{"); + out.println(" trie:bin{ // BytesTrie: " + distanceData.trie.length + " bytes"); + printManyHexBytes(out, distanceData.trie); + out.println(" } // trie"); + + out.println(" regionToPartitions:bin{ // " + + distanceData.regionToPartitionsIndex.length + " bytes"); + printManyHexBytes(out, distanceData.regionToPartitionsIndex); + out.println(" } // regionToPartitions"); + + out.print(" partitions{"); + boolean first = true; + for (String p : distanceData.partitionArrays) { + if (first) { + first = false; + } else { + out.append(','); + } + out.append('"').print(p); + out.append('"'); + } + out.println("}"); + + out.println(" 
paradigms{"); + for (LSR lsr : distanceData.paradigmLSRs) { + out.println(" \"" + lsr.language + "\",\"" + + lsr.script + "\",\"" + lsr.region + "\","); + } + out.println(" }"); + + out.print(" distances:intvector{"); + first = true; + for (int d : distanceData.distances) { + if (first) { + first = false; + } else { + out.append(','); + } + out.print(d); + } + out.println("}"); + + out.println(" } // match"); + out.println("}"); + } + } }