| Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
| Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
| Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
-| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 53 KiB <br/> 33 KiB |
+| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
| Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
| Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
| Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+// Generated by ICU4J LocaleDistanceBuilder.
+langInfo:table(nofallback){
+ likely{
+ languageAliases{ // 164
+ "aam","aas",
+ "adp","dz",
+ "aju","jrb",
+ "alb","sq",
+ "als","sq",
+ "arb","ar",
+ "arm","hy",
+ "aue","ktz",
+ "ayr","ay",
+ "ayx","nun",
+ "azj","az",
+ "baq","eu",
+ "bcc","bal",
+ "bcl","bik",
+ "bgm","bcg",
+ "bh","bho",
+ "bjd","drl",
+ "bur","my",
+ "bxk","luy",
+ "bxr","bua",
+ "ccq","rki",
+ "chi","zh",
+ "cjr","mom",
+ "cka","cmr",
+ "cld","syr",
+ "cmk","xch",
+ "cmn","zh",
+ "coy","pij",
+ "cqu","quh",
+ "cwd","cr",
+ "cze","cs",
+ "dgo","doi",
+ "dhd","mwr",
+ "dik","din",
+ "diq","zza",
+ "drh","mn",
+ "dut","nl",
+ "ekk","et",
+ "emk","man",
+ "esk","ik",
+ "fat","ak",
+ "fre","fr",
+ "fuc","ff",
+ "gav","dev",
+ "gaz","om",
+ "gbo","grb",
+ "geo","ka",
+ "ger","de",
+ "gfx","vaj",
+ "ggn","gvr",
+ "gno","gon",
+ "gre","el",
+ "gti","nyc",
+ "gug","gn",
+ "guv","duz",
+ "gya","gba",
+ "hdn","hai",
+ "hea","hmn",
+ "him","srx",
+ "hrr","jal",
+ "ibi","opa",
+ "ice","is",
+ "ike","iu",
+ "ilw","gal",
+ "in","id",
+ "iw","he",
+ "jeg","oyb",
+ "ji","yi",
+ "jw","jv",
+ "kgc","tdf",
+ "kgh","kml",
+ "khk","mn",
+ "kmr","ku",
+ "knc","kr",
+ "kng","kg",
+ "knn","kok",
+ "koj","kwv",
+ "kpv","kv",
+ "krm","bmf",
+ "ktr","dtp",
+ "kvs","gdj",
+ "kwq","yam",
+ "kxe","tvd",
+ "kzj","dtp",
+ "kzt","dtp",
+ "lbk","bnc",
+ "lii","raq",
+ "lmm","rmx",
+ "lvs","lv",
+ "mac","mk",
+ "mao","mi",
+ "may","ms",
+ "meg","cir",
+ "mhr","chm",
+ "mnk","man",
+ "mo","ro",
+ "mst","mry",
+ "mup","raj",
+ "mwj","vaj",
+ "myt","mry",
+ "nad","xny",
+ "ncp","kdz",
+ "nnx","ngv",
+ "no","nb",
+ "npi","ne",
+ "nts","pij",
+ "ojg","oj",
+ "ory","or",
+ "oun","vaj",
+ "pbu","ps",
+ "pcr","adx",
+ "per","fa",
+ "pes","fa",
+ "plt","mg",
+ "pmc","huw",
+ "pmu","phr",
+ "pnb","lah",
+ "ppa","bfy",
+ "ppr","lcq",
+ "pry","prt",
+ "puz","pub",
+ "quz","qu",
+ "rmy","rom",
+ "rum","ro",
+ "sca","hle",
+ "scc","sr",
+ "scr","hr",
+ "skk","oyb",
+ "slo","sk",
+ "spy","kln",
+ "src","sc",
+ "swh","sw",
+ "tdu","dtp",
+ "thc","tpo",
+ "thx","oyb",
+ "tib","bo",
+ "tie","ras",
+ "tkk","twm",
+ "tl","fil",
+ "tlw","weo",
+ "tmp","tyj",
+ "tne","kak",
+ "tsf","taj",
+ "ttq","tmh",
+ "tw","ak",
+ "umu","del",
+ "uok","ema",
+ "uzn","uz",
+ "wel","cy",
+ "xba","cax",
+ "xia","acn",
+ "xkh","waw",
+ "xpe","kpe",
+ "xsj","suj",
+ "xsl","den",
+ "ybd","rki",
+ "ydd","yi",
+ "yma","lrr",
+ "ymt","mtm",
+ "yos","zom",
+ "yuu","yug",
+ "zai","zap",
+ "zsm","ms",
+ "zyb","za",
+ } // languageAliases
+ regionAliases{ // 38
+ "062","034",
+ "172","RU",
+ "200","CZ",
+ "230","ET",
+ "280","DE",
+ "532","CW",
+ "582","FM",
+ "736","SD",
+ "830","JE",
+ "886","YE",
+ "890","RS",
+ "AN","CW",
+ "BU","MM",
+ "CS","RS",
+ "CT","KI",
+ "DD","DE",
+ "DY","BJ",
+ "FQ","AQ",
+ "FX","FR",
+ "HV","BF",
+ "JT","UM",
+ "MI","UM",
+ "NH","VU",
+ "NQ","AQ",
+ "NT","SA",
+ "PC","FM",
+ "PU","UM",
+ "PZ","PA",
+ "QU","EU",
+ "RH","ZW",
+ "SU","RU",
+ "TP","TL",
+ "UK","GB",
+ "VD","VN",
+ "WK","UM",
+ "YD","YE",
+ "YU","RS",
+ "ZR","CD",
+ } // regionAliases
+ trie:bin{ // BytesTrie: 9782 bytes
+001a6dcc0b74c4e677c26077a2b378a4
+4e79a4bb7a0e6d7f7a5b7a30e1ad8ce8
+2ef5af2f10e1a537022a3c42cae54811
+616e01e2a3fcf4a3f70b4d24540e54ca
+8c552a5610cea3fa10d3a3f94dca9550
+2a5310d2a3f502c1a3f4c6a3f2c8a3f3
+47d4a2473248d81dfb4910c4a3ef01c2
+a3f8c6a3ed6d326e36733a7910e2ad8c
+10e9af2d10e5af2e10ed22022aa90643
+b4fd4910c4a908671767326836693a6c
+10edaf2c10e8adb910f8ada910e1af2b
+612e64326510e1af2a10e7af2810eaaf
+2900126d46741b743475387742e1addc
+efadf310edadf801f5adf9f6adfa10e1
+adfb6d3c6e406f50725a7301e7ad79eb
+adf710efadef02e3adf0e9adf1f5adf2
+01e2adf4f3adf510f3adf66726673668
+3a693e6a4e6c10f3adee10e9ade710e7
+ade802e2ade9f5adeaf6adeb01e1adec
+e9aded6138625463646501eca51ef2ad
+e604e5adddeaaddeecaddfeeade0f2ad
+e102f0ade2f1ade3f2ade410e9ade50c
+6e3d7221722e7332774ee8adff10e2af
+0504e1adb1e9af06eca52dedaf07f2af
+0810e5af096e346f3e7001e5a78af2ad
+ae01e1ada7f2af0201e7af03eeaf0465
+1d653a6c3e6d02e6af01eead9df2ada0
+10f3adfe02e1af00e3ad99e4ad9a612e
+62326310f2ad6910f6adfc10e9adfd0d
+6d49733073307534e9a731efaf1c10f3
+af2104e1af22e532eaaf25f4af26f7af
+27012a2e4812616ef3af24012aaf2343
+10ceaf246d3a6f3e7202e2af1ee5af1f
+ecaf2010ecaf1b10eeaf1d6520653e67
+426b4c6c02e5af18e7af19ecaf1a10f2
+af1401f2af15f7af1610efaf17612e62
+606410e4a73107f40cf4af0df6af0ef9
+af0ffaaf10eda7b2efaf0af0af0bf3af
+0c02e1af11e2af12f9af1374a25275a4
+d0760a6f1fe509e5adcfe9ad57efadd7
+6f3472387501eeaddaf4addb10f4add8
+10efadd9613c654c69566c606d01e6ad
+d5f7add602e7adcde9adbeeeadce01e3
+add0f0add101e3add2f6add310f3add4
+00267576e91ff20ef2abbdf3abc1f4ab
+c7f767f9abd7e9aba2ebaba9ecabadee
+abb3efabb57a247a36e1ab82e5ab93e7
+2ee8ab9e10edabda012a2e41127261e2
+ab9a012aab995010cbab9a753e765477
+6478ccca7901e1abd8f6abd903e8abce
+ecabcfedabd0f1abd102e4a7b5ecabd2
+f5abd301e8abd4f1abd56b7d7049705e
+716e727273827406f10cf1abb1f2abcb
+f3abccf4abcde4abc8e5abc9eaabca02
+e9abb9edabbafaabbb10efabbc02f5ab
+bef6abbff7abc004e4abc2e6abc3e7ab
+c4eaabc5f7abc66b426c526d626e6c6f
+02e6abb6e7abb7f1abb802ecabaaf2ab
+abf4abac02e6abaef8abaff9abb001e8
+abb1f9abb210e8abb46546655e667467
+7868886906eb0cebaba5edaba6efaba7
+f6aba8e2a3b0e6aba3e7aba403e4ab94
+edab95efab96f4ab9710e9ab9802e3ab
+9befab9cf5ab9d02ecab9ff1aba0f2ab
+a16144625a63866403e4ab90e7ab91e8
+ab92f5a54303eaab83ecab84eeab85f1
+ab8606e70ce7ab8aefab8bf7ab8cfaab
+8de3ab87e4ab88e6ab8901e9ab8ef9ab
+8f0f735fe72ce742ebabe1f2ad3ffa01
+2a8641127261e2adcb012a2e43127972
+ecabde022aabdd4b2a4d10ceabdf10da
+abde7360746476687a10ee012a2e4112
+7261e2adcb022aabec412a4310ceadcc
+10c6adcb10e1adc710f2adc801e8adc9
+ecadca6d326d3e6e426f707202e9adc4
+f4adc5f7adc610e2abe301f226f8adc3
+012a2e44126576e1adc2012aadc14e10
+d0adc210eba55a62326436673a6c10e9
+abe210f5abdb10edabdc10e1abe070c3
+9570a4f971a6d772a6e47300287884ec
+39f229f509f5ab71f6ab77f7ab78f22c
+f3ab6af4ab6e22032aab244da4bf522a
+5410d2ab2801cfab27d5ab25ecab4ced
+ab51eeab58efab5ef16de429e438e5ab
+34e7ab39e9ab42ebab48032aab2e4432
+4b3a5312696ee4ab31126576e1ab2f12
+686feaab307834793e7a48e1ab18e3ab
+2301eeab7ef7ab7f01ecab80f2a50a10
+ecab816cad724f725873747484758e77
+05e809e8ab78f0ab7cf6ab7de2ab79e3
+ab7ae7ab7b04e2ab67e3ab23eeab68f2
+ab69f8a5ef02e4ab6be7ab6cf9ab6d01
+ebab6ff1ab7004e1ab72e5ab73ebab74
+f2ab75f3ab766c4a6d666e8c6fa24370
+03e4ab64ecab65f3ab66f9a77204e4ab
+4de9ab4eecab4fefab48f9ab5005f009
+f0ab55f1ab56f3ab57e1ab52eaab53ee
+ab5404e3ab59ebab5af0ab5bf8ab5cf9
+ab5d04e7ab5febab60f1ab61f5ab62f9
+ab63673e67426858696e6a846b02e3ab
+49f2ab4af3ab4b03e1ab3af3ab3bf7ab
+3cfaab3d03e9ab3eebab3feeab40f5ab
+4103e4ab43e7ab44ecab45edab4610f2
+ab47614a6276638664a26b6503e6ab35
+e8ab36e9ab37f3ab3806f30cf3ab1cf4
+ab1df6ab1efaab1fe6ab19e8ab1af1ab
+1b02e1ab20e5ab21f0ab2206ee0ceeab
+2befab2cf2a5fdf3ab2de32cebab29ec
+ab2a22055211522e53325410d2ab2810
+cfab2710d5ab252aab24312a4d10c5ab
+261137b2ab2501e3ab32e8ab3300166e
+63752bec09eca9e4f3a9d5f4a9f0754c
+7750e1012a2e41127261e2a9cd012aa9
+cc5010cba9cd10f5a9f210e1a9f37217
+722e733e7410f0a9f102e1a9ece4a9ed
+e7a9ee10f3a9ef6e346f4a7001e1a386
+efa9eb03e2a7c4e7a9e7eea9e8f4a9e9
+10eea9ea66306b176b2e6c386d10f3a9
+e601e1a9e2efa9e301e1a9e5f4a92766
+3468386901eca9e0f0a9e110eca9dd01
+eca9deeea9df614e627e638864926504
+e4a9daefa9dbf2a56ef3a56ef8a9dc04
+e7a9ceec32eda9d1f0a9d2f5a9d3012a
+a9cf5012686cf0a9d001e9a9d4f5a9d5
+01e4a9d6eda9d701e3a9d8f4a9d90175
+26f5a9f402e3a9f5e7a9f6faa9f40014
+6f457917ef09efa944f5ab11f7ab1479
+2cedab04eeab0910f5ab176f3c724c74
+5075547701ebab15efab1602e2ab0ce6
+ab0defab0e10efab0f10edab1002e5ab
+12e7ab13eda9446934693c6a566b5a6d
+5e6e01e1ab0ae7ab0b01e1a9ffe62201
+2aab004e10ccab0110f3ab0210f4ab03
+03e6ab05efab06f4ab07f5ab08613663
+46654a675a6810e7a9fe02e9a9f7eaa9
+59efa9f810e6a9f902eaa9faeca9fbf3
+a9fc10eea9fd6da2576ea6446f0b741d
+ed09eda59ff2a9c4f3a9c7742a7a34e3
+a9bc01e1a9c9eba9ca10eda9cb701770
+2e72327310e1a9c810eda9c302efa9c5
+f5a9c6f9a9c4673a6b3e6e02e7a9c0ee
+a9c1f3a9c210e3a9bd01f2a9bef6a9bf
+0022749fe926ef0fefa944f2a94ff3a4
+92f4a953f9a3cde9a903eba7fdeca939
+ee012abaab4d126f6ee7a53e783d7834
+793e7a64e7a927e8a92d01e3a962eda9
+6305f709f7a967f8a968faa969eba964
+eda965f6a96605f009f0a96df7a96efa
+a96feba96aeda96beea96c7444755a76
+707703eba95ff2a535f6a960f7a96103
+e3a954e6a955e9a956f2a95703e1a958
+f0a959f2a95af3a95b02e1a95ceea95d
+f9a95e69856e446e426f5e707a719072
+02e4a950eaa951efa95204e1a940e6a9
+41e9a942ebb603f7a94304e1a945e5a9
+46e8a947f3a948f8a94903f0a94af3a9
+4bf4a94cf8a94d10eca94e693e6b686c
+7e6d02efa93df5a93ef8a93f03e6a930
+eea931f326f7a934012aa9324d126564
+e6a93303e9a935eca936f0a937f7a938
+02e5a93af0a93bf3a93c6549653e666a
+67866802e9a92eeca92ff2a3fe06ee0c
+eea91ef2a91ff4a920f5a921e4a91be5
+a91ceba91d04e1a922e5a923eea924ef
+a925f1a92604e8a928eca929efa92af0
+a92bf9a92c615e62a25663a2636406e8
+0ce8a917eaa918f2a919f8a91ae1a914
+e5a915e6a9160bee23f718f7a905f926
+faa90922022aa906432a4910c4a90810
+c3a907eeb414efa903f3a904e709e7a9
+00e9a901eba902e3a7fde4a7fee6a7ff
+04e8a90aefa90bf1a90cf5a90df7a90e
+04e9a90ff0a910f1a911f2a912f5a913
+0021745fe41dee0feea99aefa9a0f2a9
+a7f6a9b4f9a9b8e4a97fe5a982e7a987
+eca54a791579307a3ae1a970e2a97801
+eda9b9eea9ba10e9a9bb74387542775e
+7801f1a9b6f2a9b701eda9adf2a9ae04
+e9a9aff0a9b0f3a9b1f6a9b2f8a9b310
+e2a9b56a5d6f326f4870647168726c73
+03eba9a9eea9aaefa9abf3a9ac04e4a9
+a1e5a9a2eea9a3f0a9a4f5a9a510e9a9
+8210efa9a610e2a9a86a4a6b4e6d586e
+04e6a99be8a99ceba99deda99ef0a99f
+10efa99501e7a996efa99701e7a998fa
+a9996637665a675e686e6906ee0ceea9
+91f5a992f9a993faa994e6a98ee9a98f
+eaa99010f2a98602e1a988e2a989eca9
+8a02e2a98be5a98cf7a98d613e636a64
+906502e2a983f7a984f8a98506ee0cee
+a974f0a975f1a976f3a977e3a971e6a9
+72eba97305e809e8a97cefa97df5a97e
+e1a979e5a97ae6a97b01e3a980f3a981
+66c73c69c48269a4166aa4c46ba60e6c
+001b744ae717ef0cefa7ebf4a7f3f5a7
+f5f6a7f9e7a7d5e9a7d7eea7e8771577
+307a34e1a7c0e2a7c710eca7fa01e8a7
+fbfaa7fc742e75327610f3a7f910e7a7
+f403e1a7f6efa7f7f9a3d4faa7f86a4a
+6d316d326e426f4c7210e3a7f202eea7
+e5efa7e6f0a7e701f3a7e9f5a7ea05f2
+09f2a7eff3a7f0faa7f1eaa7eceba7ed
+eca7ee6a346b386c01e5a7e3eea7e410
+f0a7e001e9a7e1f4a7e26448646c6570
+679c6906e70ce7a7dce8a7ddeaa7def3
+a7dfe1a7d8e4a7d9e6012aa7da4c1269
+6de2a7db10e2a7cd06f00cf0a7d1f1a7
+d2f5a7d3faa7d4e4a7cee5a7cfeda7d0
+10e7a7d66134625a6301eda7cbf0a7cc
+05e809e8a7c4eaa7c5f3a7c6e2a7c1e4
+a7c2e7a7c302e5a7c8f5a7c9f7a7ca00
+197739eb17f30cf3a70bf4a723f5a717
+f7a724eba566eea71eefa720e409e4a7
+0de7a712e9a715772a7a34e1a70401ed
+a725f3a72601e8a727e9a7286a366d17
+6d326e366f3a7210e9a72210efa71d10
+e8a71f10f5a7216a2e6b326c10efa71c
+10eaa71604e5a717eba718f4a719f7a7
+1af8a71b641a643466446701e2a713e5
+a71402e4a70ee9a70ff5a71010e5a711
+613a62446302e1a70ae5a70be8a70c01
+eea705f2a70602e1a707e2a708f9a709
+0b721ae909e9a731f6a737f7a738722a
+752ee1a72910e1a73510f4a736671767
+34693e6d01e3a733eca73401eba72fef
+a73010e2a732612e62386510eea72e01
+e2a72aeda72b01efa72cf5a72d002875
+c0e4eb71f23cf628f6a78ef7a7aff902
+2a32414c4c126174eea7bc022aa7ba43
+2a5410d2a7bc10cea7bb127261e2a7bb
+f2a781f3a79af5012aa45941127261e2
+a77ceb38eca771eda776eea780efa785
+012a2e41127261e2a76c042aa76a4132
+4336493a4d10cea76e10c6a76b10cea7
+6c10d2a76d7a187a38e1a5aee7a755e9
+a761eaa76502eaa543f2a7bff4a54375
+3c766e777e788e7901e5a7bdf8a7be07
+ed0ceda7a8eea7a9f0a7aaf3a7abe2a7
+a4e4a7a5e5a7a6eaa7a702e7a7acf2a7
+adf8a7ae02eaa7b0efa7b1f1a7b206ed
+0ceda7b6f0a7b7f7a7b8faa7b9e1a7b3
+e3a7b4e5a7b56bc0cd7063704a717072
+8673a2457403e2a7a1eda7a2efa7a3f2
+a54305f209f2a78df6a78ef8a78fe5a7
+8ae6a78befa78c03e2a790e6a791f3a7
+92f9a79305ec09eca797f3a798f5a799
+e3a794e9a795eaa79605e809e8a79eea
+a79ff2a7a0e2a79be4a79ce6a79d6b50
+6c5a6d706ea24f6f04e9a786eba783ec
+a787f3a788faa78901e3a76feaa77003
+eea772f1a773f4a774f8a77506f21ef2
+32f3a77df5a77ef7a77f012a2e411272
+61e2a77c012aa77a4c10c2a77be2a777
+e8a778efa77904e3a781e6a782e7a755
+eea783f0a784665266486758686869a0
+6a03e4a766e7a767f3a768f9a76902ef
+a752f2a753f9a75402e5a756e6a757f0
+a75808f10ff1a75cf3a75df4a75ef7a7
+5ffaa760e1a759e2a75aebac79eea75b
+02eaa762f5a763f7a76461446276639c
+64a2486502e1a74feea750faa75107e9
+0ce9a73deaa73eeda73fefa740e1a739
+e2a73ae3a73be4a73c05f109f1a744f8
+a745f9a746e4a741eda742f0a74303e7
+a747eba748eca749f4a74a03e5a74be8
+a74ceca74df4a74e66a2b567a46a6800
+137539f20ff2a5fdf4a700f5a701f9a3
+41faa7037536e13ae5a5e7e9a5e9efa5
+f910e9a70222032aa5dd372e43345310
+c4a5de1133b6a5de10cda5df6c386c3c
+6d466e506f6c7301e2a5feeea5ff01e1
+a5f0f5a5f101e4a5f2f4a5f304e4a5f4
+e5a5f5eaa5f6eea5f7efa5f802e3a5fa
+eaa5fbf4a5fc615862746478687c6905
+e809e8a5edeca5eeeda5efe1a5eae6a5
+ebe7a5ec04e7a5e0eba5e1eda5e2f7a5
+e3faa5e410e2a5e510f9a5e610f9a5e8
+00117553e90fe9a578eaa57cefa57ff2
+a585f9a59475427694e1a56ee6012aa5
+744112646ceda57509e80fe8a58ef1a5
+8ff2a590f6a591f9a592e2a58ae332e4
+a58be5a58ce6a58d012aa5744112646c
+eda57510f2a5936d2f6d4e6f52706271
+667204e3a586e5a585f0a587f2a588f3
+a58910f0a57e02e4a580eea581f2a582
+10e5a58310f3a5846132665669606c10
+f2a57d05e908e9a572eea573f467e1a5
+6fe2a570e7a57101e9a576eda57702e1
+a579eca57af4a57b001a6e9f791dec0c
+eca5baeea5bef5a5cef6a5d6792ce1a5
+95e4a5a701e1a5a0e9a5dc733a733e75
+4276747702e3a5d9e9a5daf4a5db10f7
+a5cd07f20cf2a5d2f7a5d3f8a5d4faa5
+d5e2a5cfe3a5d0e4a5d1e7a5be02e6a5
+d7f2a5b1f3a5d86e5a6f6a7204e2a5a3
+e332e5a559f4a5cbf7a5cc012aa5c94c
+12696ee2a5ca02e4a5bfe7a5c0efa5c1
+07ee0ceea5c1f2a5c6f3a5c7f4a5c8e4
+a5c2e6a5c3e9a5c4eda5c567396a236a
+386b486c526d01eda5bcf6a5bd02eba5
+b5eea5b6f5a5b701eea5b8f0a5b910eb
+a5bb673468386901eca5b3eda5b410ee
+a5b110f3a5b26428642e653e6610eba5
+b002e5a5a8eea5a9f2a5aa05ef09efa5
+aef2a52bfaa5afe2a5abeaa5aceca5ad
+612e626c6310f2a5a609ed0feda59bee
+a59cf7a59df9a59efaa59fe1a596e6a5
+97e7a598e8a599eaa59a05ef09efa5a3
+f9a5a4faa5a5e1a5a0e6a5a1eda5a263
+c2e863a29d64a4f2650011772dee19ee
+38efa562f3a564f4a557f5a364012aa5
+5e53126861f7a55f77307834e5a552ec
+a55910efa56c10f4a56d6d466d486e7a
+728473887403f2a568f4a569f5a56af8
+a56b02e1a55ae9a55beb012a2e4e126b
+6fefa55d012aa55c4710cea55d01eea5
+60f1a56110e9a56302e7a565eba566f5
+a567623e664267466b02e1a556eba557
+f9a55810f5a55110e9a55301eca554f9
+a555001a725ce821f316f3a517f52cf6
+a51df9a51e012aa51b47126c61e7a51c
+e8a3e9efa50df2a510771277307a34e1
+a3e0e5a3e510e4a51010e5a517722e73
+547410e4a51a05ec09eca514eda515f3
+a516e8a511eaa512eba51301e2a518f7
+a51968c0ce6c306c326d3c6f6c7010f3
+a50f01e1a509e4a50a02e5a50be7a50c
+ee022aa24a42a29b4811616e01e2a3fc
+f4a3f710f0a50e68426aa28d6b03e2a5
+05eca506efa507f9a50805ef09efa3ff
+f0a500f2a501e92ceba3fdeda3fe022a
+3c42a25f4811616e01e2a3fcf4a3f70b
+4d335417542e55385610c4a3fa01c8a3
+f6d7a3f701cba3f8d3a3f94d2e503853
+10d2a3f501cfa3f0d9a3f102c6a3f2c8
+a3f3daa3f4470e473048b4684910c4a3
+ef10c6a3ed2aa3ea412a4210cea3ec10
+d5a3eb126f70efa3fb02e1a502eda503
+f6a5046511652e66326710e7a3e810e2
+a3e610e1a3e7613462386301e8a3e3f0
+a3e410eea3e110eaa3e20016726f791c
+e508e5a52bf6a54bfa45792a7a34e1a5
+1f01efa54ef5a54f10e7a550751d752e
+764a7710f7a54d04e1a546e3a547e4a5
+48e7a549f4a54a10e1a54c7240737474
+03eda542f0a543f3a544f9a54502e82c
+e9a53ff3a540012a2e4d126f6ee7a53e
+012aa53d4310cea53e10e2a54167446a
+1a6a406e446f03e2a53ae9a532f0a53b
+f7a53c10e5a53810eaa5396734686069
+01e1a536f1a53706ec0ceca531efa532
+f2a533faa534e1a52ee8a52fe9a53010
+e4a535613c6268637264766501e4a52c
+eea52d06e80ce8a523eba524f2a525f6
+a526e4a520e6a521e7a52201e4a527f1
+a52810e3a52910eea52a2aa4fa61be83
+62002373a5e51ded0feda3a7eea3acef
+a3b0f2a3b9f3a3bee5a37ae7a387e8a3
+8be9a39277417732783c794c7a72e1a3
+6001e4a3d1f2a3d202e8a3d3eba3d4f2
+a3c605f309f3a3d8f6a3d9f8a3dae5a3
+d5eea3d6f2a3d704e1a3dbe5a3dce6a3
+dde8a3def7a3df7332744875587610e2
+a3d003eaa3bff1a3c0f3a3c1f4a3c202
+efa3c3f4a3c4f6a3c509ed0feda3cbef
+a3ccf2a3cdf3a3cef5a3cfe1a3c6e3a3
+c7e4a3c8e7a3c9eba3ca6a7d6e3b6e48
+6f587068716c7203e1a3bae8a3bbf8a3
+bcfaa3bd02e7a3adeda3aef0a3af02ea
+a3b1eda3b2eea3b310f9a3b403e3a3b5
+e9a3b6f0a3b7f6a3b86a466b786ca4a6
+6d03e8a3a8eba3a9f1a3aaf5a3ab07ef
+0cefa39df2a39ef4a39ffaa3a0e8a399
+e9a39aeaa39beea39c04e3a3a1eda3a2
+f1a3a3f5a3a4f6a3a5656d6562669467
+a24468a24b6906ed0ceda395eea396ef
+a397f1a398e2a393e7a394eba37307f4
+0cf4a37ff7a380f8a381faa382e6a37b
+e8a37ceaa37deda37e03e4a383f1a384
+f4a385f9a38602e3a388eea389f8a38a
+06eb0ceba38feca390efa38bf9a391e2
+a38ce7a38de9a38e6132626463906410
+e4a37907f20cf2a365f3a366f6a367f8
+a368eca361eea362f0a363f1a36406e4
+0ce4a36ceaa36df0a36ef2a36fe1a369
+e2a36ae3a36b09ed0feda374eea375ef
+a376f1a377f5a378e3a361e6a370e8a3
+71e9a372eca37300184dc25d54c0c257
+2e5736584e59645a12616ee2adc00161
+2c631168efa99f1172e1adbf01702a73
+1175f8691165efa9db126969e9a71554
+3455a28756126169e9adbe056746674a
+68506901622a7210e8adbd10f4012aa3
+b04210d445116ce7adba106101e158e9
+042aab9e43324b364c3a5410c8ab9e10
+cea7cc10c8adbb10c1adbc012aa54b4d
+10d6a54b6130657c66116ee7adb9056d
+116d2e6e327610f4a3a610ecab8210e7
+abd667346b386c01e5ab90f5a75a10e2
+adb710f2adb8116cf5ab93126761f2ab
+e050c0c350a27252a2a8530669436942
+6f6a759079016c2a7210e3a50a10efad
+b601643e6e01e4ab31e8012aab424c10
+cbab4210e4adb302672e72387910efa5
+0c01e4ab5fefadb410e1ab67116ee4ad
+b5613e675e6801612a7210e4adb210f7
+a55f026d2e72327510f2ab1f10f2ab55
+10e2adb1116ef7a349046138654a6850
+6c76721174e9adae016c2a7510e3a51a
+10ed9f1172edadac02612e6c326e10f8
+a9df10e7adad01e9a9cff0a9d01172e4
+a5f2026a306f3675116ef2a9a3116ee7
+adaf1168e7adb04d7c4ea2a44f036742
+6c48724e7301672a6d10e1adab10e5a9
+c81161edab3a1163ebadaa016b2a7910
+e1a9c410e8a9ca07722e725a74607566
+79116df2032aa3cd492e4d325410c8ad
+a510cea75e10cda3cd116fefa9521165
+e9ada3116cf4ada4614265746c9a6f01
+642a6e10e7a53e10e9ada20368326b36
+6e3a7210e3ad9e10eaad9b10e1ad9c01
+e4a969e9ad9d0264346e387201e3ada0
+efada110e6a93310e4ad9f1179eda939
+046138624c65506b56731168f5ada901
+6e2a7210e2ada710e4ada61161f49d11
+77e1ada8116fefa55d47c40d4ac3214a
+a6ba4ba6d34c03614665a69669a69879
+01632a6410e9ad9a10e9ad99026ea673
+6fa6747410ee001a4bc139545b571457
+ac5458ac5859305a01c1abe8d7ab5810
+d4ad5b544055825603c1ad56c5ad58ce
+ad57d5a3920bce14d609d6abd2d7abbf
+daab78cead97cfabb5d2abbdcbc5b8cb
+abaaccad52cdaba904c1ad98c7ad54d3
+a55ed9ad55daabec4e824e8a50a24a52
+a26d530cce17d40cd4ad4cd5a797d6ad
+4dd9ad96cead4acfab5ed2ad4bca09ca
+ad48cbab48cdad49c3ad47c5ab77c9ab
+4c06c70cc7abe4c9ad3acca54acfa978
+c1ad37c3ad38c5ad390acc14d409d4ad
+43d7a9d3d9a5becca9e4cdad40d2ad41
+c1ad3cc5ad3dc6ad3ec7abb9c8a57a03
+c5ad46cfa944d5a797d7ab144b9e4ca2
+434d0010cd1dd40fd4a953d5a923d8ab
+e6d9a906daad36cda73bcfad94d1ad34
+d2ad95c60cc6ad33c7a927cbad93cca3
+a7c1ad92c3ad31c4ad32c5ab2601c5ad
+26cdad9104c9ad2cd3ad2dd4a7f3d5ad
+2ed6a7f943bd465f464c475c48a24849
+04c3ad23c4a70dd2ad90d3a70bd4a723
+02c9a578cfa57fd2a5850cce17d30cd3
+ad1ed4ad1fd5abebd7ad20cead1bd0ad
+1cd1ad1dc608c6ad1ac867cca771c1ad
+19c2abeec5ad8f04cdad21cead22d2a5
+fdd4a700d5a701435244a2424505d309
+d3a564d4ad89daad18c1ad14c3ad15c5
+a5570fcf1ad60cd6ad0fd7abf0d9ad8d
+daa517cfad0cd0ad0dd2ad0ed5abe9c9
+c676c9ad09ccad0acdad0bcead8c05cb
+09cba51fcfad12daad8ec5a52bc7ad45
+caad11345e34ae714178420bce14d209
+d2a9f0d5a73bd6ad05cead02cfad03d1
+ad04c909c9ab09caad00ccad01c1a3be
+c5abfdc6abfe0cd217d50cd5abe5d7ab
+fbd8abfcdaa35cd2abf8d3abf9d4abfa
+cd09cdad8bcfabf6d1abf7c4abf3c6ad
+8acc6d2aa55e312c321133b0ad891137
+b2a79710e1a9a110ef012aa7eb4c10c1
+a7eb1170e3a7d1026d2e6e327310f5a7
+df10e2a7db01e1a7c1e2a5ca01613a70
+1161ee012aa7294a10d0a729016d2a76
+10e1ad8610efad85046138684c6e7a6f
+80741168e9ad88016c2a6e10e1ad8710
+e9a55802612e6d326f10eaab3010f2a9
+ec10f2012aa7764b10c8a7761164e1a7
+801172e5012aa7854b01d0ad28d2a785
+473448a25c49127461eca5690465466c
+5a6f60727a75016a2a7210f5a9cc10f2
+a5ce116ff2012aa5ae4710c5a5ae1161
+e7a51c016e2a7410e8a5c801e7ad79ed
+a5650161506510eb032aa559432e4732
+5410d2a38a10d9ad1010d2a55910eead
+7a056c186c306d3675116ee7ad841175
+f7a5f1106e01e7a5f6f0a9616130659c
+691172e1ad83016e2a7410f2a93205ef
+24efad7df34cf4032aa3f7482e4d3254
+10d7a3f710cba3ee10cfa3f0012aa3ea
+4310cea3eae2a3fce7ad7be9ad7c1162
+f2054917493a533e5502c1ad81cbad7f
+d3ad8210cca5e710c5ad802aa5e7432a
+4710c2ad7f10c1ad7e43c0f9437044a2
+b9450267406c46741168e9012a754501
+d2ad17d4751179f0a55501622a7910ed
+ad7810e1ad770461a26868a2776fa280
+70a282791172ec0a4d335414542e5538
+5810cbad7101caab99d2ad7010c1abe1
+4d2e523e5310cbad6f02c4ad6dcba7fd
+cea53d02cfad6ed3ab24d5ab112aab11
+4138423c474c4b01c7a7badaad2a10cc
+ad6a02c1ad6bc7a387d9a37a01c52dd2
+ad6c026b2e6e327210e9ad6910eda3e4
+10f3a51001612a6510f2a50110eda503
+1170f4a50e1172f4a5c90265306f8275
+1170ecad761176e106491749324d364e
+3a5010cba3c510cea5e910d5ad7410d0
+a9822aa5e9422a4610caad7310d4ad72
+1167f2ad752aa25441aa3142056f1f6f
+3e72447501672a6810e4ad6810e9ad67
+1170efa3fb106101e8a9e2e9ad666130
+655c68116bf3ad65036c326d36733a74
+10ebad6410e9ad6310f5a36810f3a3c0
+116ee7012aa3ac4210c4a3ac00214ac1
+c052c0cc5638563c5758586259665a01
+d2abe7d7ab5804c1ad56c4ad57c5ad58
+cead57d5a39201c6ad59d3ab5110cbad
+5a03c4abf2c5abf2d4ad5bd5ab245246
+536c54a2575503c1abe1c7ad54d9ad55
+daabec05d309d3ab24d5ab11d7ab14c5
+ad46c8ab58cfa9440ecd1ad40cd4ad4c
+d5ab11d6ad4dd9ad4ecdad49cead4acf
+ab5ed2ad4bc50cc5ab77c9ab4ccaad48
+cbab48c1abedc3ad47c4abf10ecd1ad2
+0cd2abbdd6abd2d7a3f7daab78cdaba9
+cead53cfabb5d0ad52c80cc8ab9ecaab
+99cbabaaccad52c4ad4fc6ad50c7ad51
+4e644e404fa89650785102c1ad44cfad
+45d5abee09cc0fcca54acfa978d0a982
+d1abf7d4abedc1ad37c3ad38c5ad39c8
+a392c9ad3a0dcd17d40cd4ad43d7a9d3
+d9a5bedaad3ccdad40d2ad41d3ad42c7
+0cc7abb9c8a57acbad3fcca9e4c1ad3c
+c5ad3dc6ad3e4aa2484ba24c4ca2644d
+0012ce20d50fd5a923d6a54bd8abe6d9
+a906daad36cea53dcfa3f0d1ad34d2ad
+35d4a953c60fc6ad33c7a927cba7fdcc
+a3a7cda3cdc1ad30c3ad31c4ad32c5ab
+2601cfad25d0a72907d00cd0ad28d2a7
+85d7ad29daad2ac5ad26c7a7bac8a776
+cdad2708d30fd3ad2dd4a7f3d5ad2ed6
+a7f9d9ad2fc1a7ebc2ad2bc9ad2ccbab
+4241c1604598456a46a24147a24e48a2
+754907d10cd1ad24d2a56ed3a70bd4a7
+23c3ad23c4a70dcca5e7cea5e909d20e
+d2ad17d3a564d475d5abeedaad18c1ad
+14c3ad15c5a557c799c8ad1604c9a578
+cfa57fd1abf7d2a585d8a5850bd014d3
+09d3ad1ed4ad1fd7ad20d0ad1cd1ad1d
+d2a559c808c867cca771cead1bc1ad19
+c5a5aec6ad1a06d20cd2a5fdd4a700d5
+a701d6abfecba3eecdad21cead22415e
+42a24c43a2834406cb0ccba51fcfad12
+d9ad00daad13c4a52bc5a52bcaad110d
+d117d40cd4abfad7abfbd8abfcdaa35c
+d1abf7d2abf8d3abf9cc0bcc6dcda341
+ceabf0cfabf6c4abf3c5abf4c6abf500
+10cc1cd20ed2a9f0d445d5a3cdd6ad05
+d9a37accad01cead02cfad03d1ad04c7
+0cc7a387c8abffc9ab09caad00c1a3be
+c4a3acc5abfdc6abfe0010cf1dd50fd5
+abe9d6ad0fd7abf0d9ad10daa517cfad
+0cd0ad0dd2ad0ed3ab24c90cc9ad09cc
+ad0acdad0bcea3eac4abe7c6ad06c7ad
+07c8ad08342234423548374e3801382a
+3910b0ab2410b6abf21131b9abef1133
+b2abf01133b6abf12aa55e305031a26b
+3202302e33383810b0a52b01b0a517b2
+abe410b0750533233334354a3601b1ab
+51b2a5e903b0a3eab4a5e9b5a70db9a7
+2302b3abe5b4abeab7abeb3034314a32
+01b1a55eb9abe903b2abe4b3a55eb5a9
+f0b9abe506b50bb599b7abe7b8abe8b9
+a55eb1abe4b3abe6b4ab7802342e353e
+3710b2ab1102b2a3eab3abecb5abed03
+b0ab11b1ab11b4abeeb5a52b04643667
+3c68427246761173f44b116ceda57511
+68e2ad5c116fed6301613c6d01e99bee
+012aa3414110cda34110e200134c6251
+31513c53405450556a5901c5abf2d4ab
+7910c1ad4402c1abedc4abf1d9ad4e04
+c78fc8a922caad61cead53d2ad6210cb
+ad5d4c3c4d464e624f665001cbad3fd3
+ad4201c2ad2bd9ad2f04c1ad30cda9fe
+cea76ed2ad35d5ad5f10c7ad6010cdad
+3b452e4542474a494e4a644b02c8a502
+cdad27d7ad2901c799c8ad1610c2ad5d
+03c4a908cead5ed1ad24d2a56e10cfad
+252a994132423c43464410daad1301c5
+abf4c6abf501c8abffd5a9fe01c3a907
+ceabdd00207492e52bf221f299f3a347
+f6a351f9a35afa012a2e41127261e2a3
+5e022aa35c49985210d5a35fe54be651
+eb67ed757934792c7a36e125e22d01e2
+a35bf2a35a10ea012a2e41127261e2a3
+5e032aa35c312e49345310d5a35f1137
+b2a35f01d1a35dd2a35e744475547658
+7703e1a356e2a357efa358f8a35902e1
+a34de7a34eeaa34f10f9a35003eca352
+eea353f4a354f5a3556b856f586f4a70
+54726e7304e1a348e5a349e7a34aefa3
+4bf4a34c02ea87ed89fa8b05f206f293
+f395fa97e38de48fe59108ee0feea342
+efa343f1a344f9a345faa346e299e32a
+e8a1eda341022a9b4e2c5012616ced9f
+126261f49d6b3c6c3e6d586e03e37feb
+81ee83f98510eb6905ee06ee71f36df4
+73e16be26de96f03ed77ee79ef7bf07d
+65256530673668506a10e76501e24df9
+4f05ed06ed59ef5bf15de353e455e757
+02e15fec61ef636148625263646405f0
+06f045f947fa49e13fe541ea4302e927
+eb29f52b04e92ff131f233f435f93702
+e439e53be83d
+ } // trie
+ lsrs{ // 1584
+ "","","",
+ "skip","script","",
+ "aa","Latn","ET",
+ "aai","Latn","ZZ",
+ "aak","Latn","ZZ",
+ "aau","Latn","ZZ",
+ "ab","Cyrl","GE",
+ "abi","Latn","ZZ",
+ "abq","Cyrl","ZZ",
+ "abr","Latn","GH",
+ "abt","Latn","ZZ",
+ "aby","Latn","ZZ",
+ "acd","Latn","ZZ",
+ "ace","Latn","ID",
+ "ach","Latn","UG",
+ "ada","Latn","GH",
+ "ade","Latn","ZZ",
+ "adj","Latn","ZZ",
+ "dz","Tibt","BT",
+ "ady","Cyrl","RU",
+ "adz","Latn","ZZ",
+ "ae","Avst","IR",
+ "aeb","Arab","TN",
+ "aey","Latn","ZZ",
+ "af","Latn","ZA",
+ "agc","Latn","ZZ",
+ "agd","Latn","ZZ",
+ "agg","Latn","ZZ",
+ "agm","Latn","ZZ",
+ "ago","Latn","ZZ",
+ "agq","Latn","CM",
+ "aha","Latn","ZZ",
+ "ahl","Latn","ZZ",
+ "aho","Ahom","IN",
+ "ajg","Latn","ZZ",
+ "ak","Latn","GH",
+ "akk","Xsux","IQ",
+ "ala","Latn","ZZ",
+ "sq","Latn","AL",
+ "ali","Latn","ZZ",
+ "aln","Latn","XK",
+ "alt","Cyrl","RU",
+ "am","Ethi","ET",
+ "amm","Latn","ZZ",
+ "amn","Latn","ZZ",
+ "amo","Latn","NG",
+ "amp","Latn","ZZ",
+ "anc","Latn","ZZ",
+ "ank","Latn","ZZ",
+ "ann","Latn","ZZ",
+ "any","Latn","ZZ",
+ "aoj","Latn","ZZ",
+ "aom","Latn","ZZ",
+ "aoz","Latn","ID",
+ "apc","Arab","ZZ",
+ "apd","Arab","TG",
+ "ape","Latn","ZZ",
+ "apr","Latn","ZZ",
+ "aps","Latn","ZZ",
+ "apz","Latn","ZZ",
+ "ar","Arab","EG",
+ "arc","Armi","IR",
+ "arc","Nbat","JO",
+ "arc","Palm","SY",
+ "arh","Latn","ZZ",
+ "hy","Armn","AM",
+ "arn","Latn","CL",
+ "aro","Latn","BO",
+ "arq","Arab","DZ",
+ "ary","Arab","MA",
+ "arz","Arab","EG",
+ "as","Beng","IN",
+ "asa","Latn","TZ",
+ "ase","Sgnw","US",
+ "asg","Latn","ZZ",
+ "aso","Latn","ZZ",
+ "ast","Latn","ES",
+ "ata","Latn","ZZ",
+ "atg","Latn","ZZ",
+ "atj","Latn","CA",
+ "auy","Latn","ZZ",
+ "av","Cyrl","RU",
+ "avl","Arab","ZZ",
+ "avn","Latn","ZZ",
+ "avt","Latn","ZZ",
+ "avu","Latn","ZZ",
+ "awa","Deva","IN",
+ "awb","Latn","ZZ",
+ "awo","Latn","ZZ",
+ "awx","Latn","ZZ",
+ "ay","Latn","BO",
+ "ayb","Latn","ZZ",
+ "az","Latn","AZ",
+ "az","Arab","IQ",
+ "az","Arab","IR",
+ "az","Cyrl","RU",
+ "ba","Cyrl","RU",
+ "bal","Arab","PK",
+ "ban","Latn","ID",
+ "bap","Deva","NP",
+ "eu","Latn","ES",
+ "bar","Latn","AT",
+ "bas","Latn","CM",
+ "bav","Latn","ZZ",
+ "bax","Bamu","CM",
+ "bba","Latn","ZZ",
+ "bbb","Latn","ZZ",
+ "bbc","Latn","ID",
+ "bbd","Latn","ZZ",
+ "bbj","Latn","CM",
+ "bbp","Latn","ZZ",
+ "bbr","Latn","ZZ",
+ "bcf","Latn","ZZ",
+ "bch","Latn","ZZ",
+ "bci","Latn","CI",
+ "bik","Latn","PH",
+ "bcm","Latn","ZZ",
+ "bcn","Latn","ZZ",
+ "bco","Latn","ZZ",
+ "bcq","Ethi","ZZ",
+ "bcu","Latn","ZZ",
+ "bdd","Latn","ZZ",
+ "be","Cyrl","BY",
+ "bef","Latn","ZZ",
+ "beh","Latn","ZZ",
+ "bej","Arab","SD",
+ "bem","Latn","ZM",
+ "bet","Latn","ZZ",
+ "bew","Latn","ID",
+ "bex","Latn","ZZ",
+ "bez","Latn","TZ",
+ "bfd","Latn","CM",
+ "bfq","Taml","IN",
+ "bft","Arab","PK",
+ "bfy","Deva","IN",
+ "bg","Cyrl","BG",
+ "bgc","Deva","IN",
+ "bgn","Arab","PK",
+ "bgx","Grek","TR",
+ "bho","Deva","IN",
+ "bhb","Deva","IN",
+ "bhg","Latn","ZZ",
+ "bhi","Deva","IN",
+ "bhk","Latn","PH",
+ "bhl","Latn","ZZ",
+ "bhy","Latn","ZZ",
+ "bi","Latn","VU",
+ "bib","Latn","ZZ",
+ "big","Latn","ZZ",
+ "bim","Latn","ZZ",
+ "bin","Latn","NG",
+ "bio","Latn","ZZ",
+ "biq","Latn","ZZ",
+ "bjh","Latn","ZZ",
+ "bji","Ethi","ZZ",
+ "bjj","Deva","IN",
+ "bjn","Latn","ID",
+ "bjo","Latn","ZZ",
+ "bjr","Latn","ZZ",
+ "bjt","Latn","SN",
+ "bjz","Latn","ZZ",
+ "bkc","Latn","ZZ",
+ "bkm","Latn","CM",
+ "bkq","Latn","ZZ",
+ "bku","Latn","PH",
+ "bkv","Latn","ZZ",
+ "blt","Tavt","VN",
+ "bm","Latn","ML",
+ "bmh","Latn","ZZ",
+ "bmk","Latn","ZZ",
+ "bmq","Latn","ML",
+ "bmu","Latn","ZZ",
+ "bn","Beng","BD",
+ "bng","Latn","ZZ",
+ "bnm","Latn","ZZ",
+ "bnp","Latn","ZZ",
+ "bo","Tibt","CN",
+ "boj","Latn","ZZ",
+ "bom","Latn","ZZ",
+ "bon","Latn","ZZ",
+ "bpy","Beng","IN",
+ "bqc","Latn","ZZ",
+ "bqi","Arab","IR",
+ "bqp","Latn","ZZ",
+ "bqv","Latn","CI",
+ "br","Latn","FR",
+ "bra","Deva","IN",
+ "brh","Arab","PK",
+ "brx","Deva","IN",
+ "brz","Latn","ZZ",
+ "bs","Latn","BA",
+ "bsj","Latn","ZZ",
+ "bsq","Bass","LR",
+ "bss","Latn","CM",
+ "bst","Ethi","ZZ",
+ "bto","Latn","PH",
+ "btt","Latn","ZZ",
+ "btv","Deva","PK",
+ "bua","Cyrl","RU",
+ "buc","Latn","YT",
+ "bud","Latn","ZZ",
+ "bug","Latn","ID",
+ "buk","Latn","ZZ",
+ "bum","Latn","CM",
+ "buo","Latn","ZZ",
+ "my","Mymr","MM",
+ "bus","Latn","ZZ",
+ "buu","Latn","ZZ",
+ "bvb","Latn","GQ",
+ "bwd","Latn","ZZ",
+ "bwr","Latn","ZZ",
+ "bxh","Latn","ZZ",
+ "luy","Latn","KE",
+ "bye","Latn","ZZ",
+ "byn","Ethi","ER",
+ "byr","Latn","ZZ",
+ "bys","Latn","ZZ",
+ "byv","Latn","CM",
+ "byx","Latn","ZZ",
+ "bza","Latn","ZZ",
+ "bze","Latn","ML",
+ "bzf","Latn","ZZ",
+ "bzh","Latn","ZZ",
+ "bzw","Latn","ZZ",
+ "ca","Latn","ES",
+ "can","Latn","ZZ",
+ "cbj","Latn","ZZ",
+ "cch","Latn","NG",
+ "ccp","Cakm","BD",
+ "ce","Cyrl","RU",
+ "ceb","Latn","PH",
+ "cfa","Latn","ZZ",
+ "cgg","Latn","UG",
+ "ch","Latn","GU",
+ "zh","Hans","CN",
+ "zh","Hant","AU",
+ "zh","Hant","BN",
+ "zh","Hant","GF",
+ "zh","Hant","HK",
+ "zh","Hant","ID",
+ "zh","Hant","MO",
+ "zh","Hant","MY",
+ "zh","Hant","PF",
+ "zh","Hant","PH",
+ "zh","Hant","PA",
+ "zh","Hant","SR",
+ "zh","Hant","TH",
+ "zh","Hant","TW",
+ "zh","Hant","GB",
+ "zh","Hant","US",
+ "zh","Hant","VN",
+ "zh","Bopo","TW",
+ "zh","Hanb","TW",
+ "chk","Latn","FM",
+ "chm","Cyrl","RU",
+ "cho","Latn","US",
+ "chp","Latn","CA",
+ "chr","Cher","US",
+ "cja","Arab","KH",
+ "cjm","Cham","VN",
+ "cjv","Latn","ZZ",
+ "ckb","Arab","IQ",
+ "ckl","Latn","ZZ",
+ "cko","Latn","ZZ",
+ "cky","Latn","ZZ",
+ "cla","Latn","ZZ",
+ "syr","Syrc","IQ",
+ "cme","Latn","ZZ",
+ "cmg","Soyo","MN",
+ "co","Latn","FR",
+ "cop","Copt","EG",
+ "cps","Latn","PH",
+ "cr","Cans","CA",
+ "crh","Cyrl","UA",
+ "crj","Cans","CA",
+ "crk","Cans","CA",
+ "crl","Cans","CA",
+ "crm","Cans","CA",
+ "crs","Latn","SC",
+ "cs","Latn","CZ",
+ "csb","Latn","PL",
+ "csw","Cans","CA",
+ "ctd","Pauc","MM",
+ "cu","Cyrl","RU",
+ "cu","Glag","BG",
+ "cv","Cyrl","RU",
+ "cy","Latn","GB",
+ "da","Latn","DK",
+ "dad","Latn","ZZ",
+ "daf","Latn","ZZ",
+ "dag","Latn","ZZ",
+ "dah","Latn","ZZ",
+ "dak","Latn","US",
+ "dar","Cyrl","RU",
+ "dav","Latn","KE",
+ "dbd","Latn","ZZ",
+ "dbq","Latn","ZZ",
+ "dcc","Arab","IN",
+ "ddn","Latn","ZZ",
+ "de","Latn","DE",
+ "ded","Latn","ZZ",
+ "den","Latn","CA",
+ "dga","Latn","ZZ",
+ "dgh","Latn","ZZ",
+ "dgi","Latn","ZZ",
+ "dgl","Arab","ZZ",
+ "doi","Arab","IN",
+ "dgr","Latn","CA",
+ "dgz","Latn","ZZ",
+ "mwr","Deva","IN",
+ "dia","Latn","ZZ",
+ "zza","Latn","TR",
+ "dje","Latn","NE",
+ "dnj","Latn","CI",
+ "dob","Latn","ZZ",
+ "dop","Latn","ZZ",
+ "dow","Latn","ZZ",
+ "mn","Cyrl","MN",
+ "mn","Mong","CN",
+ "dri","Latn","ZZ",
+ "drs","Ethi","ZZ",
+ "dsb","Latn","DE",
+ "dtm","Latn","ML",
+ "dtp","Latn","MY",
+ "dts","Latn","ZZ",
+ "dty","Deva","NP",
+ "dua","Latn","CM",
+ "duc","Latn","ZZ",
+ "dud","Latn","ZZ",
+ "dug","Latn","ZZ",
+ "nl","Latn","NL",
+ "dv","Thaa","MV",
+ "dva","Latn","ZZ",
+ "dww","Latn","ZZ",
+ "dyo","Latn","SN",
+ "dyu","Latn","BF",
+ "dzg","Latn","ZZ",
+ "ebu","Latn","KE",
+ "ee","Latn","GH",
+ "efi","Latn","NG",
+ "egl","Latn","IT",
+ "egy","Egyp","EG",
+ "eka","Latn","ZZ",
+ "et","Latn","EE",
+ "eky","Kali","MM",
+ "el","Grek","GR",
+ "ema","Latn","ZZ",
+ "emi","Latn","ZZ",
+ "man","Latn","GM",
+ "man","Nkoo","GN",
+ "en","Latn","US",
+ "en","Shaw","GB",
+ "enn","Latn","ZZ",
+ "enq","Latn","ZZ",
+ "eo","Latn","001",
+ "eri","Latn","ZZ",
+ "es","Latn","ES",
+ "esg","Gonm","IN",
+ "ik","Latn","US",
+ "esu","Latn","US",
+ "etr","Latn","ZZ",
+ "ett","Ital","IT",
+ "etu","Latn","ZZ",
+ "etx","Latn","ZZ",
+ "ewo","Latn","CM",
+ "ext","Latn","ES",
+ "fa","Arab","IR",
+ "faa","Latn","ZZ",
+ "fab","Latn","ZZ",
+ "fag","Latn","ZZ",
+ "fai","Latn","ZZ",
+ "fan","Latn","GQ",
+ "ff","Latn","SN",
+ "ff","Adlm","GN",
+ "ffi","Latn","ZZ",
+ "ffm","Latn","ML",
+ "fi","Latn","FI",
+ "fia","Arab","SD",
+ "fil","Latn","PH",
+ "fit","Latn","SE",
+ "fj","Latn","FJ",
+ "flr","Latn","ZZ",
+ "fmp","Latn","ZZ",
+ "fo","Latn","FO",
+ "fod","Latn","ZZ",
+ "fon","Latn","BJ",
+ "for","Latn","ZZ",
+ "fpe","Latn","ZZ",
+ "fqs","Latn","ZZ",
+ "fr","Latn","FR",
+ "frc","Latn","US",
+ "frp","Latn","FR",
+ "frr","Latn","DE",
+ "frs","Latn","DE",
+ "fub","Arab","CM",
+ "fud","Latn","WF",
+ "fue","Latn","ZZ",
+ "fuf","Latn","GN",
+ "fuh","Latn","ZZ",
+ "fuq","Latn","NE",
+ "fur","Latn","IT",
+ "fuv","Latn","NG",
+ "fuy","Latn","ZZ",
+ "fvr","Latn","SD",
+ "fy","Latn","NL",
+ "ga","Latn","IE",
+ "gaa","Latn","GH",
+ "gaf","Latn","ZZ",
+ "gag","Latn","MD",
+ "gah","Latn","ZZ",
+ "gaj","Latn","ZZ",
+ "gam","Latn","ZZ",
+ "gan","Hans","CN",
+ "gaw","Latn","ZZ",
+ "gay","Latn","ID",
+ "om","Latn","ET",
+ "gba","Latn","ZZ",
+ "gbf","Latn","ZZ",
+ "gbm","Deva","IN",
+ "grb","Latn","ZZ",
+ "gby","Latn","ZZ",
+ "gbz","Arab","IR",
+ "gcr","Latn","GF",
+ "gd","Latn","GB",
+ "gde","Latn","ZZ",
+ "gdn","Latn","ZZ",
+ "gdr","Latn","ZZ",
+ "geb","Latn","ZZ",
+ "gej","Latn","ZZ",
+ "gel","Latn","ZZ",
+ "ka","Geor","GE",
+ "gez","Ethi","ET",
+ "gfk","Latn","ZZ",
+ "gvr","Deva","NP",
+ "ghs","Latn","ZZ",
+ "gil","Latn","KI",
+ "gim","Latn","ZZ",
+ "gjk","Arab","PK",
+ "gjn","Latn","ZZ",
+ "gju","Arab","PK",
+ "gkn","Latn","ZZ",
+ "gkp","Latn","ZZ",
+ "gl","Latn","ES",
+ "glk","Arab","IR",
+ "gmm","Latn","ZZ",
+ "gmv","Ethi","ZZ",
+ "gn","Latn","PY",
+ "gnd","Latn","ZZ",
+ "gng","Latn","ZZ",
+ "gon","Telu","IN",
+ "god","Latn","ZZ",
+ "gof","Ethi","ZZ",
+ "goi","Latn","ZZ",
+ "gom","Deva","IN",
+ "gor","Latn","ID",
+ "gos","Latn","NL",
+ "got","Goth","UA",
+ "grc","Cprt","CY",
+ "grc","Linb","GR",
+ "grt","Beng","IN",
+ "grw","Latn","ZZ",
+ "gsw","Latn","CH",
+ "gu","Gujr","IN",
+ "gub","Latn","BR",
+ "guc","Latn","CO",
+ "gud","Latn","ZZ",
+ "gur","Latn","GH",
+ "guw","Latn","ZZ",
+ "gux","Latn","ZZ",
+ "guz","Latn","KE",
+ "gv","Latn","IM",
+ "gvf","Latn","ZZ",
+ "gvs","Latn","ZZ",
+ "gwc","Arab","ZZ",
+ "gwi","Latn","CA",
+ "gwt","Arab","ZZ",
+ "gyi","Latn","ZZ",
+ "ha","Latn","NG",
+ "ha","Arab","SD",
+ "ha","Arab","CM",
+ "hag","Latn","ZZ",
+ "hak","Hans","CN",
+ "ham","Latn","ZZ",
+ "haw","Latn","US",
+ "haz","Arab","AF",
+ "hbb","Latn","ZZ",
+ "hdy","Ethi","ZZ",
+ "he","Hebr","IL",
+ "hhy","Latn","ZZ",
+ "hi","Deva","IN",
+ "hia","Latn","ZZ",
+ "hif","Latn","FJ",
+ "hig","Latn","ZZ",
+ "hih","Latn","ZZ",
+ "hil","Latn","PH",
+ "srx","Deva","IN",
+ "hla","Latn","ZZ",
+ "hlu","Hluw","TR",
+ "hmd","Plrd","CN",
+ "hmt","Latn","ZZ",
+ "hnd","Arab","PK",
+ "hne","Deva","IN",
+ "hnj","Hmng","LA",
+ "hnn","Latn","PH",
+ "hno","Arab","PK",
+ "ho","Latn","PG",
+ "hoc","Deva","IN",
+ "hoj","Deva","IN",
+ "hot","Latn","ZZ",
+ "hr","Latn","HR",
+ "hsb","Latn","DE",
+ "hsn","Hans","CN",
+ "ht","Latn","HT",
+ "hu","Latn","HU",
+ "hui","Latn","ZZ",
+ "hz","Latn","NA",
+ "ia","Latn","001",
+ "ian","Latn","ZZ",
+ "iar","Latn","ZZ",
+ "iba","Latn","MY",
+ "ibb","Latn","NG",
+ "iby","Latn","ZZ",
+ "ica","Latn","ZZ",
+ "is","Latn","IS",
+ "ich","Latn","ZZ",
+ "id","Latn","ID",
+ "idd","Latn","ZZ",
+ "idi","Latn","ZZ",
+ "idu","Latn","ZZ",
+ "ife","Latn","TG",
+ "ig","Latn","NG",
+ "igb","Latn","ZZ",
+ "ige","Latn","ZZ",
+ "ii","Yiii","CN",
+ "ijj","Latn","ZZ",
+ "iu","Cans","CA",
+ "ikk","Latn","ZZ",
+ "ikt","Latn","CA",
+ "ikw","Latn","ZZ",
+ "ikx","Latn","ZZ",
+ "ilo","Latn","PH",
+ "imo","Latn","ZZ",
+ "in","Latn","ID",
+ "inh","Cyrl","RU",
+ "io","Latn","001",
+ "iou","Latn","ZZ",
+ "iri","Latn","ZZ",
+ "it","Latn","IT",
+ "iw","Hebr","IL",
+ "iwm","Latn","ZZ",
+ "iws","Latn","ZZ",
+ "izh","Latn","RU",
+ "izi","Latn","ZZ",
+ "ja","Jpan","JP",
+ "jab","Latn","ZZ",
+ "jam","Latn","JM",
+ "jbo","Latn","001",
+ "jbu","Latn","ZZ",
+ "jen","Latn","ZZ",
+ "jgk","Latn","ZZ",
+ "jgo","Latn","CM",
+ "yi","Hebr","001",
+ "jib","Latn","ZZ",
+ "jmc","Latn","TZ",
+ "jml","Deva","NP",
+ "jra","Latn","ZZ",
+ "jut","Latn","DK",
+ "jv","Latn","ID",
+ "jw","Latn","ID",
+ "kaa","Cyrl","UZ",
+ "kab","Latn","DZ",
+ "kac","Latn","MM",
+ "kad","Latn","ZZ",
+ "kai","Latn","ZZ",
+ "kaj","Latn","NG",
+ "kam","Latn","KE",
+ "kao","Latn","ML",
+ "kbd","Cyrl","RU",
+ "kbm","Latn","ZZ",
+ "kbp","Latn","ZZ",
+ "kbq","Latn","ZZ",
+ "kbx","Latn","ZZ",
+ "kby","Arab","NE",
+ "kcg","Latn","NG",
+ "kck","Latn","ZW",
+ "kcl","Latn","ZZ",
+ "kct","Latn","ZZ",
+ "kde","Latn","TZ",
+ "kdh","Arab","TG",
+ "kdl","Latn","ZZ",
+ "kdt","Thai","TH",
+ "kea","Latn","CV",
+ "ken","Latn","CM",
+ "kez","Latn","ZZ",
+ "kfo","Latn","CI",
+ "kfr","Deva","IN",
+ "kfy","Deva","IN",
+ "kg","Latn","CD",
+ "kge","Latn","ID",
+ "kgf","Latn","ZZ",
+ "kgp","Latn","BR",
+ "kha","Latn","IN",
+ "khb","Talu","CN",
+ "khn","Deva","IN",
+ "khq","Latn","ML",
+ "khs","Latn","ZZ",
+ "kht","Mymr","IN",
+ "khw","Arab","PK",
+ "khz","Latn","ZZ",
+ "ki","Latn","KE",
+ "kij","Latn","ZZ",
+ "kiu","Latn","TR",
+ "kiw","Latn","ZZ",
+ "kj","Latn","NA",
+ "kjd","Latn","ZZ",
+ "kjg","Laoo","LA",
+ "kjs","Latn","ZZ",
+ "kjy","Latn","ZZ",
+ "kk","Cyrl","KZ",
+ "kk","Arab","AF",
+ "kk","Arab","CN",
+ "kk","Arab","IR",
+ "kk","Arab","MN",
+ "kkc","Latn","ZZ",
+ "kkj","Latn","CM",
+ "kl","Latn","GL",
+ "kln","Latn","KE",
+ "klq","Latn","ZZ",
+ "klt","Latn","ZZ",
+ "klx","Latn","ZZ",
+ "km","Khmr","KH",
+ "kmb","Latn","AO",
+ "kmh","Latn","ZZ",
+ "kmo","Latn","ZZ",
+ "ku","Latn","TR",
+ "ku","Arab","LB",
+ "ku","Arab","IQ",
+ "kms","Latn","ZZ",
+ "kmu","Latn","ZZ",
+ "kmw","Latn","ZZ",
+ "kn","Knda","IN",
+ "kr","Latn","ZZ",
+ "knf","Latn","GW",
+ "kok","Deva","IN",
+ "knp","Latn","ZZ",
+ "ko","Kore","KR",
+ "koi","Cyrl","RU",
+ "kol","Latn","ZZ",
+ "kos","Latn","FM",
+ "koz","Latn","ZZ",
+ "kpe","Latn","LR",
+ "kpf","Latn","ZZ",
+ "kpo","Latn","ZZ",
+ "kpr","Latn","ZZ",
+ "kv","Cyrl","RU",
+ "kpx","Latn","ZZ",
+ "kqb","Latn","ZZ",
+ "kqf","Latn","ZZ",
+ "kqs","Latn","ZZ",
+ "kqy","Ethi","ZZ",
+ "krc","Cyrl","RU",
+ "kri","Latn","SL",
+ "krj","Latn","PH",
+ "krl","Latn","RU",
+ "krs","Latn","ZZ",
+ "kru","Deva","IN",
+ "ks","Arab","IN",
+ "ksb","Latn","TZ",
+ "ksd","Latn","ZZ",
+ "ksf","Latn","CM",
+ "ksh","Latn","DE",
+ "ksj","Latn","ZZ",
+ "ksr","Latn","ZZ",
+ "ktb","Ethi","ZZ",
+ "ktm","Latn","ZZ",
+ "kto","Latn","ZZ",
+ "kub","Latn","ZZ",
+ "kud","Latn","ZZ",
+ "kue","Latn","ZZ",
+ "kuj","Latn","ZZ",
+ "kum","Cyrl","RU",
+ "kun","Latn","ZZ",
+ "kup","Latn","ZZ",
+ "kus","Latn","ZZ",
+ "kvg","Latn","ZZ",
+ "kvr","Latn","ID",
+ "kvx","Arab","PK",
+ "kw","Latn","GB",
+ "kwj","Latn","ZZ",
+ "kwo","Latn","ZZ",
+ "yam","Latn","ZZ",
+ "kxa","Latn","ZZ",
+ "kxc","Ethi","ZZ",
+ "tvd","Latn","ZZ",
+ "kxm","Thai","TH",
+ "kxp","Arab","PK",
+ "kxw","Latn","ZZ",
+ "kxz","Latn","ZZ",
+ "ky","Cyrl","KG",
+ "ky","Arab","CN",
+ "ky","Latn","TR",
+ "kye","Latn","ZZ",
+ "kyx","Latn","ZZ",
+ "kzr","Latn","ZZ",
+ "la","Latn","VA",
+ "lab","Lina","GR",
+ "lad","Hebr","IL",
+ "lag","Latn","TZ",
+ "lah","Arab","PK",
+ "laj","Latn","UG",
+ "las","Latn","ZZ",
+ "lb","Latn","LU",
+ "lbe","Cyrl","RU",
+ "lbu","Latn","ZZ",
+ "lbw","Latn","ID",
+ "lcm","Latn","ZZ",
+ "lcp","Thai","CN",
+ "ldb","Latn","ZZ",
+ "led","Latn","ZZ",
+ "lee","Latn","ZZ",
+ "lem","Latn","ZZ",
+ "lep","Lepc","IN",
+ "leq","Latn","ZZ",
+ "leu","Latn","ZZ",
+ "lez","Cyrl","RU",
+ "lg","Latn","UG",
+ "lgg","Latn","ZZ",
+ "li","Latn","NL",
+ "lia","Latn","ZZ",
+ "lid","Latn","ZZ",
+ "lif","Deva","NP",
+ "lif","Limb","IN",
+ "lig","Latn","ZZ",
+ "lih","Latn","ZZ",
+ "lij","Latn","IT",
+ "lis","Lisu","CN",
+ "ljp","Latn","ID",
+ "lki","Arab","IR",
+ "lkt","Latn","US",
+ "lle","Latn","ZZ",
+ "lln","Latn","ZZ",
+ "lmn","Telu","IN",
+ "lmo","Latn","IT",
+ "lmp","Latn","ZZ",
+ "ln","Latn","CD",
+ "lns","Latn","ZZ",
+ "lnu","Latn","ZZ",
+ "lo","Laoo","LA",
+ "loj","Latn","ZZ",
+ "lok","Latn","ZZ",
+ "lol","Latn","CD",
+ "lor","Latn","ZZ",
+ "los","Latn","ZZ",
+ "loz","Latn","ZM",
+ "lrc","Arab","IR",
+ "lt","Latn","LT",
+ "ltg","Latn","LV",
+ "lu","Latn","CD",
+ "lua","Latn","CD",
+ "luo","Latn","KE",
+ "luz","Arab","IR",
+ "lv","Latn","LV",
+ "lwl","Thai","TH",
+ "lzh","Hans","CN",
+ "lzz","Latn","TR",
+ "mk","Cyrl","MK",
+ "mad","Latn","ID",
+ "maf","Latn","CM",
+ "mag","Deva","IN",
+ "mai","Deva","IN",
+ "mak","Latn","ID",
+ "mi","Latn","NZ",
+ "mas","Latn","KE",
+ "maw","Latn","ZZ",
+ "ms","Latn","MY",
+ "ms","Arab","CC",
+ "ms","Arab","ID",
+ "maz","Latn","MX",
+ "mbh","Latn","ZZ",
+ "mbo","Latn","ZZ",
+ "mbq","Latn","ZZ",
+ "mbu","Latn","ZZ",
+ "mbw","Latn","ZZ",
+ "mci","Latn","ZZ",
+ "mcp","Latn","ZZ",
+ "mcq","Latn","ZZ",
+ "mcr","Latn","ZZ",
+ "mcu","Latn","ZZ",
+ "mda","Latn","ZZ",
+ "mde","Arab","ZZ",
+ "mdf","Cyrl","RU",
+ "mdh","Latn","PH",
+ "mdj","Latn","ZZ",
+ "mdr","Latn","ID",
+ "mdx","Ethi","ZZ",
+ "med","Latn","ZZ",
+ "mee","Latn","ZZ",
+ "mek","Latn","ZZ",
+ "men","Latn","SL",
+ "mer","Latn","KE",
+ "met","Latn","ZZ",
+ "meu","Latn","ZZ",
+ "mfa","Arab","TH",
+ "mfe","Latn","MU",
+ "mfn","Latn","ZZ",
+ "mfo","Latn","ZZ",
+ "mfq","Latn","ZZ",
+ "mg","Latn","MG",
+ "mgh","Latn","MZ",
+ "mgl","Latn","ZZ",
+ "mgo","Latn","CM",
+ "mgp","Deva","NP",
+ "mgy","Latn","TZ",
+ "mh","Latn","MH",
+ "mhi","Latn","ZZ",
+ "mhl","Latn","ZZ",
+ "mif","Latn","ZZ",
+ "min","Latn","ID",
+ "mis","Hatr","IQ",
+ "mis","Medf","NG",
+ "miw","Latn","ZZ",
+ "mki","Arab","ZZ",
+ "mkl","Latn","ZZ",
+ "mkp","Latn","ZZ",
+ "mkw","Latn","ZZ",
+ "ml","Mlym","IN",
+ "mle","Latn","ZZ",
+ "mlp","Latn","ZZ",
+ "mls","Latn","SD",
+ "mmo","Latn","ZZ",
+ "mmu","Latn","ZZ",
+ "mmx","Latn","ZZ",
+ "mna","Latn","ZZ",
+ "mnf","Latn","ZZ",
+ "mni","Beng","IN",
+ "mnw","Mymr","MM",
+ "ro","Latn","RO",
+ "moa","Latn","ZZ",
+ "moe","Latn","CA",
+ "moh","Latn","CA",
+ "mos","Latn","BF",
+ "mox","Latn","ZZ",
+ "mpp","Latn","ZZ",
+ "mps","Latn","ZZ",
+ "mpt","Latn","ZZ",
+ "mpx","Latn","ZZ",
+ "mql","Latn","ZZ",
+ "mr","Deva","IN",
+ "mrd","Deva","NP",
+ "mrj","Cyrl","RU",
+ "mro","Mroo","BD",
+ "mt","Latn","MT",
+ "mtc","Latn","ZZ",
+ "mtf","Latn","ZZ",
+ "mti","Latn","ZZ",
+ "mtr","Deva","IN",
+ "mua","Latn","CM",
+ "raj","Deva","IN",
+ "mur","Latn","ZZ",
+ "mus","Latn","US",
+ "mva","Latn","ZZ",
+ "mvn","Latn","ZZ",
+ "mvy","Arab","PK",
+ "mwk","Latn","ML",
+ "mwv","Latn","ID",
+ "mww","Hmnp","US",
+ "mxc","Latn","ZW",
+ "mxm","Latn","ZZ",
+ "myk","Latn","ZZ",
+ "mym","Ethi","ZZ",
+ "myv","Cyrl","RU",
+ "myw","Latn","ZZ",
+ "myx","Latn","UG",
+ "myz","Mand","IR",
+ "mzk","Latn","ZZ",
+ "mzm","Latn","ZZ",
+ "mzn","Arab","IR",
+ "mzp","Latn","ZZ",
+ "mzw","Latn","ZZ",
+ "mzz","Latn","ZZ",
+ "na","Latn","NR",
+ "nac","Latn","ZZ",
+ "naf","Latn","ZZ",
+ "nak","Latn","ZZ",
+ "nan","Hans","CN",
+ "nap","Latn","IT",
+ "naq","Latn","NA",
+ "nas","Latn","ZZ",
+ "nb","Latn","NO",
+ "nca","Latn","ZZ",
+ "nce","Latn","ZZ",
+ "ncf","Latn","ZZ",
+ "nch","Latn","MX",
+ "nco","Latn","ZZ",
+ "ncu","Latn","ZZ",
+ "nd","Latn","ZW",
+ "ndc","Latn","MZ",
+ "nds","Latn","DE",
+ "ne","Deva","NP",
+ "neb","Latn","ZZ",
+ "new","Deva","NP",
+ "nex","Latn","ZZ",
+ "nfr","Latn","ZZ",
+ "ng","Latn","NA",
+ "nga","Latn","ZZ",
+ "ngb","Latn","ZZ",
+ "ngl","Latn","MZ",
+ "nhb","Latn","ZZ",
+ "nhe","Latn","MX",
+ "nhw","Latn","MX",
+ "nif","Latn","ZZ",
+ "nii","Latn","ZZ",
+ "nij","Latn","ID",
+ "nin","Latn","ZZ",
+ "niu","Latn","NU",
+ "niy","Latn","ZZ",
+ "niz","Latn","ZZ",
+ "njo","Latn","IN",
+ "nkg","Latn","ZZ",
+ "nko","Latn","ZZ",
+ "nmg","Latn","CM",
+ "nmz","Latn","ZZ",
+ "nn","Latn","NO",
+ "nnf","Latn","ZZ",
+ "nnh","Latn","CM",
+ "nnk","Latn","ZZ",
+ "nnm","Latn","ZZ",
+ "nnp","Wcho","IN",
+ "no","Latn","NO",
+ "nod","Lana","TH",
+ "noe","Deva","IN",
+ "non","Runr","SE",
+ "nop","Latn","ZZ",
+ "nou","Latn","ZZ",
+ "nqo","Nkoo","GN",
+ "nr","Latn","ZA",
+ "nrb","Latn","ZZ",
+ "nsk","Cans","CA",
+ "nsn","Latn","ZZ",
+ "nso","Latn","ZA",
+ "nss","Latn","ZZ",
+ "ntm","Latn","ZZ",
+ "ntr","Latn","ZZ",
+ "nui","Latn","ZZ",
+ "nup","Latn","ZZ",
+ "nus","Latn","SS",
+ "nuv","Latn","ZZ",
+ "nux","Latn","ZZ",
+ "nv","Latn","US",
+ "nwb","Latn","ZZ",
+ "nxq","Latn","CN",
+ "nxr","Latn","ZZ",
+ "ny","Latn","MW",
+ "nym","Latn","TZ",
+ "nyn","Latn","UG",
+ "nzi","Latn","GH",
+ "oc","Latn","FR",
+ "ogc","Latn","ZZ",
+ "okr","Latn","ZZ",
+ "okv","Latn","ZZ",
+ "ong","Latn","ZZ",
+ "onn","Latn","ZZ",
+ "ons","Latn","ZZ",
+ "opm","Latn","ZZ",
+ "or","Orya","IN",
+ "oro","Latn","ZZ",
+ "oru","Arab","ZZ",
+ "os","Cyrl","GE",
+ "osa","Osge","US",
+ "ota","Arab","ZZ",
+ "otk","Orkh","MN",
+ "ozm","Latn","ZZ",
+ "pa","Guru","IN",
+ "pa","Arab","PK",
+ "pag","Latn","PH",
+ "pal","Phli","IR",
+ "pal","Phlp","CN",
+ "pam","Latn","PH",
+ "pap","Latn","AW",
+ "pau","Latn","PW",
+ "pbi","Latn","ZZ",
+ "ps","Arab","AF",
+ "pcd","Latn","FR",
+ "pcm","Latn","NG",
+ "pdc","Latn","US",
+ "pdt","Latn","CA",
+ "ped","Latn","ZZ",
+ "peo","Xpeo","IR",
+ "pex","Latn","ZZ",
+ "pfl","Latn","DE",
+ "phl","Arab","ZZ",
+ "phn","Phnx","LB",
+ "pil","Latn","ZZ",
+ "pip","Latn","ZZ",
+ "pka","Brah","IN",
+ "pko","Latn","KE",
+ "pl","Latn","PL",
+ "pla","Latn","ZZ",
+ "pms","Latn","IT",
+ "png","Latn","ZZ",
+ "pnn","Latn","ZZ",
+ "pnt","Grek","GR",
+ "pon","Latn","FM",
+ "ppo","Latn","ZZ",
+ "pra","Khar","PK",
+ "prd","Arab","IR",
+ "prg","Latn","001",
+ "pss","Latn","ZZ",
+ "pt","Latn","BR",
+ "ptp","Latn","ZZ",
+ "puu","Latn","GA",
+ "pwa","Latn","ZZ",
+ "qu","Latn","PE",
+ "quc","Latn","GT",
+ "qug","Latn","EC",
+ "rai","Latn","ZZ",
+ "rao","Latn","ZZ",
+ "rcf","Latn","RE",
+ "rej","Latn","ID",
+ "rel","Latn","ZZ",
+ "res","Latn","ZZ",
+ "rgn","Latn","IT",
+ "rhg","Arab","MM",
+ "ria","Latn","IN",
+ "rif","Tfng","MA",
+ "rif","Latn","NL",
+ "rjs","Deva","NP",
+ "rkt","Beng","BD",
+ "rm","Latn","CH",
+ "rmf","Latn","FI",
+ "rmo","Latn","CH",
+ "rmt","Arab","IR",
+ "rmu","Latn","SE",
+ "rn","Latn","BI",
+ "rna","Latn","ZZ",
+ "rng","Latn","MZ",
+ "rob","Latn","ID",
+ "rof","Latn","TZ",
+ "roo","Latn","ZZ",
+ "rro","Latn","ZZ",
+ "rtm","Latn","FJ",
+ "ru","Cyrl","RU",
+ "rue","Cyrl","UA",
+ "rug","Latn","SB",
+ "rw","Latn","RW",
+ "rwk","Latn","TZ",
+ "rwo","Latn","ZZ",
+ "ryu","Kana","JP",
+ "sa","Deva","IN",
+ "saf","Latn","GH",
+ "sah","Cyrl","RU",
+ "saq","Latn","KE",
+ "sas","Latn","ID",
+ "sat","Latn","IN",
+ "sav","Latn","SN",
+ "saz","Saur","IN",
+ "sba","Latn","ZZ",
+ "sbe","Latn","ZZ",
+ "sbp","Latn","TZ",
+ "sc","Latn","IT",
+ "sr","Cyrl","RS",
+ "sr","Latn","RU",
+ "sr","Latn","ME",
+ "sr","Latn","RO",
+ "sr","Latn","TR",
+ "sck","Deva","IN",
+ "scl","Arab","ZZ",
+ "scn","Latn","IT",
+ "sco","Latn","GB",
+ "scs","Latn","CA",
+ "sd","Arab","PK",
+ "sd","Deva","IN",
+ "sd","Khoj","IN",
+ "sd","Sind","IN",
+ "sdc","Latn","IT",
+ "sdh","Arab","IR",
+ "se","Latn","NO",
+ "sef","Latn","CI",
+ "seh","Latn","MZ",
+ "sei","Latn","MX",
+ "ses","Latn","ML",
+ "sg","Latn","CF",
+ "sga","Ogam","IE",
+ "sgs","Latn","LT",
+ "sgw","Ethi","ZZ",
+ "sgz","Latn","ZZ",
+ "shi","Tfng","MA",
+ "shk","Latn","ZZ",
+ "shn","Mymr","MM",
+ "shu","Arab","ZZ",
+ "si","Sinh","LK",
+ "sid","Latn","ET",
+ "sig","Latn","ZZ",
+ "sil","Latn","ZZ",
+ "sim","Latn","ZZ",
+ "sjr","Latn","ZZ",
+ "sk","Latn","SK",
+ "skc","Latn","ZZ",
+ "skr","Arab","PK",
+ "sks","Latn","ZZ",
+ "sl","Latn","SI",
+ "sld","Latn","ZZ",
+ "sli","Latn","PL",
+ "sll","Latn","ZZ",
+ "sly","Latn","ID",
+ "sm","Latn","WS",
+ "sma","Latn","SE",
+ "smj","Latn","SE",
+ "smn","Latn","FI",
+ "smp","Samr","IL",
+ "smq","Latn","ZZ",
+ "sms","Latn","FI",
+ "sn","Latn","ZW",
+ "snc","Latn","ZZ",
+ "snk","Latn","ML",
+ "snp","Latn","ZZ",
+ "snx","Latn","ZZ",
+ "sny","Latn","ZZ",
+ "so","Latn","SO",
+ "sog","Sogd","UZ",
+ "sok","Latn","ZZ",
+ "soq","Latn","ZZ",
+ "sou","Thai","TH",
+ "soy","Latn","ZZ",
+ "spd","Latn","ZZ",
+ "spl","Latn","ZZ",
+ "sps","Latn","ZZ",
+ "srb","Sora","IN",
+ "srn","Latn","SR",
+ "srr","Latn","SN",
+ "ss","Latn","ZA",
+ "ssd","Latn","ZZ",
+ "ssg","Latn","ZZ",
+ "ssy","Latn","ER",
+ "st","Latn","ZA",
+ "stk","Latn","ZZ",
+ "stq","Latn","DE",
+ "su","Latn","ID",
+ "sua","Latn","ZZ",
+ "sue","Latn","ZZ",
+ "suk","Latn","TZ",
+ "sur","Latn","ZZ",
+ "sus","Latn","GN",
+ "sv","Latn","SE",
+ "sw","Latn","TZ",
+ "swb","Arab","YT",
+ "swc","Latn","CD",
+ "swg","Latn","DE",
+ "swp","Latn","ZZ",
+ "swv","Deva","IN",
+ "sxn","Latn","ID",
+ "sxw","Latn","ZZ",
+ "syl","Beng","BD",
+ "szl","Latn","PL",
+ "ta","Taml","IN",
+ "taj","Deva","NP",
+ "tal","Latn","ZZ",
+ "tan","Latn","ZZ",
+ "taq","Latn","ZZ",
+ "tbc","Latn","ZZ",
+ "tbd","Latn","ZZ",
+ "tbf","Latn","ZZ",
+ "tbg","Latn","ZZ",
+ "tbo","Latn","ZZ",
+ "tbw","Latn","PH",
+ "tbz","Latn","ZZ",
+ "tci","Latn","ZZ",
+ "tcy","Knda","IN",
+ "tdd","Tale","CN",
+ "tdg","Deva","NP",
+ "tdh","Deva","NP",
+ "te","Telu","IN",
+ "ted","Latn","ZZ",
+ "tem","Latn","SL",
+ "teo","Latn","UG",
+ "tet","Latn","TL",
+ "tfi","Latn","ZZ",
+ "tg","Cyrl","TJ",
+ "tg","Arab","PK",
+ "tgc","Latn","ZZ",
+ "tgo","Latn","ZZ",
+ "tgu","Latn","ZZ",
+ "th","Thai","TH",
+ "thl","Deva","NP",
+ "thq","Deva","NP",
+ "thr","Deva","NP",
+ "ti","Ethi","ET",
+ "tif","Latn","ZZ",
+ "tig","Ethi","ER",
+ "tik","Latn","ZZ",
+ "tim","Latn","ZZ",
+ "tio","Latn","ZZ",
+ "tiv","Latn","NG",
+ "tk","Latn","TM",
+ "tkl","Latn","TK",
+ "tkr","Latn","AZ",
+ "tkt","Deva","NP",
+ "tl","Latn","PH",
+ "tlf","Latn","ZZ",
+ "tlx","Latn","ZZ",
+ "tly","Latn","AZ",
+ "tmh","Latn","NE",
+ "tmy","Latn","ZZ",
+ "tn","Latn","ZA",
+ "tnh","Latn","ZZ",
+ "to","Latn","TO",
+ "tof","Latn","ZZ",
+ "tog","Latn","MW",
+ "toq","Latn","ZZ",
+ "tpi","Latn","PG",
+ "tpm","Latn","ZZ",
+ "tpz","Latn","ZZ",
+ "tqo","Latn","ZZ",
+ "tr","Latn","TR",
+ "tru","Latn","TR",
+ "trv","Latn","TW",
+ "trw","Arab","ZZ",
+ "ts","Latn","ZA",
+ "tsd","Grek","GR",
+ "tsf","Deva","NP",
+ "tsg","Latn","PH",
+ "tsj","Tibt","BT",
+ "tsw","Latn","ZZ",
+ "tt","Cyrl","RU",
+ "ttd","Latn","ZZ",
+ "tte","Latn","ZZ",
+ "ttj","Latn","UG",
+ "ttr","Latn","ZZ",
+ "tts","Thai","TH",
+ "ttt","Latn","AZ",
+ "tuh","Latn","ZZ",
+ "tul","Latn","ZZ",
+ "tum","Latn","MW",
+ "tuq","Latn","ZZ",
+ "tvl","Latn","TV",
+ "tvu","Latn","ZZ",
+ "twh","Latn","ZZ",
+ "twq","Latn","NE",
+ "txg","Tang","CN",
+ "ty","Latn","PF",
+ "tya","Latn","ZZ",
+ "tyv","Cyrl","RU",
+ "tzm","Latn","MA",
+ "ubu","Latn","ZZ",
+ "udm","Cyrl","RU",
+ "ug","Arab","CN",
+ "ug","Cyrl","KZ",
+ "ug","Cyrl","MN",
+ "uga","Ugar","SY",
+ "uk","Cyrl","UA",
+ "uli","Latn","FM",
+ "umb","Latn","AO",
+ "en","Latn","NG",
+ "en","Latn","AU",
+ "es","Latn","MX",
+ "sw","Latn","CD",
+ "en","Latn","ZA",
+ "es","Latn","CU",
+ "en","Latn","PG",
+ "en","Latn","GU",
+ "uz","Latn","UZ",
+ "ar","Arab","SA",
+ "en","Latn","GB",
+ "es","Latn","419",
+ "pap","Latn","CW",
+ "ar","Arab","SD",
+ "ar","Arab","YE",
+ "ca","Latn","AD",
+ "ar","Arab","AE",
+ "fa","Arab","AF",
+ "pt","Latn","AO",
+ "und","Latn","AQ",
+ "es","Latn","AR",
+ "sm","Latn","AS",
+ "de","Latn","AT",
+ "nl","Latn","AW",
+ "sv","Latn","AX",
+ "nl","Latn","BE",
+ "fr","Latn","BF",
+ "ar","Arab","BH",
+ "fr","Latn","BJ",
+ "fr","Latn","BL",
+ "ms","Latn","BN",
+ "es","Latn","BO",
+ "pap","Latn","BQ",
+ "und","Latn","BV",
+ "fr","Latn","CF",
+ "fr","Latn","CG",
+ "de","Latn","CH",
+ "fr","Latn","CI",
+ "es","Latn","CL",
+ "fr","Latn","CM",
+ "es","Latn","CO",
+ "und","Latn","CP",
+ "es","Latn","CR",
+ "pt","Latn","CV",
+ "el","Grek","CY",
+ "aa","Latn","DJ",
+ "es","Latn","DO",
+ "ar","Arab","DZ",
+ "es","Latn","EA",
+ "es","Latn","EC",
+ "ar","Arab","EH",
+ "ti","Ethi","ER",
+ "de","Latn","EZ",
+ "fr","Latn","GA",
+ "fr","Latn","GF",
+ "fr","Latn","GN",
+ "fr","Latn","GP",
+ "es","Latn","GQ",
+ "und","Latn","GS",
+ "es","Latn","GT",
+ "pt","Latn","GW",
+ "und","Latn","HM",
+ "es","Latn","HN",
+ "es","Latn","IC",
+ "ar","Arab","IQ",
+ "ar","Arab","JO",
+ "sw","Latn","KE",
+ "ar","Arab","KM",
+ "ko","Kore","KP",
+ "ar","Arab","KW",
+ "ru","Cyrl","KZ",
+ "ar","Arab","LB",
+ "de","Latn","LI",
+ "st","Latn","LS",
+ "fr","Latn","LU",
+ "ar","Arab","LY",
+ "ar","Arab","MA",
+ "fr","Latn","MC",
+ "ro","Latn","MD",
+ "fr","Latn","MF",
+ "fr","Latn","MQ",
+ "ar","Arab","MR",
+ "pt","Latn","MZ",
+ "af","Latn","NA",
+ "fr","Latn","NC",
+ "ha","Latn","NE",
+ "es","Latn","NI",
+ "ar","Arab","OM",
+ "es","Latn","PA",
+ "es","Latn","PE",
+ "fr","Latn","PF",
+ "ur","Arab","PK",
+ "fr","Latn","PM",
+ "es","Latn","PR",
+ "ar","Arab","PS",
+ "pt","Latn","PT",
+ "ar","Arab","QA",
+ "en","Latn","DG",
+ "fr","Latn","RE",
+ "fr","Latn","SC",
+ "nb","Latn","SJ",
+ "it","Latn","SM",
+ "fr","Latn","SN",
+ "nl","Latn","SR",
+ "pt","Latn","ST",
+ "es","Latn","SV",
+ "ar","Arab","SY",
+ "fr","Latn","TD",
+ "fr","Latn","TF",
+ "fr","Latn","TG",
+ "pt","Latn","TL",
+ "ar","Arab","TN",
+ "sw","Latn","UG",
+ "es","Latn","UY",
+ "it","Latn","VA",
+ "vi","Latn","VN",
+ "es","Latn","VE",
+ "fr","Latn","WF",
+ "sq","Latn","XK",
+ "fr","Latn","YT",
+ "lez","Aghb","RU",
+ "ks","Arab","GB",
+ "ur","Arab","IN",
+ "ur","Arab","MU",
+ "ha","Arab","NG",
+ "fa","Arab","TJ",
+ "az","Arab","TR",
+ "ban","Bali","ID",
+ "bbc","Batk","ID",
+ "sa","Bhks","IN",
+ "fr","Brai","FR",
+ "bug","Bugi","ID",
+ "bku","Buhd","PH",
+ "xcr","Cari","TR",
+ "mk","Cyrl","AL",
+ "sr","Cyrl","BA",
+ "mk","Cyrl","GR",
+ "uk","Cyrl","MD",
+ "bg","Cyrl","RO",
+ "uk","Cyrl","SK",
+ "kbd","Cyrl","TR",
+ "sr","Cyrl","XK",
+ "ne","Deva","BT",
+ "hif","Deva","FJ",
+ "bho","Deva","MU",
+ "doi","Dogr","IN",
+ "fr","Dupl","FR",
+ "sq","Elba","AL",
+ "arc","Elym","IR",
+ "wsg","Gong","IN",
+ "sa","Gran","IN",
+ "ko","Hang","KR",
+ "zh","Hani","CN",
+ "hnn","Hano","PH",
+ "yi","Hebr","CA",
+ "yi","Hebr","GB",
+ "yi","Hebr","SE",
+ "yi","Hebr","UA",
+ "yi","Hebr","US",
+ "ja","Hira","JP",
+ "hu","Hung","HU",
+ "ko","Jamo","KR",
+ "jv","Java","ID",
+ "ja","Kana","JP",
+ "bho","Kthi","IN",
+ "en","Latn","ET",
+ "tk","Latn","AF",
+ "ku","Latn","AM",
+ "za","Latn","CN",
+ "tr","Latn","CY",
+ "fr","Latn","DZ",
+ "ku","Latn","GE",
+ "tk","Latn","IR",
+ "fr","Latn","KM",
+ "fr","Latn","MA",
+ "sq","Latn","MK",
+ "pt","Latn","MO",
+ "fr","Latn","MR",
+ "fr","Latn","SY",
+ "fr","Latn","TN",
+ "pl","Latn","UA",
+ "xlc","Lyci","TR",
+ "xld","Lydi","TR",
+ "hi","Mahj","IN",
+ "mak","Maka","ID",
+ "xmn","Mani","CN",
+ "bo","Marc","CN",
+ "men","Mend","SL",
+ "xmr","Merc","SD",
+ "xmr","Mero","SD",
+ "mr","Modi","IN",
+ "mni","Mtei","IN",
+ "skr","Mult","PK",
+ "mnw","Mymr","TH",
+ "sa","Nand","IN",
+ "xna","Narb","SA",
+ "new","Newa","NP",
+ "zhx","Nshu","CN",
+ "sat","Olck","IN",
+ "so","Osma","SO",
+ "kv","Perm","RU",
+ "lzh","Phag","CN",
+ "xpr","Prti","IR",
+ "rej","Rjng","ID",
+ "rhg","Rohg","MM",
+ "xsa","Sarb","YE",
+ "sa","Shrd","IN",
+ "sa","Sidd","IN",
+ "sog","Sogo","UZ",
+ "su","Sund","ID",
+ "syl","Sylo","BD",
+ "tbw","Tagb","PH",
+ "doi","Takr","IN",
+ "zgh","Tfng","MA",
+ "fil","Tglg","PH",
+ "kdt","Thai","KH",
+ "kdt","Thai","LA",
+ "mai","Tirh","IN",
+ "vai","Vaii","LR",
+ "hoc","Wara","IN",
+ "cmg","Zanb","MN",
+ "unr","Beng","IN",
+ "unr","Deva","NP",
+ "unx","Beng","IN",
+ "uri","Latn","ZZ",
+ "urt","Latn","ZZ",
+ "urw","Latn","ZZ",
+ "usa","Latn","ZZ",
+ "utr","Latn","ZZ",
+ "uvh","Latn","ZZ",
+ "uvl","Latn","ZZ",
+ "uz","Arab","AF",
+ "uz","Cyrl","CN",
+ "vag","Latn","ZZ",
+ "van","Latn","ZZ",
+ "ve","Latn","ZA",
+ "vec","Latn","IT",
+ "vep","Latn","RU",
+ "vic","Latn","SX",
+ "viv","Latn","ZZ",
+ "vls","Latn","BE",
+ "vmf","Latn","DE",
+ "vmw","Latn","MZ",
+ "vo","Latn","001",
+ "vot","Latn","RU",
+ "vro","Latn","EE",
+ "vun","Latn","TZ",
+ "vut","Latn","ZZ",
+ "wa","Latn","BE",
+ "wae","Latn","CH",
+ "waj","Latn","ZZ",
+ "wal","Ethi","ET",
+ "wan","Latn","ZZ",
+ "war","Latn","PH",
+ "wbp","Latn","AU",
+ "wbq","Telu","IN",
+ "wbr","Deva","IN",
+ "wci","Latn","ZZ",
+ "wer","Latn","ZZ",
+ "wgi","Latn","ZZ",
+ "whg","Latn","ZZ",
+ "wib","Latn","ZZ",
+ "wiu","Latn","ZZ",
+ "wiv","Latn","ZZ",
+ "wja","Latn","ZZ",
+ "wji","Latn","ZZ",
+ "wls","Latn","WF",
+ "wmo","Latn","ZZ",
+ "wnc","Latn","ZZ",
+ "wni","Arab","KM",
+ "wnu","Latn","ZZ",
+ "wo","Latn","SN",
+ "wob","Latn","ZZ",
+ "wos","Latn","ZZ",
+ "wrs","Latn","ZZ",
+ "wsk","Latn","ZZ",
+ "wtm","Deva","IN",
+ "wuu","Hans","CN",
+ "wuv","Latn","ZZ",
+ "wwa","Latn","ZZ",
+ "xav","Latn","BR",
+ "xbi","Latn","ZZ",
+ "xes","Latn","ZZ",
+ "xh","Latn","ZA",
+ "xla","Latn","ZZ",
+ "xmf","Geor","GE",
+ "xnr","Deva","IN",
+ "xog","Latn","UG",
+ "xon","Latn","ZZ",
+ "xrb","Latn","ZZ",
+ "xsi","Latn","ZZ",
+ "xsm","Latn","ZZ",
+ "xsr","Deva","NP",
+ "xwe","Latn","ZZ",
+ "yao","Latn","MZ",
+ "yap","Latn","FM",
+ "yas","Latn","ZZ",
+ "yat","Latn","ZZ",
+ "yav","Latn","CM",
+ "yay","Latn","ZZ",
+ "yaz","Latn","ZZ",
+ "yba","Latn","ZZ",
+ "ybb","Latn","CM",
+ "yby","Latn","ZZ",
+ "yer","Latn","ZZ",
+ "ygr","Latn","ZZ",
+ "ygw","Latn","ZZ",
+ "yko","Latn","ZZ",
+ "yle","Latn","ZZ",
+ "ylg","Latn","ZZ",
+ "yll","Latn","ZZ",
+ "yml","Latn","ZZ",
+ "yo","Latn","NG",
+ "yon","Latn","ZZ",
+ "yrb","Latn","ZZ",
+ "yre","Latn","ZZ",
+ "yrl","Latn","BR",
+ "yss","Latn","ZZ",
+ "yua","Latn","MX",
+ "yue","Hant","HK",
+ "yue","Hans","CN",
+ "yuj","Latn","ZZ",
+ "yut","Latn","ZZ",
+ "yuw","Latn","ZZ",
+ "zag","Latn","SD",
+ "zdj","Arab","KM",
+ "zea","Latn","NL",
+ "zia","Latn","ZZ",
+ "zlm","Latn","TG",
+ "zmi","Latn","MY",
+ "zne","Latn","ZZ",
+ "zu","Latn","ZA",
+ } // lsrs
+ } // likely
+ match{
+ trie:bin{ // BytesTrie: 1269 bytes
+00186dc27f73c16e778077a25f78aaa2
+79a25e7a01e82af51165ee35117ae820
+012a854811616e01f398f41248616e01
+f347f42007b314b32ab426b536b605b3
+c45ab329b429b62910b5292a2bb026b1
+22b205b3c446b329b429b62905b3c43c
+b329b429b6291348616ef43f12ef66f2
+3501e92aef1165ee351165ee34012a85
+48166562f24c6174ee35736074a29275
+02e74af2a275fa1172f534012a854c16
+6174ee437972ec35117ae8350bef30f4
+0af428f5a45ef71165ee35ef72f16ef2
+107301e829f220022a8543344c166174
+ee437972ec2b167972ec4c6174ee2be8
+24e82ee93eee1165ee350262a68a6822
+7310f2291165ee34012a855316696ee8
+4c6174ee35734ae1a62ee41165ee3401
+2a8541167261e24c6174ee3512f961e1
+2909e921e93ceba82aeea41aefa417f4
+1172f5351165ee34012a8545167468e9
+4c6174ee356ca86a75a8a1e12ee54ce7
+1172f5351165ee34012a855416616dec
+4c6174ee351165ee34012a855416656c
+f54c6174ee35705d704671a2537202ed
+30eea2c2f71166f2351164e5350363a8
+5be156f374f41170f4a28007b3c217b3
+a61bb4a618b5a615b601b129b6291165
+ee34012a8547167572f54c6174ee3511
+65ee34012a8541167261e24c6174ee35
+12f565f3356d5a6ea2626f02e3a800ed
+a264f21165ee34012a854f167279e14c
+6174ee3509ee21eea2d7f2a43bf34af4
+a244f91165ee34012a854d16796df24c
+6174ee351169e4356652e7a6c2e96eeb
+4eec1165ee34012a854d166c79ed4c61
+74ee3512e565ee351162e73506e52ae5
+32ee50ef5af91165ee351165ee34012a
+8544166576e14c6174ee35106e01e235
+ef350164446e10e223733e7944e20164
+2c6e01ee35ef2310e13112ef65ee3512
+ee65ee3567c0f46a946a8e6b946c06e2
+1ee250e7a292eea646ef1165ee34012a
+854c16616fef4c6174ee351164e5296f
+2e7534e11169f43512fa65ee3512e166
+f23512f669e43507ed31ed32ee50f56e
+f91172f5351165ee34012a854b16686d
+f24c6174ee351165ee34012a854b166e
+64e14c6174ee351174f2357234e13ae7
+a4ddeb1172f53512e965ee351165ee34
+012a854716656ff24c6174ee35673a68
+7c6902e126e722f31165ee3506e40de4
+40eca26aeea267f51168e935612e7334
+e11165ee3512e165ee3512f764e52904
+6154e1a28cf254f4a485f91172f53401
+2a854116726dee437972ec3512f765ee
+350162287310e82910f329649e643e65
+4a6601ef2af9116eec351164e13511e1
+6e01e231ef3105f343f32ef496f51165
+f3351165f3a28007b30cb342b43eb53a
+b601b129b6292a2bb026b14eb204b029
+b229b329b429b52904b029b229b329b4
+29b52901b129b6291166e935e526ee2a
+ef1165ee351165eea28007b317b362b4
+30b55ab601b229b62904b027b127b327
+b427b5272a2bb030b12cb201b229b629
+04b029b129b329b427b529617662a29c
+6305720f7230efa2b6f91165ee3512f3
+66f235653068386b12e261f23513e266
+69ec3512f265ee3507eb5beb4ef252f9
+a250fa1172f534012a854c166174ee43
+7972ec351165ee351161f2a28007b313
+b33cb426b522b605b422b429b529b629
+10b3292a2bb026b122b205b40fb429b5
+29b62905b406b429b529b629b029b129
+b2291165f3356334e13ae2a244e6116e
+ec3512e865ee35127373f92905ee24ee
+3af258f30168287310e82910f2291165
+ee34012a854216656ee74c6174ee3511
+66f235652ee534e81168e93512ed65ee
+351172f535
+ } // trie
+ regionToPartitions:bin{ // 1677 bytes
+000008090a00020000000b0009000201
+090001010a000a000000000000000a0c
+00000001010000000100000000000000
+000000000000010100000b0000000b00
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+0000000000000000000000000000000c
+010001000000000d0100000d01000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000900000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+000000000a0000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000000000000
+00000000000000000000000101010102
+00020000010102010001020301010002
+01000101020001010101010101000202
+01020002020201010201000102020001
+01000101010100010201010201000201
+00020102010101000000010100010000
+01010002000200000000000000000000
+04010002000100010400000000000000
+00000101010d00000000010000000000
+00000001010200010001000001000000
+00000100000105000201020101010000
+02010100020101020203000100020000
+00000000000000000006000102000000
+01000201000000000000000101010000
+00000000010101010001010101000000
+00000000000000010000000000000002
+00010100000000000000000000000000
+00010001010100000001020001000100
+00000001000201010102000000000001
+00010000000000000101010101000004
+00040001010102010300000101010106
+03020402010101010201010100010001
+01010002000001000001010001000101
+00000000010000000000000000000000
+00010000000000000000000000000002
+00000002010101000001010201000000
+07010100000100020001000000000000
+00000000000000010000000000010000
+00000000000000010000000000000000
+00010000000100010001000000010101
+01010001010101010101010100000201
+01010200020101010002010001010100
+01010101040101000100020001010000
+01010000000000010000000000030e00
+00000007000000000002010100020002
+00020007000000000100000000000001
+00000000000000000000010000000000
+00000000000000010000000000000000
+00000000000000000001000000000000
+00000000000000000000000001010000
+00000000000000000000000001010000
+00000001000000000000000000000001
+00000000010000000001000000
+ } // regionToPartitions
+ partitions{".","0","1","2","3","4","5","6","0123456","03","16","02","05","04","012346"}
+ paradigms{
+ "pt","Latn","BR",
+ "pt","Latn","PT",
+ "es","Latn","ES",
+ "es","Latn","419",
+ "en","Latn","US",
+ "en","Latn","GB",
+ }
+ distances:intvector{80,50,4,3}
+ } // match
+}
"\t-c or --copyright include copyright notice\n");
fprintf(stderr,
"\t-e or --encoding encoding of source files\n"
- "\t-d of --destdir destination directory, followed by the path, defaults to %s\n"
- "\t-s or --sourcedir source directory for files followed by path, defaults to %s\n"
+ "\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
+ "\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
- "\t followed by path, defaults to %s\n",
+ "\t followed by path, defaults to '%s'\n",
u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
fprintf(stderr,
"\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
}
}
-static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
+static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
+ int32_t &stringLength, UErrorCode *status)
{
struct UString *tokenValue;
char *result;
- uint32_t count;
expect(state, TOK_STRING, &tokenValue, comment, line, status);
return NULL;
}
- count = u_strlen(tokenValue->fChars);
- if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
+ if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
*status = U_INVALID_FORMAT_ERROR;
error(*line, "invariant characters required for table keys, binary data, etc.");
return NULL;
}
- result = static_cast<char *>(uprv_malloc(count+1));
+ result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
if (result == NULL)
{
return NULL;
}
- u_UCharsToChars(tokenValue->fChars, result, count+1);
+ u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
+ stringLength = tokenValue->fLength;
return result;
}
int32_t value;
UBool readToken = FALSE;
char *stopstring;
- uint32_t len;
struct UString memberComments;
IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
return result;
}
- string = getInvariantString(state, NULL, NULL, status);
+ int32_t stringLength;
+ string = getInvariantString(state, NULL, NULL, stringLength, status);
if (U_FAILURE(*status))
{
/* For handling illegal char in the Intvector */
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
- len=(uint32_t)(stopstring-string);
+ int32_t len = (int32_t)(stopstring-string);
- if(len==uprv_strlen(string))
+ if(len==stringLength)
{
result->add(value, *status);
uprv_free(string);
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
uint32_t line;
- LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
+ int32_t stringLength;
+ LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
if (string.isNull() || U_FAILURE(*status))
{
return NULL;
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
- if (count > 0){
- if((count % 2)==0){
- LocalMemory<uint8_t> value;
- if (value.allocateInsteadAndCopy(count) == NULL)
- {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
-
- char toConv[3] = {'\0', '\0', '\0'};
- for (uint32_t i = 0; i < count; i += 2)
- {
- toConv[0] = string[i];
- toConv[1] = string[i + 1];
+ LocalMemory<uint8_t> value;
+ int32_t count = 0;
+ if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
+ {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
- char *stopstring;
- value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
- uint32_t len=(uint32_t)(stopstring-toConv);
+ char toConv[3] = {'\0', '\0', '\0'};
+ for (int32_t i = 0; i < stringLength;)
+ {
+ // Skip spaces (which may have been line endings).
+ char c0 = string[i++];
+ if (c0 == ' ') { continue; }
+ if (i == stringLength) {
+ *status=U_INVALID_CHAR_FOUND;
+ error(line, "Encountered invalid binary value (odd number of hex digits)");
+ return NULL;
+ }
+ toConv[0] = c0;
+ toConv[1] = string[i++];
- if(len!=2)
- {
- *status=U_INVALID_CHAR_FOUND;
- return NULL;
- }
- }
+ char *stopstring;
+ value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
+ uint32_t len=(uint32_t)(stopstring-toConv);
- return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
- }
- else
+ if(len!=2)
{
- *status = U_INVALID_CHAR_FOUND;
- error(line, "Encountered invalid binary value (length is odd)");
+ *status=U_INVALID_CHAR_FOUND;
+ error(line, "Encountered invalid binary value (not all pairs of hex digits)");
return NULL;
}
}
- else
- {
+
+ if (count == 0) {
warning(startline, "Encountered empty binary value");
return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
+ } else {
+ return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
}
}
int32_t value;
char *string;
char *stopstring;
- uint32_t len;
- string = getInvariantString(state, NULL, NULL, status);
+ int32_t stringLength;
+ string = getInvariantString(state, NULL, NULL, stringLength, status);
if (string == NULL || U_FAILURE(*status))
{
printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- if (uprv_strlen(string) <= 0)
+ if (stringLength == 0)
{
warning(startline, "Encountered empty integer. Default value is 0.");
}
/* Allow integer support for hexdecimal, octal digit and decimal*/
/* and handle illegal char in the integer*/
value = uprv_strtoul(string, &stopstring, 0);
- len=(uint32_t)(stopstring-string);
- if(len==uprv_strlen(string))
+ int32_t len = (int32_t)(stopstring-string);
+ if(len==stringLength)
{
result = int_open(state->bundle, tag, value, comment, status);
}
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
uint32_t line;
- LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
+ int32_t stringLength;
+ LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
if (U_FAILURE(*status))
{
return NULL;
UCHARBUF *ucbuf;
char *fullname = NULL;
- int32_t count = 0;
const char* cp = NULL;
const UChar* uBuffer = NULL;
- filename = getInvariantString(state, &line, NULL, status);
- count = (int32_t)uprv_strlen(filename);
+ int32_t stringLength;
+ filename = getInvariantString(state, &line, NULL, stringLength, status);
if (U_FAILURE(*status))
{
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
- fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
+ fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
/* test for NULL */
if(fullname == NULL)
{
<pathelement location="${icu4j.regiondata.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
+ <pathelement location="${icu4j.tools.jar}"/>
<pathelement location="${icu4j.core-tests.jar}"/>
<pathelement location="${icu4j.collate-tests.jar}"/>
<pathelement location="${icu4j.charset-tests.jar}"/>
</ant>
</target>
- <target name="core-tests" depends="core, test-framework" description="Build core tests">
+ <target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
<ant dir="${icu4j.core-tests.dir}" inheritAll="false">
<reference refid="junit.jars"/>
</ant>
<ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
</target>
- <target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
+ <target name="tools" depends="core, collate, translit" description="Build tool classes">
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
</target>
import java.util.Objects;
-final class LSR {
- static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
+public final class LSR {
+ public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
- static final boolean DEBUG_OUTPUT = false;
+ public static final boolean DEBUG_OUTPUT = false;
- final String language;
- final String script;
- final String region;
+ public final String language;
+ public final String script;
+ public final String region;
/** Index for region, negative if ill-formed. @see indexForRegion */
final int regionIndex;
- LSR(String language, String script, String region) {
+ public LSR(String language, String script, String region) {
this.language = language;
this.script = script;
this.region = region;
* Do not rely on a particular region->index mapping; it may change.
* Returns 0 for ill-formed strings.
*/
- static final int indexForRegion(String region) {
+ public static final int indexForRegion(String region) {
if (region.length() == 2) {
int a = region.charAt(0) - 'A';
if (a < 0 || 25 < a) { return 0; }
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.Map;
+import java.util.MissingResourceException;
import java.util.Set;
import java.util.TreeMap;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.ULocale;
/**
*/
public class LocaleDistance {
/** Distance value bit flag, set by the builder. */
- static final int DISTANCE_SKIP_SCRIPT = 0x80;
+ public static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
private static final int DISTANCE_IS_FINAL = 0x100;
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
// Indexes into array of distances.
- static final int IX_DEF_LANG_DISTANCE = 0;
- static final int IX_DEF_SCRIPT_DISTANCE = 1;
- static final int IX_DEF_REGION_DISTANCE = 2;
- static final int IX_MIN_REGION_DISTANCE = 3;
- static final int IX_LIMIT = 4;
+ public static final int IX_DEF_LANG_DISTANCE = 0;
+ public static final int IX_DEF_SCRIPT_DISTANCE = 1;
+ public static final int IX_DEF_REGION_DISTANCE = 2;
+ public static final int IX_MIN_REGION_DISTANCE = 3;
+ public static final int IX_LIMIT = 4;
private static final int ABOVE_THRESHOLD = 100;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private final int minRegionDistance;
private final int defaultDemotionPerDesiredLocale;
- // TODO: Load prebuilt data from a resource bundle
- // to avoid the dependency on the builder code.
// VisibleForTesting
- public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
-
- LocaleDistance(BytesTrie trie,
- byte[] regionToPartitionsIndex, String[] partitionArrays,
- Set<LSR> paradigmLSRs, int[] distances) {
- this.trie = trie;
- this.regionToPartitionsIndex = regionToPartitionsIndex;
- this.partitionArrays = partitionArrays;
- this.paradigmLSRs = paradigmLSRs;
- defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
- defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
- defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
- this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
+ public static final class Data {
+ public byte[] trie;
+ public byte[] regionToPartitionsIndex;
+ public String[] partitionArrays;
+ public Set<LSR> paradigmLSRs;
+ public int[] distances;
+
+ /**
+ * Bundles the pieces of locale-distance data into one object.
+ * The arguments are stored as given; no defensive copies are made.
+ *
+ * @param trie serialized trie bytes
+ * @param regionToPartitionsIndex per-region index bytes
+ * @param partitionArrays partition strings
+ * @param paradigmLSRs set of paradigm LSRs
+ * @param distances distance values, indexed by the IX_ constants
+ */
+ public Data(byte[] trie,
+ byte[] regionToPartitionsIndex, String[] partitionArrays,
+ Set<LSR> paradigmLSRs, int[] distances) {
+ this.trie = trie;
+ this.regionToPartitionsIndex = regionToPartitionsIndex;
+ this.partitionArrays = partitionArrays;
+ this.paradigmLSRs = paradigmLSRs;
+ this.distances = distances;
+ }
+
+ /**
+  * Looks up a required item in the given resource table.
+  *
+  * @param table the table to search
+  * @param key the item key
+  * @param value holder that findValue() fills in; also the return value
+  * @return the same value object, now set to the item found for the key
+  * @throws MissingResourceException if the table has no item for the key
+  */
+ private static UResource.Value getValue(UResource.Table table,
+         String key, UResource.Value value) {
+     if (table.findValue(key, value)) {
+         return value;
+     }
+     throw new MissingResourceException(
+             "langInfo.res missing data", "", "match/" + key);
+ }
+
+ /**
+  * Loads the locale-matching data from the "match" table of the langInfo resource bundle.
+  *
+  * <p>Reads the binary "trie" and "regionToPartitions" items, the "partitions" string array,
+  * the optional "paradigms" string array (triples of strings, each turned into one LSR),
+  * and the "distances" intvector.
+  *
+  * @return a new Data object with the bytes copied out of the binary items
+  * @throws MissingResourceException if a required item is not found,
+  *         if regionToPartitions has fewer than LSR.REGION_INDEX_LIMIT bytes,
+  *         if the paradigms array length is not a multiple of 3,
+  *         or if distances has fewer than IX_LIMIT entries
+  */
+ // VisibleForTesting
+ public static Data load() throws MissingResourceException {
+     ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
+             ICUData.ICU_BASE_NAME, "langInfo",
+             ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+     // The same Value object is reused below as the output holder for every lookup.
+     UResource.Value value = langInfo.getValueWithFallback("match");
+     UResource.Table matchTable = value.getTable();
+
+     ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
+     byte[] trie = new byte[buffer.remaining()];
+     buffer.get(trie);
+
+     buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
+     byte[] regionToPartitions = new byte[buffer.remaining()];
+     buffer.get(regionToPartitions);
+     if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
+         throw new MissingResourceException(
+                 "langInfo.res binary data too short", "", "match/regionToPartitions");
+     }
+
+     String[] partitions = getValue(matchTable, "partitions", value).getStringArray();
+
+     Set<LSR> paradigmLSRs;
+     if (matchTable.findValue("paradigms", value)) {
+         String[] paradigms = value.getStringArray();
+         // Each LSR consumes three strings; reject a trailing incomplete triple
+         // here rather than failing with an ArrayIndexOutOfBoundsException below.
+         if (paradigms.length % 3 != 0) {
+             throw new MissingResourceException(
+                     "langInfo.res paradigms length not a multiple of 3", "", "match/paradigms");
+         }
+         paradigmLSRs = new HashSet<>(paradigms.length / 3);
+         for (int i = 0; i < paradigms.length; i += 3) {
+             paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
+         }
+     } else {
+         paradigmLSRs = Collections.emptySet();
+     }
+
+     int[] distances = getValue(matchTable, "distances", value).getIntVector();
+     if (distances.length < IX_LIMIT) {
+         throw new MissingResourceException(
+                 "langInfo.res intvector too short", "", "match/distances");
+     }
+
+     return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) { return true; }
+ if (!getClass().equals(other.getClass())) { return false; }
+ Data od = (Data)other;
+ return Arrays.equals(trie, od.trie) &&
+ Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
+ Arrays.equals(partitionArrays, od.partitionArrays) &&
+ paradigmLSRs.equals(od.paradigmLSRs) &&
+ Arrays.equals(distances, od.distances);
+ }
+ }
+
+ // VisibleForTesting
+ public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
+
+ private LocaleDistance(Data data) {
+ this.trie = new BytesTrie(data.trie, 0);
+ this.regionToPartitionsIndex = data.regionToPartitionsIndex;
+ this.partitionArrays = data.partitionArrays;
+ this.paradigmLSRs = data.paradigmLSRs;
+ defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
+ defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
+ defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
+ this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
LSR en = new LSR("en", "Latn", "US");
LSR enGB = new LSR("en", "Latn", "GB");
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
- int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
+ public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
return partitionArrays[pIndex];
}
- boolean isParadigmLSR(LSR lsr) {
+ public boolean isParadigmLSR(LSR lsr) {
return paradigmLSRs.contains(lsr);
}
return defaultRegionDistance;
}
- int getDefaultDemotionPerDesiredLocale() {
+ public int getDefaultDemotionPerDesiredLocale() {
return defaultDemotionPerDesiredLocale;
}
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.MissingResourceException;
import java.util.TreeMap;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
- static final int SKIP_SCRIPT = 1;
+ public static final int SKIP_SCRIPT = 1;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
- // TODO: Load prebuilt data from a resource bundle
- // to avoid the dependency on the builder code.
// VisibleForTesting
- public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
-
- static final class Data {
- private final Map<String, String> languageAliases;
- private final Map<String, String> regionAliases;
- private final BytesTrie trie;
- private final LSR[] lsrs;
-
- Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
- BytesTrie trie, LSR[] lsrs) {
+ public static final class Data {
+ public final Map<String, String> languageAliases;
+ public final Map<String, String> regionAliases;
+ public final byte[] trie;
+ public final LSR[] lsrs;
+
+ public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
+ byte[] trie, LSR[] lsrs) {
this.languageAliases = languageAliases;
this.regionAliases = regionAliases;
this.trie = trie;
this.lsrs = lsrs;
}
+
+ /**
+  * Looks up a required item in the given resource table.
+  *
+  * @param table the table to search
+  * @param key the item key
+  * @param value holder that findValue() fills in; also the return value
+  * @return the same value object, now set to the item found for the key
+  * @throws MissingResourceException if the table has no item for the key
+  */
+ private static UResource.Value getValue(UResource.Table table,
+         String key, UResource.Value value) {
+     if (table.findValue(key, value)) {
+         return value;
+     }
+     throw new MissingResourceException(
+             "langInfo.res missing data", "", "likely/" + key);
+ }
+
+ /**
+  * Reads an optional string array of (alias, replacement) pairs into a map.
+  *
+  * @param table the table to search
+  * @param key the key of the string array
+  * @param value holder that findValue() fills in
+  * @return a map from each even-indexed string to the string that follows it,
+  *         or an empty map if the table has no item for the key
+  * @throws MissingResourceException if the array has an odd number of strings
+  */
+ private static Map<String, String> loadAliases(UResource.Table table,
+         String key, UResource.Value value) {
+     if (!table.findValue(key, value)) {
+         return Collections.emptyMap();
+     }
+     String[] pairs = value.getStringArray();
+     // An unpaired trailing string would otherwise cause an ArrayIndexOutOfBoundsException.
+     if ((pairs.length & 1) != 0) {
+         throw new MissingResourceException(
+                 "langInfo.res alias array has an odd number of strings", "", "likely/" + key);
+     }
+     Map<String, String> aliases = new HashMap<>(pairs.length / 2);
+     for (int i = 0; i < pairs.length; i += 2) {
+         aliases.put(pairs[i], pairs[i + 1]);
+     }
+     return aliases;
+ }
+
+ /**
+  * Loads the likely-subtags data from the "likely" table of the langInfo resource bundle.
+  *
+  * <p>Reads the optional "languageAliases" and "regionAliases" string arrays,
+  * the binary "trie" item, and the "lsrs" string array
+  * (triples of strings, each turned into one LSR).
+  *
+  * @return a new Data object with the trie bytes copied out of the binary item
+  * @throws MissingResourceException if "trie" or "lsrs" is not found,
+  *         if an alias array has an odd number of strings,
+  *         or if the lsrs array length is not a multiple of 3
+  */
+ // VisibleForTesting
+ public static Data load() throws MissingResourceException {
+     ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
+             ICUData.ICU_BASE_NAME, "langInfo",
+             ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
+     // The same Value object is reused below as the output holder for every lookup.
+     UResource.Value value = langInfo.getValueWithFallback("likely");
+     UResource.Table likelyTable = value.getTable();
+
+     Map<String, String> languageAliases = loadAliases(likelyTable, "languageAliases", value);
+     Map<String, String> regionAliases = loadAliases(likelyTable, "regionAliases", value);
+
+     ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary();
+     byte[] trie = new byte[buffer.remaining()];
+     buffer.get(trie);
+
+     String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
+     // Each LSR consumes three strings; reject a trailing incomplete triple.
+     if (lsrSubtags.length % 3 != 0) {
+         throw new MissingResourceException(
+                 "langInfo.res lsrs length not a multiple of 3", "", "likely/lsrs");
+     }
+     LSR[] lsrs = new LSR[lsrSubtags.length / 3];
+     for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
+         lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
+     }
+
+     return new Data(languageAliases, regionAliases, trie, lsrs);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) { return true; }
+ if (!getClass().equals(other.getClass())) { return false; }
+ Data od = (Data)other;
+ return
+ languageAliases.equals(od.languageAliases) &&
+ regionAliases.equals(od.regionAliases) &&
+ Arrays.equals(trie, od.trie) &&
+ Arrays.equals(lsrs, od.lsrs);
+ }
}
+ // VisibleForTesting
+ public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
+
private final Map<String, String> languageAliases;
private final Map<String, String> regionAliases;
private XLikelySubtags(XLikelySubtags.Data data) {
languageAliases = data.languageAliases;
regionAliases = data.regionAliases;
- trie = data.trie;
+ trie = new BytesTrie(data.trie, 0);
lsrs = data.lsrs;
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
}
}
+ /**
+  * Implementation of LocaleMatcher.canonicalize(ULocale).
+  *
+  * <p>Replaces the language and/or region of the locale when the alias maps
+  * have an entry for them. The replacement locale is constructed from
+  * language, script, and region only.
+  *
+  * @param locale the input locale
+  * @return the input locale itself if neither its language nor its region has an alias,
+  *         otherwise a new ULocale with the replaced subtags
+  */
+ public ULocale canonicalize(ULocale locale) {
+     String language = locale.getLanguage();
+     String region = locale.getCountry();
+     String languageReplacement = languageAliases.get(language);
+     String regionReplacement = regionAliases.get(region);
+     if (languageReplacement == null && regionReplacement == null) {
+         // Nothing to replace: hand back the caller's object unchanged.
+         return locale;
+     }
+     if (languageReplacement != null) {
+         language = languageReplacement;
+     }
+     if (regionReplacement != null) {
+         region = regionReplacement;
+     }
+     return new ULocale(language, locale.getScript(), region);
+ }
+
+
private static String getCanonical(Map<String, String> aliases, String alias) {
String canonical = aliases.get(alias);
return canonical == null ? alias : canonical;
locale.getVariant());
}
- LSR makeMaximizedLsrFrom(Locale locale) {
+ public LSR makeMaximizedLsrFrom(Locale locale) {
String tag = locale.toLanguageTag();
if (tag.startsWith("x-")) {
// Private use language tag x-subtag-subtag...
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Objects;
-
-import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Immutable class that picks the best match between a user's desired locales and
- * and application's supported locales.
- *
- * <p>If there are multiple supported locales with the same (language, script, region)
- * likely subtags, then the current implementation returns the first of those locales.
- * It ignores variant subtags (except for pseudolocale variants) and extensions.
- * This may change in future versions.
- *
- * <p>For example, the current implementation does not distinguish between
- * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
- *
- * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
- * or place it earlier in the list of supported locales.
- *
- * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
- * The current implementation compares each desired locale with supported locales
- * in the following order:
- * 1. Default locale, if supported;
- * 2. CLDR "paradigm locales" like en-GB and es-419;
- * 3. other supported locales.
- * This may change in future versions.
- *
- * <p>TODO: Migration notes.
- *
- * @author markdavis
- */
-public final class XLocaleMatcher {
- private static final LSR UND_LSR = new LSR("und","","");
- private static final ULocale UND_ULOCALE = new ULocale("und");
- private static final Locale UND_LOCALE = new Locale("und");
-
- // Activates debugging output to stderr with details of GetBestMatch.
- private static final boolean TRACE_MATCHER = false;
-
- private static abstract class LsrIterator implements Iterator<LSR> {
- int bestDesiredIndex = -1;
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- public abstract void rememberCurrent(int desiredIndex);
- }
-
- /**
- * Builder option for whether the language subtag or the script subtag is most important.
- *
- * @see Builder#setFavorSubtag(FavorSubtag)
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public enum FavorSubtag {
- /**
- * Language differences are most important, then script differences, then region differences.
- * (This is the default behavior.)
- *
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- LANGUAGE,
- /**
- * Makes script differences matter relatively more than language differences.
- *
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- SCRIPT
- }
-
- /**
- * Builder option for whether all desired locales are treated equally or
- * earlier ones are preferred.
- *
- * @see Builder#setDemotionPerDesiredLocale(Demotion)
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public enum Demotion {
- /**
- * All desired locales are treated equally.
- *
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- NONE,
- /**
- * Earlier desired locales are preferred.
- *
- * <p>From each desired locale to the next,
- * the distance to any supported locale is increased by an additional amount
- * which is at least as large as most region mismatches.
- * A later desired locale has to have a better match with some supported locale
- * due to more than merely having the same region subtag.
- *
- * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
- * yields <code>Result(en-GB, en)</code> because
- * with the demotion of sv its perfect match is no better than
- * the region distance between the earlier desired locale en-GB and en=en-US.
- *
- * <p>Notes:
- * <ul>
- * <li>In some cases, language and/or script differences can be as small as
- * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
- * <li>It is possible for certain region differences to be larger than usual,
- * and larger than the demotion.
- * (As of CLDR 35 there is no such case, but
- * this is possible in future versions of the data.)
- * </ul>
- *
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- REGION
- }
-
- /**
- * Data for the best-matching pair of a desired and a supported locale.
- *
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public static final class Result {
- private final ULocale desiredULocale;
- private final ULocale supportedULocale;
- private final Locale desiredLocale;
- private final Locale supportedLocale;
- private final int desiredIndex;
- private final int supportedIndex;
-
- private Result(ULocale udesired, ULocale usupported,
- Locale desired, Locale supported,
- int desIndex, int suppIndex) {
- desiredULocale = udesired;
- supportedULocale = usupported;
- desiredLocale = desired;
- supportedLocale = supported;
- desiredIndex = desIndex;
- supportedIndex = suppIndex;
- }
-
- /**
- * Returns the best-matching desired locale.
- * null if the list of desired locales is empty or if none matched well enough.
- *
- * @return the best-matching desired locale, or null.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public ULocale getDesiredULocale() {
- return desiredULocale == null && desiredLocale != null ?
- ULocale.forLocale(desiredLocale) : desiredULocale;
- }
- /**
- * Returns the best-matching desired locale.
- * null if the list of desired locales is empty or if none matched well enough.
- *
- * @return the best-matching desired locale, or null.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Locale getDesiredLocale() {
- return desiredLocale == null && desiredULocale != null ?
- desiredULocale.toLocale() : desiredLocale;
- }
-
- /**
- * Returns the best-matching supported locale.
- * If none matched well enough, this is the default locale.
- * The default locale is null if the list of supported locales is empty and
- * no explicit default locale is set.
- *
- * @return the best-matching supported locale, or null.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public ULocale getSupportedULocale() { return supportedULocale; }
- /**
- * Returns the best-matching supported locale.
- * If none matched well enough, this is the default locale.
- * The default locale is null if the list of supported locales is empty and
- * no explicit default locale is set.
- *
- * @return the best-matching supported locale, or null.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Locale getSupportedLocale() { return supportedLocale; }
-
- /**
- * Returns the index of the best-matching desired locale in the input Iterable order.
- * -1 if the list of desired locales is empty or if none matched well enough.
- *
- * @return the index of the best-matching desired locale, or -1.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public int getDesiredIndex() { return desiredIndex; }
-
- /**
- * Returns the index of the best-matching supported locale in the constructor’s or builder’s input order
- * (“set” Collection plus “added” locales).
- * If the matcher was built from a locale list string, then the iteration order is that
- * of a LocalePriorityList built from the same string.
- * -1 if the list of supported locales is empty or if none matched well enough.
- *
- * @return the index of the best-matching supported locale, or -1.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public int getSupportedIndex() { return supportedIndex; }
-
- /**
- * Takes the best-matching supported locale and adds relevant fields of the
- * best-matching desired locale, such as the -t- and -u- extensions.
- * May replace some fields of the supported locale.
- * The result is the locale that should be used for date and number formatting, collation, etc.
- *
- * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
- *
- * @return the service locale, combining the best-matching desired and supported locales.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public ULocale makeServiceULocale() {
- ULocale bestDesired = getDesiredULocale();
- ULocale serviceLocale = supportedULocale;
- if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
- ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
-
- // Copy the region from bestDesired, if there is one.
- // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty.
- String region = bestDesired.getCountry();
- if (!region.isEmpty()) {
- b.setRegion(region);
- }
-
- // Copy the variants from bestDesired, if there are any.
- // Note that this will override any serviceLocale variants.
- // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
- // TODO: Why replace? Why not append?
- String variants = bestDesired.getVariant();
- if (!variants.isEmpty()) {
- b.setVariant(variants);
- }
-
- // Copy the extensions from bestDesired, if there are any.
- // Note that this will override any serviceLocale extensions.
- // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
- // (replacing calendar).
- // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale??
- // (Unsure about this one.)
- for (char extensionKey : bestDesired.getExtensionKeys()) {
- b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
- }
- serviceLocale = b.build();
- }
- return serviceLocale;
- }
-
- /**
- * Takes the best-matching supported locale and adds relevant fields of the
- * best-matching desired locale, such as the -t- and -u- extensions.
- * May replace some fields of the supported locale.
- * The result is the locale that should be used for date and number formatting, collation, etc.
- *
- * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
- *
- * @return the service locale, combining the best-matching desired and supported locales.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Locale makeServiceLocale() {
- return makeServiceULocale().toLocale();
- }
- }
-
- private final int thresholdDistance;
- private final int demotionPerDesiredLocale;
- private final FavorSubtag favorSubtag;
-
- // These are in input order.
- private final ULocale[] supportedULocales;
- private final Locale[] supportedLocales;
- // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
- private final Map<LSR, Integer> supportedLsrToIndex;
- // Array versions of the supportedLsrToIndex keys and values.
- // The distance lookup loops over the supportedLsrs and returns the index of the best match.
- private final LSR[] supportedLsrs;
- private final int[] supportedIndexes;
- private final ULocale defaultULocale;
- private final Locale defaultLocale;
- private final int defaultLocaleIndex;
-
- /**
- * LocaleMatcher Builder.
- *
- * @see XLocaleMatcher#builder()
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public static class Builder {
- private List<ULocale> supportedLocales;
- private int thresholdDistance = -1;
- private Demotion demotion;
- private ULocale defaultLocale;
- private FavorSubtag favor;
-
- /**
- * Parses the string like {@link LocalePriorityList} does and
- * sets the supported locales accordingly.
- * Clears any previously set/added supported locales first.
- *
- * @param locales the languagePriorityList to set
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setSupportedLocales(String locales) {
- return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
- }
-
- /**
- * Copies the supported locales, preserving iteration order.
- * Clears any previously set/added supported locales first.
- * Duplicates are allowed, and are not removed.
- *
- * @param locales the list of locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setSupportedULocales(Collection<ULocale> locales) {
- supportedLocales = new ArrayList<>(locales);
- return this;
- }
-
- /**
- * Copies the supported locales, preserving iteration order.
- * Clears any previously set/added supported locales first.
- * Duplicates are allowed, and are not removed.
- *
- * @param locales the list of locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setSupportedLocales(Collection<Locale> locales) {
- supportedLocales = new ArrayList<>(locales.size());
- for (Locale locale : locales) {
- supportedLocales.add(ULocale.forLocale(locale));
- }
- return this;
- }
-
- /**
- * Adds another supported locale.
- * Duplicates are allowed, and are not removed.
- *
- * @param locale the list of locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder addSupportedULocale(ULocale locale) {
- if (supportedLocales == null) {
- supportedLocales = new ArrayList<>();
- }
- supportedLocales.add(locale);
- return this;
- }
-
- /**
- * Adds another supported locale.
- * Duplicates are allowed, and are not removed.
- *
- * @param locale the list of locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder addSupportedLocale(Locale locale) {
- return addSupportedULocale(ULocale.forLocale(locale));
- }
-
- /**
- * Sets the default locale; if null, or if it is not set explicitly,
- * then the first supported locale is used as the default locale.
- *
- * @param defaultLocale the default locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setDefaultULocale(ULocale defaultLocale) {
- this.defaultLocale = defaultLocale;
- return this;
- }
-
- /**
- * Sets the default locale; if null, or if it is not set explicitly,
- * then the first supported locale is used as the default locale.
- *
- * @param defaultLocale the default locale
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setDefaultLocale(Locale defaultLocale) {
- this.defaultLocale = ULocale.forLocale(defaultLocale);
- return this;
- }
-
- /**
- * If SCRIPT, then the language differences are smaller than script differences.
- * This is used in situations (such as maps) where
- * it is better to fall back to the same script than a similar language.
- *
- * @param subtag the subtag to favor
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setFavorSubtag(FavorSubtag subtag) {
- this.favor = subtag;
- return this;
- }
-
- /**
- * Option for whether all desired locales are treated equally or
- * earlier ones are preferred (this is the default).
- *
- * @param demotion the demotion per desired locale to set.
- * @return this Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public Builder setDemotionPerDesiredLocale(Demotion demotion) {
- this.demotion = demotion;
- return this;
- }
-
- /**
- * <i>Internal only!</i>
- *
- * @param thresholdDistance the thresholdDistance to set, with -1 = default
- * @return this Builder object
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public Builder internalSetThresholdDistance(int thresholdDistance) {
- if (thresholdDistance > 100) {
- thresholdDistance = 100;
- }
- this.thresholdDistance = thresholdDistance;
- return this;
- }
-
- /**
- * Builds and returns a new locale matcher.
- * This builder can continue to be used.
- *
- * @return new XLocaleMatcher.
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public XLocaleMatcher build() {
- return new XLocaleMatcher(this);
- }
-
- @Override
- public String toString() {
- StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
- if (!supportedLocales.isEmpty()) {
- s.append(" supported={").append(supportedLocales.toString()).append('}');
- }
- if (defaultLocale != null) {
- s.append(" default=").append(defaultLocale.toString());
- }
- if (favor != null) {
- s.append(" distance=").append(favor.toString());
- }
- if (thresholdDistance >= 0) {
- s.append(String.format(" threshold=%d", thresholdDistance));
- }
- if (demotion != null) {
- s.append(" demotion=").append(demotion.toString());
- }
- return s.append('}').toString();
- }
- }
-
- /**
- * Returns a builder used in chaining parameters for building a LocaleMatcher.
- *
- * @return a new Builder object
- * @draft ICU 65
- * @provisional This API might change or be removed in a future release.
- */
- public static Builder builder() {
- return new Builder();
- }
-
- /** Convenience method */
- public XLocaleMatcher(String supportedLocales) {
- this(builder().setSupportedLocales(supportedLocales));
- }
- /** Convenience method */
- public XLocaleMatcher(LocalePriorityList supportedLocales) {
- this(builder().setSupportedULocales(supportedLocales.getULocales()));
- }
-
- private XLocaleMatcher(Builder builder) {
- thresholdDistance = builder.thresholdDistance < 0 ?
- LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
- // Store the supported locales in input order,
- // so that when different types are used (e.g., java.util.Locale)
- // we can return those by parallel index.
- int supportedLocalesLength = builder.supportedLocales.size();
- supportedULocales = new ULocale[supportedLocalesLength];
- supportedLocales = new Locale[supportedLocalesLength];
- // Supported LRSs in input order.
- LSR lsrs[] = new LSR[supportedLocalesLength];
- // Also find the first supported locale whose LSR is
- // the same as that for the default locale.
- ULocale udef = builder.defaultLocale;
- Locale def = null;
- LSR defLSR = null;
- int idef = -1;
- if (udef != null) {
- def = udef.toLocale();
- defLSR = getMaximalLsrOrUnd(udef);
- }
- int i = 0;
- for (ULocale locale : builder.supportedLocales) {
- supportedULocales[i] = locale;
- supportedLocales[i] = locale.toLocale();
- LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
- if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
- idef = i;
- }
- ++i;
- }
-
- // We need an unordered map from LSR to first supported locale with that LSR,
- // and an ordered list of (LSR, Indexes).
- // We use a LinkedHashMap for both,
- // and insert the supported locales in the following order:
- // 1. Default locale, if it is supported.
- // 2. Priority locales in builder order.
- // 3. Remaining locales in builder order.
- supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
- Map<LSR, Integer> otherLsrToIndex = null;
- if (idef >= 0) {
- supportedLsrToIndex.put(defLSR, idef);
- }
- i = 0;
- for (ULocale locale : supportedULocales) {
- if (i == idef) { continue; }
- LSR lsr = lsrs[i];
- if (defLSR == null) {
- assert i == 0;
- udef = locale;
- def = supportedLocales[0];
- defLSR = lsr;
- idef = 0;
- supportedLsrToIndex.put(lsr, 0);
- } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
- putIfAbsent(supportedLsrToIndex, lsr, i);
- } else {
- if (otherLsrToIndex == null) {
- otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
- }
- putIfAbsent(otherLsrToIndex, lsr, i);
- }
- ++i;
- }
- if (otherLsrToIndex != null) {
- supportedLsrToIndex.putAll(otherLsrToIndex);
- }
- int numSuppLsrs = supportedLsrToIndex.size();
- supportedLsrs = new LSR[numSuppLsrs];
- supportedIndexes = new int[numSuppLsrs];
- i = 0;
- for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
- supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
- supportedIndexes[i++] = entry.getValue();
- }
-
- defaultULocale = udef;
- defaultLocale = def;
- defaultLocaleIndex = idef;
- demotionPerDesiredLocale =
- builder.demotion == Demotion.NONE ? 0 :
- LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
- favorSubtag = builder.favor;
- }
-
- private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
- Integer index = lsrToIndex.get(lsr);
- if (index == null) {
- lsrToIndex.put(lsr, i);
- }
- }
-
- private static final LSR getMaximalLsrOrUnd(ULocale locale) {
- if (locale.equals(UND_ULOCALE)) {
- return UND_LSR;
- } else {
- return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
- }
- }
-
- private static final LSR getMaximalLsrOrUnd(Locale locale) {
- if (locale.equals(UND_LOCALE)) {
- return UND_LSR;
- } else {
- return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
- }
- }
-
- private static final class ULocaleLsrIterator extends LsrIterator {
- private Iterator<ULocale> locales;
- private ULocale current, remembered;
-
- ULocaleLsrIterator(Iterator<ULocale> locales) {
- this.locales = locales;
- }
-
- @Override
- public boolean hasNext() {
- return locales.hasNext();
- }
-
- @Override
- public LSR next() {
- current = locales.next();
- return getMaximalLsrOrUnd(current);
- }
-
- @Override
- public void rememberCurrent(int desiredIndex) {
- bestDesiredIndex = desiredIndex;
- remembered = current;
- }
- }
-
- private static final class LocaleLsrIterator extends LsrIterator {
- private Iterator<Locale> locales;
- private Locale current, remembered;
-
- LocaleLsrIterator(Iterator<Locale> locales) {
- this.locales = locales;
- }
-
- @Override
- public boolean hasNext() {
- return locales.hasNext();
- }
-
- @Override
- public LSR next() {
- current = locales.next();
- return getMaximalLsrOrUnd(current);
- }
-
- @Override
- public void rememberCurrent(int desiredIndex) {
- bestDesiredIndex = desiredIndex;
- remembered = current;
- }
- }
-
- public ULocale getBestMatch(ULocale desiredLocale) {
- LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
- int suppIndex = getBestSuppIndex(desiredLSR, null);
- return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
- }
-
- public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
- Iterator<ULocale> desiredIter = desiredLocales.iterator();
- if (!desiredIter.hasNext()) {
- return defaultULocale;
- }
- ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
- LSR desiredLSR = lsrIter.next();
- int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
- return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
- }
-
- public ULocale getBestMatch(String desiredLocaleList) {
- return getBestMatch(LocalePriorityList.add(desiredLocaleList).build());
- }
-
- public Locale getBestLocale(Locale desiredLocale) {
- LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
- int suppIndex = getBestSuppIndex(desiredLSR, null);
- return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
- }
-
- public Locale getBestLocale(Iterable<Locale> desiredLocales) {
- Iterator<Locale> desiredIter = desiredLocales.iterator();
- if (!desiredIter.hasNext()) {
- return defaultLocale;
- }
- LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
- LSR desiredLSR = lsrIter.next();
- int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
- return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
- }
-
- private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
- if (suppIndex < 0) {
- return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
- } else if (desiredLocale != null) {
- return new Result(desiredLocale, supportedULocales[suppIndex],
- null, supportedLocales[suppIndex], 0, suppIndex);
- } else {
- return new Result(lsrIter.remembered, supportedULocales[suppIndex],
- null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
- }
- }
-
- private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
- if (suppIndex < 0) {
- return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
- } else if (desiredLocale != null) {
- return new Result(null, supportedULocales[suppIndex],
- desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
- } else {
- return new Result(null, supportedULocales[suppIndex],
- lsrIter.remembered, supportedLocales[suppIndex],
- lsrIter.bestDesiredIndex, suppIndex);
- }
- }
-
- public Result getBestMatchResult(ULocale desiredLocale) {
- LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
- int suppIndex = getBestSuppIndex(desiredLSR, null);
- return makeResult(desiredLocale, null, suppIndex);
- }
-
- /**
- * Returns the best match between the desired and supported locales.
- *
- * @param desiredLocales Typically a user's languages, in order of preference (descending).
- * @return the best-matching pair of a desired and a supported locale.
- */
- public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
- Iterator<ULocale> desiredIter = desiredLocales.iterator();
- if (!desiredIter.hasNext()) {
- return makeResult(UND_ULOCALE, null, -1);
- }
- ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
- LSR desiredLSR = lsrIter.next();
- int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
- return makeResult(null, lsrIter, suppIndex);
- }
-
- public Result getBestLocaleResult(Locale desiredLocale) {
- LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
- int suppIndex = getBestSuppIndex(desiredLSR, null);
- return makeResult(desiredLocale, null, suppIndex);
- }
-
- public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
- Iterator<Locale> desiredIter = desiredLocales.iterator();
- if (!desiredIter.hasNext()) {
- return makeResult(UND_LOCALE, null, -1);
- }
- LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
- LSR desiredLSR = lsrIter.next();
- int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
- return makeResult(null, lsrIter, suppIndex);
- }
-
- /**
- * @param desiredLSR The first desired locale's LSR.
- * @param remainingIter Remaining desired LSRs, null or empty if none.
- * @return the index of the best-matching supported locale, or -1 if there is no good match.
- */
- private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
- int desiredIndex = 0;
- int bestSupportedLsrIndex = -1;
- for (int bestDistance = thresholdDistance;;) {
- // Quick check for exact maximized LSR.
- Integer index = supportedLsrToIndex.get(desiredLSR);
- if (index != null) {
- int suppIndex = index;
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
- supportedULocales[suppIndex]);
- }
- if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
- return suppIndex;
- }
- int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- desiredLSR, supportedLsrs, bestDistance, favorSubtag);
- if (bestIndexAndDistance >= 0) {
- bestDistance = bestIndexAndDistance & 0xff;
- if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
- bestSupportedLsrIndex = bestIndexAndDistance >> 8;
- }
- if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
- break;
- }
- if (remainingIter == null || !remainingIter.hasNext()) {
- break;
- }
- desiredLSR = remainingIter.next();
- }
- if (bestSupportedLsrIndex < 0) {
- if (TRACE_MATCHER) {
- System.err.printf("Returning default %s: no good match\n", defaultULocale);
- }
- return -1;
- }
- int suppIndex = supportedIndexes[bestSupportedLsrIndex];
- if (TRACE_MATCHER) {
- System.err.printf("Returning %s: best matching supported locale\n",
- supportedULocales[suppIndex]);
- }
- return suppIndex;
- }
-
- @Override
- public String toString() {
- StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
- if (supportedULocales.length > 0) {
- s.append(" supported={").append(supportedULocales[0].toString());
- for (int i = 1; i < supportedULocales.length; ++i) {
- s.append(", ").append(supportedULocales[i].toString());
- }
- s.append('}');
- }
- s.append(" default=").append(Objects.toString(defaultULocale));
- if (favorSubtag != null) {
- s.append(" distance=").append(favorSubtag.toString());
- }
- if (thresholdDistance >= 0) {
- s.append(String.format(" threshold=%d", thresholdDistance));
- }
- s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
- return s.append('}').toString();
- }
-
- /**
- * Returns a fraction between 0 and 1, where 1 means that the languages are a
- * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
- * <br>Note that
- * the precise values may change over time; no code should be made dependent
- * on the values remaining constant.
- * @param desired Desired locale
- * @param desiredMax Maximized locale (using likely subtags)
- * @param supported Supported locale
- * @param supportedMax Maximized locale (using likely subtags)
- * @return value between 0 and 1, inclusive.
- * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
- */
- @Deprecated
- public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
- // Returns the inverse of the distance: That is, 1-distance(desired, supported).
- int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
- XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
- new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
- thresholdDistance, favorSubtag) & 0xff;
- return (100 - distance) / 100.0;
- }
-
- /**
- * Canonicalize a locale (language). Note that for now, it is canonicalizing
- * according to CLDR conventions (he vs iw, etc), since that is what is needed
- * for likelySubtags.
- * @param ulocale language/locale code
- * @return ULocale with remapped subtags.
- * @stable ICU 4.4
- */
- public ULocale canonicalize(ULocale ulocale) {
- // TODO
- return null;
- }
-}
*/
package com.ibm.icu.util;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Objects;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.Relation;
-import com.ibm.icu.impl.Row;
-import com.ibm.icu.impl.Row.R3;
-import com.ibm.icu.impl.locale.XLocaleMatcher;
-import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
+
+import com.ibm.icu.impl.locale.LSR;
+import com.ibm.icu.impl.locale.LocaleDistance;
+import com.ibm.icu.impl.locale.XLikelySubtags;
/**
- * Provides a way to match the languages (locales) supported by a product to the
- * languages (locales) acceptable to a user, and get the best match. For
- * example:
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
*
+ * <p>Example:
* <pre>
- * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
- *
- * // afterwards:
- * matcher.getBestMatch("en-US").toLanguageTag() => "en"
+ * LocaleMatcher matcher = LocaleMatcher.builder().setSupportedLocales("fr, en-GB, en").build();
+ * Locale bestSupported = matcher.getBestLocale(Locale.US); // "en"
* </pre>
*
- * It takes into account when languages are close to one another, such as fil
- * and tl, and when language regional variants are close, like en-GB and en-AU.
- * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
- * file.
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
* <p>All classes implementing this interface should be immutable. Often a
* product will just need one static instance, built with the languages
* that it supports. However, it may want multiple instances with different
* @stable ICU 4.4
*/
public class LocaleMatcher {
+ private static final LSR UND_LSR = new LSR("und","","");
+ private static final ULocale UND_ULOCALE = new ULocale("und");
+ private static final Locale UND_LOCALE = new Locale("und");
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public static final boolean DEBUG = false;
-
- private static final ULocale UNKNOWN_LOCALE = new ULocale("und");
-
- /**
- * Threshold for falling back to the default (first) language. May make this
- * a parameter in the future.
- */
- private static final double DEFAULT_THRESHOLD = 0.5;
+ // Activates debugging output to stderr with details of GetBestMatch.
+ private static final boolean TRACE_MATCHER = false;
- /**
- * The default language, in case the threshold is not met.
- */
- private final ULocale defaultLanguage;
+ private static abstract class LsrIterator implements Iterator<LSR> {
+ int bestDesiredIndex = -1;
- /**
- * The default language, in case the threshold is not met.
- */
- private final double threshold;
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
- /**
- * Create a new language matcher. The highest-weighted language is the
- * default. That means that if no other language is matches closer than a given
- * threshold, that default language is chosen. Typically the default is English,
- * but it could be different based on additional information, such as the domain
- * of the page.
- *
- * @param languagePriorityList weighted list
- * @stable ICU 4.4
- */
- public LocaleMatcher(LocalePriorityList languagePriorityList) {
- this(languagePriorityList, defaultWritten);
+ public abstract void rememberCurrent(int desiredIndex);
}
/**
- * Create a new language matcher from a String form. The highest-weighted
- * language is the default.
+ * Builder option for whether the language subtag or the script subtag is most important.
*
- * @param languagePriorityListString String form of LanguagePriorityList
- * @stable ICU 4.4
- */
- public LocaleMatcher(String languagePriorityListString) {
- this(LocalePriorityList.add(languagePriorityListString).build());
- }
-
- /**
- * Internal testing function; may expose API later.
- * @param languagePriorityList LocalePriorityList to match
- * @param matcherData Internal matching data
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
- this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
- }
-
- /**
- * Internal testing function; may expose API later.
- * @param languagePriorityList LocalePriorityList to match
- * @param matcherData Internal matching data
- * @internal
- * @deprecated This API is ICU internal only.
+ * @see Builder#setFavorSubtag(FavorSubtag)
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- @Deprecated
- public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
- this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
- this.languagePriorityList = languagePriorityList;
- for (final ULocale language : languagePriorityList) {
- add(language, languagePriorityList.getWeight(language));
- }
- processMapping();
- Iterator<ULocale> it = languagePriorityList.iterator();
- defaultLanguage = it.hasNext() ? it.next() : null;
- this.threshold = threshold;
- }
-
-
- /**
- * Returns a fraction between 0 and 1, where 1 means that the languages are a
- * perfect match, and 0 means that they are completely different. Note that
- * the precise values may change over time; no code should be made dependent
- * on the values remaining constant.
- * @param desired Desired locale
- * @param desiredMax Maximized locale (using likely subtags)
- * @param supported Supported locale
- * @param supportedMax Maximized locale (using likely subtags)
- * @return value between 0 and 1, inclusive.
- * @stable ICU 4.4
- */
- public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
- return matcherData.match(desired, desiredMax, supported, supportedMax);
+ public enum FavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ LANGUAGE,
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ SCRIPT
}
-
/**
- * Canonicalize a locale (language). Note that for now, it is canonicalizing
- * according to CLDR conventions (he vs iw, etc), since that is what is needed
- * for likelySubtags.
- * @param ulocale language/locale code
- * @return ULocale with remapped subtags.
- * @stable ICU 4.4
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see Builder#setDemotionPerDesiredLocale(Demotion)
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- public ULocale canonicalize(ULocale ulocale) {
- // TODO Get the data from CLDR, use Java conventions.
- String lang = ulocale.getLanguage();
- String lang2 = canonicalMap.get(lang);
- String script = ulocale.getScript();
- String script2 = canonicalMap.get(script);
- String region = ulocale.getCountry();
- String region2 = canonicalMap.get(region);
- if (lang2 != null || script2 != null || region2 != null) {
- return new ULocale(
- lang2 == null ? lang : lang2,
- script2 == null ? script : script2,
- region2 == null ? region : region2
- );
- }
- return ulocale;
+ public enum Demotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ NONE,
+ /**
+ * Earlier desired locales are preferred.
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ REGION
}
/**
- * Get the best match for a LanguagePriorityList
+ * Data for the best-matching pair of a desired and a supported locale.
*
- * @param languageList list to match
- * @return best matching language code
- * @stable ICU 4.4
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- public ULocale getBestMatch(LocalePriorityList languageList) {
- double bestWeight = 0;
- ULocale bestTableMatch = null;
- double penalty = 0;
- OutputDouble matchWeight = new OutputDouble();
- for (final ULocale language : languageList) {
- final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
- final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
- if (weight > bestWeight) {
- bestWeight = weight;
- bestTableMatch = matchLocale;
- }
- penalty += 0.07000001;
- }
- if (bestWeight < threshold) {
- bestTableMatch = defaultLanguage;
+ public static final class Result {
+ private final ULocale desiredULocale;
+ private final ULocale supportedULocale;
+ private final Locale desiredLocale;
+ private final Locale supportedLocale;
+ private final int desiredIndex;
+ private final int supportedIndex;
+
+ private Result(ULocale udesired, ULocale usupported,
+ Locale desired, Locale supported,
+ int desIndex, int suppIndex) {
+ desiredULocale = udesired;
+ supportedULocale = usupported;
+ desiredLocale = desired;
+ supportedLocale = supported;
+ desiredIndex = desIndex;
+ supportedIndex = suppIndex;
}
- return bestTableMatch;
- }
- /**
- * Convenience method: Get the best match for a LanguagePriorityList
- *
- * @param languageList String form of language priority list
- * @return best matching language code
- * @stable ICU 4.4
- */
- public ULocale getBestMatch(String languageList) {
- return getBestMatch(LocalePriorityList.add(languageList).build());
- }
+ /**
+ * Returns the best-matching desired locale.
+ * null if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getDesiredULocale() {
+ return desiredULocale == null && desiredLocale != null ?
+ ULocale.forLocale(desiredLocale) : desiredULocale;
+ }
+ /**
+ * Returns the best-matching desired locale.
+ * null if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getDesiredLocale() {
+ return desiredLocale == null && desiredULocale != null ?
+ desiredULocale.toLocale() : desiredLocale;
+ }
- /**
- * Get the best match for an individual language code.
- *
- * @param ulocale locale/language code to match
- * @return best matching language code
- * @stable ICU 4.4
- */
- public ULocale getBestMatch(ULocale ulocale) {
- return getBestMatchInternal(ulocale, null);
- }
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is null if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getSupportedULocale() { return supportedULocale; }
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is null if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or null.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getSupportedLocale() { return supportedLocale; }
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public ULocale getBestMatch(ULocale... ulocales) {
- return getBestMatch(LocalePriorityList.add(ulocales).build());
- }
+ /**
+ * Returns the index of the best-matching desired locale in the input Iterable order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getDesiredIndex() { return desiredIndex; }
- /**
- * {@inheritDoc}
- * @stable ICU 4.4
- */
- @Override
- public String toString() {
- return "{" + defaultLanguage + ", "
- + localeToMaxLocaleAndWeight + "}";
- }
- // ================= Privates =====================
+ /**
+ * Returns the index of the best-matching supported locale in the
+ * constructor’s or builder’s input order (“set” Collection plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getSupportedIndex() { return supportedIndex; }
- /**
- * Get the best match for an individual language code.
- *
- * @param languageCode
- * @return best matching language code and weight (as per
- * {@link #match(ULocale, ULocale)})
- */
- private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
- languageCode = canonicalize(languageCode);
- final ULocale maximized = addLikelySubtags(languageCode);
- if (DEBUG) {
- System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
- }
- double bestWeight = 0;
- ULocale bestTableMatch = null;
- String baseLanguage = maximized.getLanguage();
- Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
- if (searchTable != null) { // we preprocessed the table so as to filter by language
- if (DEBUG) System.out.println("\tSearching: " + searchTable);
- for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
- ULocale tableKey = tableKeyValue.get0();
- ULocale maxLocale = tableKeyValue.get1();
- Double matchedWeight = tableKeyValue.get2();
- final double match = match(languageCode, maximized, tableKey, maxLocale);
- if (DEBUG) {
- System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
- }
- final double weight = match * matchedWeight;
- if (weight > bestWeight) {
- bestWeight = weight;
- bestTableMatch = tableKey;
- if (weight > 0.999d) { // bail on good enough match.
- break;
- }
+        /**
+         * Takes the best-matching supported locale and adds relevant fields of the
+         * best-matching desired locale, such as the -t- and -u- extensions.
+         * May replace some fields of the supported locale: the region, the variants and
+         * the extensions of the desired locale, where present, override those of the supported locale.
+         * The result is the locale that should be used for date and number formatting, collation, etc.
+         *
+         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-SA-u-nu-latn
+         *
+         * <p>If there is no best-matching desired locale, or if it equals the supported locale,
+         * then the supported locale is returned unchanged.
+         *
+         * @return the service locale, combining the best-matching desired and supported locales;
+         *     null if the best-matching supported locale is null.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public ULocale makeServiceULocale() {
+            ULocale bestDesired = getDesiredULocale();
+            ULocale serviceLocale = supportedULocale;
+            // Test for null before calling equals(): the supported locale is documented to be
+            // null when there are no supported locales and no explicit default locale.
+            if (serviceLocale != null && bestDesired != null && !serviceLocale.equals(bestDesired)) {
+                ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
+
+                // Copy the region from bestDesired, if there is one.
+                String region = bestDesired.getCountry();
+                if (!region.isEmpty()) {
+                    b.setRegion(region);
}
- }
- }
- if (bestWeight < threshold) {
- bestTableMatch = defaultLanguage;
- }
- if (outputWeight != null) {
- outputWeight.value = bestWeight; // only return the weight when needed
- }
- return bestTableMatch;
- }
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- private static class OutputDouble { // TODO, move to where OutputInt is
- double value;
- }
-
- private void add(ULocale language, Double weight) {
- language = canonicalize(language);
- R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
- row.freeze();
- localeToMaxLocaleAndWeight.add(row);
- }
+                // Copy the variants from bestDesired, if there are any.
+                // Note that this will override any serviceLocale variants.
+                // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+                String variants = bestDesired.getVariant();
+                if (!variants.isEmpty()) {
+                    b.setVariant(variants);
+                }
- /**
- * We preprocess the data to get just the possible matches for each desired base language.
- */
- private void processMapping() {
- for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
- String desired = desiredToMatchingLanguages.getKey();
- Set<String> supported = desiredToMatchingLanguages.getValue();
- for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
- final ULocale key = localeToMaxAndWeight.get0();
- String lang = key.getLanguage();
- if (supported.contains(lang)) {
- addFiltered(desired, localeToMaxAndWeight);
+                // Copy the extensions from bestDesired, if there are any.
+                // Note that this will override any serviceLocale extensions.
+                // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+                // (replacing calendar).
+                for (char extensionKey : bestDesired.getExtensionKeys()) {
+                    b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
}
+                serviceLocale = b.build();
+            }
+            return serviceLocale;
}
- // now put in the values directly, since languages always map to themselves
- for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
- final ULocale key = localeToMaxAndWeight.get0();
- String lang = key.getLanguage();
- addFiltered(lang, localeToMaxAndWeight);
- }
- }
- private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
- Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
- if (map == null) {
- desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<>());
- }
- map.add(localeToMaxAndWeight);
- if (DEBUG) {
- System.out.println(desired + ", " + localeToMaxAndWeight);
+        /**
+         * Takes the best-matching supported locale and adds relevant fields of the
+         * best-matching desired locale, such as the -t- and -u- extensions.
+         * May replace some fields of the supported locale.
+         * The result is the locale that should be used for
+         * date and number formatting, collation, etc.
+         *
+         * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-SA-u-nu-latn
+         *
+         * @return the service locale, combining the best-matching desired and supported locales;
+         *     null if there is no best-matching supported locale.
+         * @draft ICU 65
+         * @provisional This API might change or be removed in a future release.
+         */
+        public Locale makeServiceLocale() {
+            // Without a supported locale there is nothing to combine or convert;
+            // return null instead of failing with a NullPointerException.
+            if (supportedULocale == null) {
+                return null;
+            }
+            return makeServiceULocale().toLocale();
}
}
- Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<>();
- Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
- = new LinkedHashMap<>();
-
- // =============== Special Mapping Information ==============
+ private final int thresholdDistance;
+ private final int demotionPerDesiredLocale;
+ private final FavorSubtag favorSubtag;
+
+ // These are in input order.
+ private final ULocale[] supportedULocales;
+ private final Locale[] supportedLocales;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ private final Map<LSR, Integer> supportedLsrToIndex;
+ // Array versions of the supportedLsrToIndex keys and values.
+ // The distance lookup loops over the supportedLsrs and returns the index of the best match.
+ private final LSR[] supportedLsrs;
+ private final int[] supportedIndexes;
+ private final ULocale defaultULocale;
+ private final Locale defaultLocale;
+ private final int defaultLocaleIndex;
/**
- * We need to add another method to addLikelySubtags that doesn't return
- * null, but instead substitutes Zzzz and ZZ if unknown. There are also
- * a few cases where addLikelySubtags needs to have expanded data, to handle
- * all deprecated codes.
- * @param languageCode
- * @return "fixed" addLikelySubtags
+ * LocaleMatcher Builder.
+ *
+ * @see LocaleMatcher#builder()
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- private ULocale addLikelySubtags(ULocale languageCode) {
- // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
- // language would normally match English. But that would produce the counterintuitive results
- // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
- // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
- //
- // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
- // so that max("und")="und". That produces the following, more desirable results:
- if (languageCode.equals(UNKNOWN_LOCALE)) {
- return UNKNOWN_LOCALE;
- }
- final ULocale result = ULocale.addLikelySubtags(languageCode);
- // should have method on getLikelySubtags for this
- if (result == null || result.equals(languageCode)) {
- final String language = languageCode.getLanguage();
- final String script = languageCode.getScript();
- final String region = languageCode.getCountry();
- return new ULocale((language.length()==0 ? "und"
- : language)
- + "_"
- + (script.length()==0 ? "Zzzz" : script)
- + "_"
- + (region.length()==0 ? "ZZ" : region));
- }
- return result;
- }
+ public static class Builder {
+ private List<ULocale> supportedLocales;
+ private int thresholdDistance = -1;
+ private Demotion demotion;
+ private ULocale defaultLocale;
+ private FavorSubtag favor;
- private static class LocalePatternMatcher {
- // a value of null means a wildcard; matches any.
- private String lang;
- private String script;
- private String region;
- private Level level;
- static Pattern pattern = Pattern.compile(
- "([a-z]{1,8}|\\*)"
- + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
- + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
-
- public LocalePatternMatcher(String toMatch) {
- Matcher matcher = pattern.matcher(toMatch);
- if (!matcher.matches()) {
- throw new IllegalArgumentException("Bad pattern: " + toMatch);
- }
- lang = matcher.group(1);
- script = matcher.group(2);
- region = matcher.group(3);
- level = region != null ? Level.region : script != null ? Level.script : Level.language;
+ private Builder() {}
- if (lang.equals("*")) {
- lang = null;
- }
- if (script != null && script.equals("*")) {
- script = null;
- }
- if (region != null && region.equals("*")) {
- region = null;
- }
- }
-
- boolean matches(ULocale ulocale) {
- if (lang != null && !lang.equals(ulocale.getLanguage())) {
- return false;
- }
- if (script != null && !script.equals(ulocale.getScript())) {
- return false;
- }
- if (region != null && !region.equals(ulocale.getCountry())) {
- return false;
- }
- return true;
+ /**
+ * Parses the string like {@link LocalePriorityList} does and
+ * sets the supported locales accordingly.
+ * Clears any previously set/added supported locales first.
+ *
+ * @param locales the string of locales to set, to be parsed like LocalePriorityList does
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setSupportedLocales(String locales) {
+ return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
}
- public Level getLevel() {
- return level;
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setSupportedULocales(Collection<ULocale> locales) {
+ supportedLocales = new ArrayList<>(locales);
+ return this;
}
- public String getLanguage() {
- return (lang == null ? "*" : lang);
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setSupportedLocales(Collection<Locale> locales) {
+ supportedLocales = new ArrayList<>(locales.size());
+ for (Locale locale : locales) {
+ supportedLocales.add(ULocale.forLocale(locale));
+ }
+ return this;
}
- public String getScript() {
- return (script == null ? "*" : script);
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale the locale to add
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder addSupportedULocale(ULocale locale) {
+ if (supportedLocales == null) {
+ supportedLocales = new ArrayList<>();
+ }
+ supportedLocales.add(locale);
+ return this;
}
- public String getRegion() {
- return (region == null ? "*" : region);
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale the locale to add
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder addSupportedLocale(Locale locale) {
+ return addSupportedULocale(ULocale.forLocale(locale));
}
- @Override
- public String toString() {
- String result = getLanguage();
- if (level != Level.language) {
- result += "-" + getScript();
- if (level != Level.script) {
- result += "-" + getRegion();
- }
- }
- return result;
+ /**
+ * Sets the default locale; if null, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setDefaultULocale(ULocale defaultLocale) {
+ this.defaultLocale = defaultLocale;
+ return this;
}
- /* (non-Javadoc)
- * @see java.lang.Object#equals(java.lang.Object)
+ /**
+ * Sets the default locale; if null, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- @Override
- public boolean equals(Object obj) {
- if (obj == this) {
- return true;
- }
- if (obj == null || !(obj instanceof LocalePatternMatcher)) {
- return false;
- }
- LocalePatternMatcher other = (LocalePatternMatcher) obj;
- return Objects.equals(level, other.level)
- && Objects.equals(lang, other.lang)
- && Objects.equals(script, other.script)
- && Objects.equals(region, other.region);
+ public Builder setDefaultLocale(Locale defaultLocale) {
+ this.defaultLocale = ULocale.forLocale(defaultLocale);
+ return this;
}
- /* (non-Javadoc)
- * @see java.lang.Object#hashCode()
+ /**
+ * If SCRIPT, then the language differences are smaller than script differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- @Override
- public int hashCode() {
- return level.ordinal()
- ^ (lang == null ? 0 : lang.hashCode())
- ^ (script == null ? 0 : script.hashCode())
- ^ (region == null ? 0 : region.hashCode());
+ public Builder setFavorSubtag(FavorSubtag subtag) {
+ this.favor = subtag;
+ return this;
}
- }
- enum Level {
- language(0.99),
- script(0.2),
- region(0.04);
-
- final double worst;
-
- Level(double d) {
- worst = d;
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setDemotionPerDesiredLocale(Demotion demotion) {
+ this.demotion = demotion;
+ return this;
}
- }
- private static class ScoreData implements Freezable<ScoreData> {
- @SuppressWarnings("unused")
- private static final double maxUnequal_changeD_sameS = 0.5;
-
- @SuppressWarnings("unused")
- private static final double maxUnequal_changeEqual = 0.75;
-
- LinkedHashSet<Row.R3<LocalePatternMatcher,LocalePatternMatcher,Double>> scores = new LinkedHashSet<>();
- final Level level;
-
- public ScoreData(Level level) {
- this.level = level;
- }
-
- void addDataToScores(String desired, String supported, R3<LocalePatternMatcher,LocalePatternMatcher,Double> data) {
- // Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
- // if (lang_result == null) {
- // scores.put(desired, lang_result = new HashMap());
- // }
- // Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
- // if (result == null) {
- // lang_result.put(supported, result = new LinkedHashSet());
- // }
- // result.add(data);
- boolean added = scores.add(data);
- if (!added) {
- throw new ICUException("trying to add duplicate data: " + data);
+ /**
+ * <i>Internal only!</i>
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ public Builder internalSetThresholdDistance(int thresholdDistance) {
+ if (thresholdDistance > 100) {
+ thresholdDistance = 100;
}
+ this.thresholdDistance = thresholdDistance;
+ return this;
}
- double getScore(ULocale dMax, String desiredRaw, String desiredMax,
- ULocale sMax, String supportedRaw, String supportedMax) {
- double distance = 0;
- if (!desiredMax.equals(supportedMax)) {
- distance = getRawScore(dMax, sMax);
- } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
- distance += 0.001;
- }
- return distance;
+ /**
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @return new LocaleMatcher.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public LocaleMatcher build() {
+ return new LocaleMatcher(this);
}
- private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
- if (DEBUG) {
- System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
+ /**
+ * {@inheritDoc}
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+        @Override
+        public String toString() {
+            // Debug-style summary that lists only the options that have been set.
+            StringBuilder s = new StringBuilder().append("{LocaleMatcher.Builder");
+            // supportedLocales stays null until one of the set/add methods is called,
+            // so check for null before asking whether the list is empty.
+            if (supportedLocales != null && !supportedLocales.isEmpty()) {
+                s.append(" supported={").append(supportedLocales.toString()).append('}');
}
- for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
- if (datum.get0().matches(desiredLocale)
- && datum.get1().matches(supportedLocale)) {
- if (DEBUG) {
- System.out.println("\t\t\t\tFOUND\t" + datum);
- }
- return datum.get2();
- }
+            if (defaultLocale != null) {
+                s.append(" default=").append(defaultLocale.toString());
}
- if (DEBUG) {
- System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
+            if (favor != null) {
+                s.append(" distance=").append(favor.toString());
}
- return level.worst;
- }
-
- @Override
- public String toString() {
- StringBuilder result = new StringBuilder().append(level);
- for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
- result.append("\n\t\t").append(score);
+            // A negative threshold means "use the default", so it is not printed.
+            if (thresholdDistance >= 0) {
+                s.append(String.format(" threshold=%d", thresholdDistance));
+            }
+            if (demotion != null) {
+                s.append(" demotion=").append(demotion.toString());
}
- return result.toString();
+            return s.append('}').toString();
}
+ }
+ /**
+ * Returns a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
- @Override
- @SuppressWarnings("unchecked")
- public ScoreData cloneAsThawed() {
- try {
- ScoreData result = (ScoreData) clone();
- result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
- result.frozen = false;
- return result;
- } catch (CloneNotSupportedException e) {
- throw new ICUCloneNotSupportedException(e); // will never happen
- }
+ /**
+ * Copies the supported locales, preserving iteration order, and constructs a LocaleMatcher.
+ * The first locale is used as the default locale for when there is no good match.
+ *
+ * @param supportedLocales list of locales
+ * @stable ICU 4.4
+ */
+ public LocaleMatcher(LocalePriorityList supportedLocales) {
+ this(builder().setSupportedULocales(supportedLocales.getULocales()));
+ }
- }
+ /**
+ * Parses the string like {@link LocalePriorityList} does and
+ * constructs a LocaleMatcher for the supported locales parsed from the string.
+ * The first one (in LocalePriorityList iteration order) is used as the default locale for
+ * when there is no good match.
+ *
+ * @param supportedLocales the string of locales to set,
+ * to be parsed like LocalePriorityList does
+ * @stable ICU 4.4
+ */
+ public LocaleMatcher(String supportedLocales) {
+ this(builder().setSupportedLocales(supportedLocales));
+ }
- private volatile boolean frozen = false;
+    /**
+     * Builds the matcher state from the builder's current settings.
+     *
+     * <p>The supported locales are stored in input order, both as ULocale and as java.util.Locale.
+     * Their LSRs are collected into a lookup order of: the default locale first,
+     * then locales with the default's LSR or a paradigm LSR, then all others.
+     * If the builder has no explicit default locale, the first supported locale becomes the default.
+     * A builder without any supported locales yields a matcher with empty supported-locale arrays.
+     *
+     * @param builder the builder whose settings are read; it is not modified
+     */
+    private LocaleMatcher(Builder builder) {
+        // A negative builder threshold selects the default from LocaleDistance.
+        thresholdDistance = builder.thresholdDistance < 0 ?
+                LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
+        // The builder's list is null if no supported locales were ever set or added;
+        // treat that like an empty list instead of throwing a NullPointerException.
+        List<ULocale> inputLocales = builder.supportedLocales;
+        int supportedLocalesLength = inputLocales != null ? inputLocales.size() : 0;
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., java.util.Locale)
+        // we can return those by parallel index.
+        supportedULocales = new ULocale[supportedLocalesLength];
+        supportedLocales = new Locale[supportedLocalesLength];
+        // Supported LSRs in input order.
+        LSR lsrs[] = new LSR[supportedLocalesLength];
+        // Also find the first supported locale whose LSR is
+        // the same as that for the default locale.
+        ULocale udef = builder.defaultLocale;
+        Locale def = null;
+        LSR defLSR = null;
+        int idef = -1;
+        if (udef != null) {
+            def = udef.toLocale();
+            defLSR = getMaximalLsrOrUnd(udef);
+        }
+        for (int i = 0; i < supportedLocalesLength; ++i) {
+            ULocale locale = inputLocales.get(i);
+            supportedULocales[i] = locale;
+            supportedLocales[i] = locale.toLocale();
+            LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
+            if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
+                idef = i;
+            }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, Indexes).
+        // We use a LinkedHashMap for both,
+        // and insert the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales in builder order.
+        // 3. Remaining locales in builder order.
+        supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+        Map<LSR, Integer> otherLsrToIndex = null;
+        if (idef >= 0) {
+            supportedLsrToIndex.put(defLSR, idef);
+        }
+        // Use an index loop so that skipping the default locale cannot skip the increment:
+        // a "continue" that bypasses a manual ++i would leave i stuck at idef and
+        // silently drop every supported locale from that position onward.
+        for (int i = 0; i < supportedLocalesLength; ++i) {
+            if (i == idef) { continue; }  // The default locale was already inserted first.
+            LSR lsr = lsrs[i];
+            if (defLSR == null) {
+                // No explicit default locale: the first supported locale becomes the default.
+                assert i == 0;
+                udef = supportedULocales[0];
+                def = supportedLocales[0];
+                defLSR = lsr;
+                idef = 0;
+                supportedLsrToIndex.put(lsr, 0);
+            } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
+                putIfAbsent(supportedLsrToIndex, lsr, i);
+            } else {
+                if (otherLsrToIndex == null) {
+                    otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
+                }
+                putIfAbsent(otherLsrToIndex, lsr, i);
+            }
+        }
+        if (otherLsrToIndex != null) {
+            supportedLsrToIndex.putAll(otherLsrToIndex);
+        }
+        // Flatten the ordered map into parallel arrays for the distance lookup.
+        int numSuppLsrs = supportedLsrToIndex.size();
+        supportedLsrs = new LSR[numSuppLsrs];
+        supportedIndexes = new int[numSuppLsrs];
+        int next = 0;
+        for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
+            supportedLsrs[next] = entry.getKey();  // = lsrs[entry.getValue()]
+            supportedIndexes[next++] = entry.getValue();
+        }
+
+        defaultULocale = udef;
+        defaultLocale = def;
+        defaultLocaleIndex = idef;
+        demotionPerDesiredLocale =
+                builder.demotion == Demotion.NONE ? 0 :
+                    LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale();  // null or REGION
+        favorSubtag = builder.favor;
+    }
- @Override
- public ScoreData freeze() {
- return this;
+    private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
+        // Keep the index already stored for this LSR, if there is one.
+        if (lsrToIndex.get(lsr) != null) {
+            return;
}
+        lsrToIndex.put(lsr, i);
+    }
- @Override
- public boolean isFrozen() {
- return frozen;
+    private static final LSR getMaximalLsrOrUnd(ULocale locale) {
+        // UND_ULOCALE maps directly to UND_LSR; any other locale goes through XLikelySubtags.
+        if (!locale.equals(UND_ULOCALE)) {
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
+        return UND_LSR;
+    }
- public Relation<String,String> getMatchingLanguages() {
- Relation<String,String> desiredToSupported = Relation.of(new LinkedHashMap<String,Set<String>>(), HashSet.class);
- for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
- LocalePatternMatcher desired = item.get0();
- LocalePatternMatcher supported = item.get1();
- if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
- desiredToSupported.put(desired.lang, supported.lang);
- }
- }
- desiredToSupported.freeze();
- return desiredToSupported;
+    private static final LSR getMaximalLsrOrUnd(Locale locale) {
+        // UND_LOCALE maps directly to UND_LSR; any other locale goes through XLikelySubtags.
+        if (!locale.equals(UND_LOCALE)) {
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
+        return UND_LSR;
}
- /**
- * Only for testing and use by tools. Interface may change!!
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
- private ScoreData languageScores = new ScoreData(Level.language);
- private ScoreData scriptScores = new ScoreData(Level.script);
- private ScoreData regionScores = new ScoreData(Level.region);
- private Relation<String, String> matchingLanguages;
- private volatile boolean frozen = false;
-
+ private static final class ULocaleLsrIterator extends LsrIterator {
+ private Iterator<ULocale> locales;
+ private ULocale current, remembered;
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public LanguageMatcherData() {
+ ULocaleLsrIterator(Iterator<ULocale> locales) {
+ this.locales = locales;
}
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public Relation<String, String> matchingLanguages() {
- return matchingLanguages;
+ @Override
+ public boolean hasNext() {
+ return locales.hasNext();
}
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
@Override
- @Deprecated
- public String toString() {
- return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
+ public LSR next() {
+ current = locales.next();
+ return getMaximalLsrOrUnd(current);
}
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
- double diff = 0;
- diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
- if (diff > 0.999d) { // with no language match, we bail
- return 0.0d;
- }
- diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
- diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
-
- if (!a.getVariant().equals(b.getVariant())) {
- diff += 0.01;
- }
- if (diff < 0.0d) {
- diff = 0.0d;
- } else if (diff > 1.0d) {
- diff = 1.0d;
- }
- if (DEBUG) {
- System.out.println("\t\t\tTotal Distance\t" + diff);
- }
- return 1.0 - diff;
+ @Override
+ public void rememberCurrent(int desiredIndex) {
+ bestDesiredIndex = desiredIndex;
+ remembered = current;
}
+ }
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
- return addDistance(desired, supported, percent, false, comment);
- }
- /**
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
- return addDistance(desired, supported, percent, oneway, null);
- }
-
- private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
- if (DEBUG) {
- System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
- " supported=\"" + supported + "\"" +
- " percent=\"" + percent + "\""
- + (oneway ? " oneway=\"true\"" : "")
- + "/>"
- + (comment == null ? "" : "\t<!-- " + comment + " -->"));
- // // .addDistance("nn", "nb", 4, true)
- // System.out.println(".addDistance(\"" + desired + "\"" +
- // ", \"" + supported + "\"" +
- // ", " + percent + ""
- // + (oneway ? "" : ", true")
- // + (comment == null ? "" : ", \"" + comment + "\"")
- // + ")"
- // );
+ private static final class LocaleLsrIterator extends LsrIterator {
+ private Iterator<Locale> locales;
+ private Locale current, remembered;
- }
- double score = 1-percent/100.0; // convert from percentage
- LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
- Level desiredLen = desiredMatcher.getLevel();
- LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
- Level supportedLen = supportedMatcher.getLevel();
- if (desiredLen != supportedLen) {
- throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
- }
- R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
- R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
- boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
- switch (desiredLen) {
- case language:
- String dlanguage = desiredMatcher.getLanguage();
- String slanguage = supportedMatcher.getLanguage();
- languageScores.addDataToScores(dlanguage, slanguage, data);
- if (!oneway && !desiredEqualsSupported) {
- languageScores.addDataToScores(slanguage, dlanguage, data2);
- }
- break;
- case script:
- String dscript = desiredMatcher.getScript();
- String sscript = supportedMatcher.getScript();
- scriptScores.addDataToScores(dscript, sscript, data);
- if (!oneway && !desiredEqualsSupported) {
- scriptScores.addDataToScores(sscript, dscript, data2);
- }
- break;
- case region:
- String dregion = desiredMatcher.getRegion();
- String sregion = supportedMatcher.getRegion();
- regionScores.addDataToScores(dregion, sregion, data);
- if (!oneway && !desiredEqualsSupported) {
- regionScores.addDataToScores(sregion, dregion, data2);
- }
- break;
- }
- return this;
+ LocaleLsrIterator(Iterator<Locale> locales) {
+ this.locales = locales;
}
- /**
- * {@inheritDoc}
- * @internal
- * @deprecated This API is ICU internal only.
- */
@Override
- @Deprecated
- public LanguageMatcherData cloneAsThawed() {
- LanguageMatcherData result;
- try {
- result = (LanguageMatcherData) clone();
- result.languageScores = languageScores.cloneAsThawed();
- result.scriptScores = scriptScores.cloneAsThawed();
- result.regionScores = regionScores.cloneAsThawed();
- result.frozen = false;
- return result;
- } catch (CloneNotSupportedException e) {
- throw new ICUCloneNotSupportedException(e); // will never happen
- }
+ // True while the wrapped iterator of desired locales has more elements.
+ public boolean hasNext() {
+ return locales.hasNext();
}
- /**
- * {@inheritDoc}
- * @internal
- * @deprecated This API is ICU internal only.
- */
@Override
- @Deprecated
- public LanguageMatcherData freeze() {
- languageScores.freeze();
- regionScores.freeze();
- scriptScores.freeze();
- matchingLanguages = languageScores.getMatchingLanguages();
- frozen = true;
- return this;
+ public LSR next() {
+ // Keep the raw locale around so that rememberCurrent() can record it later.
+ Locale upcoming = locales.next();
+ current = upcoming;
+ return getMaximalLsrOrUnd(upcoming);
}
- /**
- * {@inheritDoc}
- * @internal
- * @deprecated This API is ICU internal only.
- */
@Override
- @Deprecated
- public boolean isFrozen() {
- return frozen;
+ public void rememberCurrent(int desiredIndex) {
+ // Snapshot the locale most recently returned by next(), together with its position.
+ remembered = current;
+ bestDesiredIndex = desiredIndex;
}
}
- LanguageMatcherData matcherData;
- LocalePriorityList languagePriorityList;
+ /**
+ * Finds the supported locale that is the best match for a single desired locale.
+ * The matcher's default locale is returned when no supported locale matches well enough.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @return the best-matching supported locale.
+ * @stable ICU 4.4
+ */
+ public ULocale getBestMatch(ULocale desiredLocale) {
+ // There is only one candidate, so no iterator of remaining locales is passed along.
+ int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+ if (bestIndex < 0) {
+ return defaultULocale;
+ }
+ return supportedULocales[bestIndex];
+ }
- private static final LanguageMatcherData defaultWritten;
+ /**
+ * Finds the supported locale that is the best match for any of the desired locales.
+ * The matcher's default locale is returned when the list is empty
+ * or when no supported locale matches well enough.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * (In ICU 4.4..63 this parameter had type LocalePriorityList.)
+ * @return the best-matching supported locale.
+ * @stable ICU 4.4
+ */
+ public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
+ Iterator<ULocale> candidates = desiredLocales.iterator();
+ if (candidates.hasNext()) {
+ // The first LSR is handed over directly; the iterator supplies the rest.
+ ULocaleLsrIterator remaining = new ULocaleLsrIterator(candidates);
+ LSR first = remaining.next();
+ int bestIndex = getBestSuppIndex(first, remaining);
+ if (bestIndex >= 0) {
+ return supportedULocales[bestIndex];
+ }
+ }
+ return defaultULocale;
+ }
- private static HashMap<String,String> canonicalMap = new HashMap<>();
+ /**
+ * Convenience overload for a locale list given as a string.
+ * The string is parsed the way {@link LocalePriorityList} parses it, and the
+ * supported locale which best matches one of the parsed desired locales is returned.
+ *
+ * @param desiredLocaleList Typically a user's languages, in order of preference (descending),
+ * as a string which is to be parsed like LocalePriorityList does.
+ * @return the best-matching supported locale.
+ * @stable ICU 4.4
+ */
+ public ULocale getBestMatch(String desiredLocaleList) {
+ LocalePriorityList parsed = LocalePriorityList.add(desiredLocaleList).build();
+ return getBestMatch(parsed);
+ }
+ /**
+ * Finds the supported locale that is the best match for a single desired locale.
+ * The matcher's default locale is returned when no supported locale matches well enough.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getBestLocale(Locale desiredLocale) {
+ // There is only one candidate, so no iterator of remaining locales is passed along.
+ int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+ if (bestIndex < 0) {
+ return defaultLocale;
+ }
+ return supportedLocales[bestIndex];
+ }
- static {
- canonicalMap.put("iw", "he");
- canonicalMap.put("mo", "ro");
- canonicalMap.put("tl", "fil");
+ /**
+ * Finds the supported locale that is the best match for any of the desired locales.
+ * The matcher's default locale is returned when the list is empty
+ * or when no supported locale matches well enough.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Locale getBestLocale(Iterable<Locale> desiredLocales) {
+ Iterator<Locale> candidates = desiredLocales.iterator();
+ if (candidates.hasNext()) {
+ // The first LSR is handed over directly; the iterator supplies the rest.
+ LocaleLsrIterator remaining = new LocaleLsrIterator(candidates);
+ LSR first = remaining.next();
+ int bestIndex = getBestSuppIndex(first, remaining);
+ if (bestIndex >= 0) {
+ return supportedLocales[bestIndex];
+ }
+ }
+ return defaultLocale;
+ }
- ICUResourceBundle suppData = getICUSupplementalData();
- ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
- ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
- defaultWritten = new LanguageMatcherData();
+ private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
+ // Builds the Result for the ULocale-based lookups.
+ if (suppIndex < 0) {
+ // No match: report the defaults, with no desired locale and -1 as its index.
+ return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ }
+ if (desiredLocale != null) {
+ // Single desired locale: it is by definition at position 0.
+ return new Result(desiredLocale, supportedULocales[suppIndex],
+ null, supportedLocales[suppIndex], 0, suppIndex);
+ }
+ // List of desired locales: report what the iterator remembered as the best one.
+ return new Result(lsrIter.remembered, supportedULocales[suppIndex],
+ null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
+ }
- for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
- ICUResourceBundle item = (ICUResourceBundle) iter.next();
- /*
- "*_*_*",
- "*_*_*",
- "96",
- */
- // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
- boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
- defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
+ private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
+ // Builds the Result for the java.util.Locale-based lookups.
+ if (suppIndex < 0) {
+ // No match: report the defaults, with no desired locale and -1 as its index.
+ return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
+ }
+ if (desiredLocale != null) {
+ // Single desired locale: it is by definition at position 0.
+ return new Result(null, supportedULocales[suppIndex],
+ desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
}
- defaultWritten.freeze();
+ // List of desired locales: report what the iterator remembered as the best one.
+ return new Result(null, supportedULocales[suppIndex],
+ lsrIter.remembered, supportedLocales[suppIndex],
+ lsrIter.bestDesiredIndex, suppIndex);
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * Returns the best match between the desired locale and the supported locales.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @return the best-matching pair of the desired and a supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Result getBestMatchResult(ULocale desiredLocale) {
+ // There is only one candidate, so no iterator of remaining locales is involved.
+ int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+ return makeResult(desiredLocale, null, bestIndex);
+ }
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @return the best-matching pair of a desired and a supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- @Deprecated
- public static ICUResourceBundle getICUSupplementalData() {
- ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
- ICUData.ICU_BASE_NAME,
- "supplementalData",
- ICUResourceBundle.ICU_DATA_CLASS_LOADER);
- return suppData;
+ public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
+ Iterator<ULocale> candidates = desiredLocales.iterator();
+ if (candidates.hasNext()) {
+ // The first LSR is handed over directly; the iterator supplies the rest
+ // and remembers which desired locale produced the best match.
+ ULocaleLsrIterator remaining = new ULocaleLsrIterator(candidates);
+ LSR first = remaining.next();
+ int bestIndex = getBestSuppIndex(first, remaining);
+ return makeResult(null, remaining, bestIndex);
+ }
+ // Nothing was requested: produce the no-match result.
+ return makeResult(UND_ULOCALE, null, -1);
}
/**
- * @internal
- * @deprecated This API is ICU internal only.
+ * Returns the best match between the desired locale and the supported locales.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @return the best-matching pair of the desired and a supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
*/
- @Deprecated
- public static double match(ULocale a, ULocale b) {
- final LocaleMatcher matcher = new LocaleMatcher("");
- return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
+ public Result getBestLocaleResult(Locale desiredLocale) {
+ // There is only one candidate, so no iterator of remaining locales is involved.
+ int bestIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
+ return makeResult(desiredLocale, null, bestIndex);
}
- transient XLocaleMatcher xLocaleMatcher = null;
- transient ULocale xDefaultLanguage = null;
- transient boolean xFavorScript = false;
+ /**
+ * Finds the best-matching pair of one desired locale and one supported locale.
+ * When the list of desired locales is empty, the no-match result is returned.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @return the best-matching pair of a desired and a supported locale.
+ * @draft ICU 65
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
+ Iterator<Locale> candidates = desiredLocales.iterator();
+ if (candidates.hasNext()) {
+ // The first LSR is handed over directly; the iterator supplies the rest
+ // and remembers which desired locale produced the best match.
+ LocaleLsrIterator remaining = new LocaleLsrIterator(candidates);
+ LSR first = remaining.next();
+ int bestIndex = getBestSuppIndex(first, remaining);
+ return makeResult(null, remaining, bestIndex);
+ }
+ // Nothing was requested: produce the no-match result.
+ return makeResult(UND_LOCALE, null, -1);
+ }
- private synchronized XLocaleMatcher getLocaleMatcher() {
- if (xLocaleMatcher == null) {
- Builder builder = XLocaleMatcher.builder();
- builder.setSupportedULocales(languagePriorityList.getULocales());
- if (xDefaultLanguage != null) {
- builder.setDefaultULocale(xDefaultLanguage);
+ /**
+ * Walks the desired locales in order and picks the best-matching supported locale.
+ *
+ * <p>For each desired LSR, an exact hit in supportedLsrToIndex ends the search at once.
+ * Otherwise LocaleDistance is asked for a match within the current bestDistance;
+ * a non-negative answer becomes the new best candidate. After each desired locale,
+ * bestDistance is reduced by demotionPerDesiredLocale, and the search stops once it
+ * reaches zero or the desired locales run out.
+ *
+ * <p>Whenever a desired locale becomes the best one so far, remainingIter (if not null)
+ * is told to remember it together with its position in the desired list.
+ *
+ * @param desiredLSR The first desired locale's LSR.
+ * @param remainingIter Remaining desired LSRs, null or empty if none.
+ * @return the index of the best-matching supported locale, or -1 if there is no good match.
+ */
+ private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
+ // Position of desiredLSR in the caller's list; must advance together with desiredLSR.
+ int desiredIndex = 0;
+ int bestSupportedLsrIndex = -1;
+ for (int bestDistance = thresholdDistance;;) {
+ // Quick check for exact maximized LSR.
+ Integer index = supportedLsrToIndex.get(desiredLSR);
+ if (index != null) {
+ int suppIndex = index;
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
+ supportedULocales[suppIndex]);
+ }
+ if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+ return suppIndex;
}
- if (xFavorScript) {
- builder.setFavorSubtag(FavorSubtag.SCRIPT);
+ int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
+ desiredLSR, supportedLsrs, bestDistance, favorSubtag);
+ if (bestIndexAndDistance >= 0) {
+ // Packed result: distance in the low 8 bits, supported-LSR index above them.
+ bestDistance = bestIndexAndDistance & 0xff;
+ if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
+ bestSupportedLsrIndex = bestIndexAndDistance >> 8;
}
- xLocaleMatcher = builder.build();
+ if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+ break;
+ }
+ if (remainingIter == null || !remainingIter.hasNext()) {
+ break;
+ }
+ desiredLSR = remainingIter.next();
+ // Keep the index in step with the LSR; otherwise every rememberCurrent()
+ // call would report position 0 regardless of which desired locale matched.
+ ++desiredIndex;
+ }
+ if (bestSupportedLsrIndex < 0) {
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning default %s: no good match\n", defaultULocale);
+ }
+ return -1;
}
- return xLocaleMatcher;
+ // Translate the position in supportedLsrs into an index into the supported locales.
+ int suppIndex = supportedIndexes[bestSupportedLsrIndex];
+ if (TRACE_MATCHER) {
+ System.err.printf("Returning %s: best matching supported locale\n",
+ supportedULocales[suppIndex]);
+ }
+ return suppIndex;
}
/**
- * Get the best match between the desired languages and supported languages
- * This supports the new CLDR syntax to provide for better matches within
- * regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US)
- * and also matching between regions and macroregions, such as comparing es-419 to es-AR).
- * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
- * @param outputBestDesired The one of the desired languages that matched best.
- * Set to null if the best match was not below the threshold distance.
- * @return best-match supported language
- * @internal
- * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different.
+ *
+ * <p>This is mostly an implementation detail, and the precise values may change over time.
+ * The implementation may use either the maximized forms or the other ones, or both.
+ * The implementation may or may not rely on the forms to be consistent with each other.
+ *
+ * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+ *
+ * @param desired Desired locale.
+ * @param desiredMax Maximized locale (using likely subtags).
+ * @param supported Supported locale.
+ * @param supportedMax Maximized locale (using likely subtags).
+ * @return value between 0 and 1, inclusive.
+ * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
*/
@Deprecated
- public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
- if (outputBestDesired == null) {
- return getLocaleMatcher().getBestMatch(desiredLanguages);
- } else {
- XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages);
- outputBestDesired.value = result.getDesiredULocale();
- return result.getSupportedULocale();
- }
+ public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
+ // desiredMax and supportedMax are not consulted: both locales are maximized right here.
+ LocaleDistance distances = LocaleDistance.INSTANCE;
+ LSR desiredLsr = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
+ LSR[] candidates = { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) };
+ int indexAndDistance = distances.getBestIndexAndDistance(
+ desiredLsr, candidates, thresholdDistance, favorSubtag);
+ // Only the low 8 bits (the distance) are of interest; the index part is dropped.
+ int distance = indexAndDistance & 0xff;
+ // Inverse of the distance: 1 - distance(desired, supported), on a 0..1 scale.
+ return (100 - distance) / 100.0;
}
/**
- * Set the default language, with null = default = first supported language
- * @param defaultLanguage Language to use in case the threshold for distance is exceeded.
- * @return this, for chaining
- * @internal
- * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ * Partially canonicalizes a locale (language). Note that for now, it is canonicalizing
+ * according to CLDR conventions (he vs iw, etc), since that is what is needed
+ * for likelySubtags.
+ *
+ * <p>Currently, this is a much simpler canonicalization than what the ULocale class does:
+ * The language/script/region subtags are each mapped separately, ignoring the other subtags.
+ * If none of these change, then the input locale is returned.
+ * Otherwise a new ULocale with only those subtags is returned, removing variants and extensions.
+ *
+ * @param locale language/locale code
+ * @return ULocale with remapped subtags.
+ * @stable ICU 4.4
*/
- @Deprecated
- public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
- this.xDefaultLanguage = defaultLanguage;
- xLocaleMatcher = null;
- return this;
+ public ULocale canonicalize(ULocale locale) {
+ // Pure delegation: the subtag remapping is done by XLikelySubtags.
+ return XLikelySubtags.INSTANCE.canonicalize(locale);
}
/**
- * If true, then the language differences are smaller than than script differences.
- * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
- * @param favorScript Set to true to treat script as most important.
- * @return this, for chaining.
- * @internal
- * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
+ * {@inheritDoc}
+ * @stable ICU 4.4
*/
- @Deprecated
- public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
- this.xFavorScript = favorScript;
- xLocaleMatcher = null;
- return this;
+ @Override
+ public String toString() {
+ StringBuilder text = new StringBuilder("{LocaleMatcher");
+ if (supportedULocales.length > 0) {
+ // Comma-separated list of the supported locales, wrapped in braces.
+ text.append(" supported={");
+ String separator = "";
+ for (ULocale supported : supportedULocales) {
+ text.append(separator).append(supported.toString());
+ separator = ", ";
+ }
+ text.append('}');
+ }
+ text.append(" default=").append(Objects.toString(defaultULocale));
+ // The optional settings are only listed when they are set.
+ if (favorSubtag != null) {
+ text.append(" distance=").append(favorSubtag.toString());
+ }
+ if (thresholdDistance >= 0) {
+ text.append(String.format(" threshold=%d", thresholdDistance));
+ }
+ text.append(String.format(" demotion=%d", demotionPerDesiredLocale));
+ text.append('}');
+ return text.toString();
}
}
<path id="javac.classpathref.core-tests">
<pathelement location="${icu4j.core.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
+ <pathelement location="${icu4j.tools.jar}"/>
</path>
- <target name="_all.core-tests" depends="_all.core, _all.test-framework">
+ <target name="_all.core-tests" depends="_all.core, _all.test-framework, _all.tools">
<ant dir="${icu4j.core-tests.dir}" inheritAll="false"/>
</target>
<pathelement location="${icu4j.collate.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
- <pathelement location="${icu4j.core-tests.jar}"/>
- <pathelement location="${icu4j.translit-tests.jar}"/>
</path>
- <target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework, _all.core-tests, _all.translit-tests">
+ <target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework">
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
</target>
version https://git-lfs.github.com/spec/v1
-oid sha256:bd004f5d8064e047cef4f7d31326b39b7fc43fba685fab2f0d23c154f4dbc637
-size 12818511
+oid sha256:b21585ec768edea7b099bd6a97b0a4130b53966a63e6a10de2f31b22f8b59fbd
+size 12840921
<attribute name="javadoc_location" value="jar:platform:/resource/external-libraries/JUnitParams-1.0.5-javadoc.jar!/"/>
</attributes>
</classpathentry>
+ <classpathentry kind="src" path="/icu4j-tools"/>
<classpathentry kind="output" path="out/bin"/>
</classpath>
<project>icu4j-regiondata</project>
<project>icu4j-shared</project>
<project>icu4j-test-framework</project>
+ <project>icu4j-tools</project>
</projects>
<buildSpec>
<buildCommand>
*
*/
-package com.ibm.icu.dev.tool.serializable;
+package com.ibm.icu.dev.test.serializable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Iterator;
import java.util.List;
-import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
import com.ibm.icu.impl.URLHandler;
/**
* and lists all those classes that implement <code>Serializable</code>. It also checks
* to make sure that those classes have the <code>serialVersionUID</code>
* field define.
- *
+ *
*/
public class SerializableChecker implements URLHandler.URLVisitor
{
private static Class serializable;
//private static Class throwable;
-
+
private String path = null;
-
+
//private boolean write;
-
+
public SerializableChecker(String path)
{
this.path = path;
-
+
if (path != null) {
File dir = new File(path);
-
+
if (!dir.exists()) {
dir.mkdirs();
}
}
}
-
+
static {
- try {
+ try {
serializable = Class.forName("java.io.Serializable");
//throwable = Class.forName("java.lang.Throwable");
} catch (Exception e) {
System.out.println("Woops! Can't get class info for Serializable and Throwable.");
}
}
-
+
+ /**
+ * Writes the given bytes to the file "className.dat" inside the directory named by path.
+ * A failure is only reported on standard output; it does not stop the check.
+ */
private void writeFile(String className, byte bytes[])
{
File file = new File(path + File.separator + className + ".dat");
- FileOutputStream stream;
-
- try {
- stream = new FileOutputStream(file);
-
+
+ // try-with-resources closes the stream, also when write() throws,
+ // so no explicit close() call is needed.
+ try (FileOutputStream stream = new FileOutputStream(file)) {
stream.write(bytes);
- stream.close();
} catch (Exception e) {
System.out.print(" - can't write file!");
}
}
-
+
+ @Override
public void visit(String str)
{
int ix = str.lastIndexOf(".class");
-
+
if (ix >= 0) {
String className = "com.ibm.icu" + str.substring(0, ix).replace('/', '.');
-
+
// Skip things in com.ibm.icu.dev; they're not relevant.
if (className.startsWith("com.ibm.icu.dev.")) {
return;
}
-
+
try {
Class c = Class.forName(className);
int m = c.getModifiers();
-
+
if (serializable.isAssignableFrom(c) /*&&
(! throwable.isAssignableFrom(c) || c.getDeclaredFields().length > 0)*/) {
//Field uid;
-
+
System.out.print(className + " (" + Modifier.toString(m) + ") - ");
-
- if(!Modifier.isInterface(m)){
+
+ if(!Modifier.isInterface(m)){
try {
/* uid = */
c.getDeclaredField("serialVersionUID");
System.out.print("no serialVersionUID - ");
}
}
-
+
if (Modifier.isPublic(m)) {
SerializableTestUtility.Handler handler = SerializableTestUtility.getHandler(className);
-
+
if (!Modifier.isInterface(m) && handler != null) {
Object objectsOut[] = handler.getTestObjects();
Object objectsIn[];
boolean passed = true;
-
+
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
ObjectOutputStream out = new ObjectOutputStream(byteOut);
-
+
try {
out.writeObject(objectsOut);
out.close();
System.out.println("Eror writing test objects:" + e.toString());
return;
}
-
+
if (path != null) {
writeFile(className, byteOut.toByteArray());
}
-
+
ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
ObjectInputStream in = new ObjectInputStream(byteIn);
-
+
try {
objectsIn = (Object[]) in.readObject();
in.close();
System.out.println("Object " + i + " failed behavior test.");
}
}
-
+
if (passed) {
System.out.print("test passed.");
}
}
}
}
-
+
System.out.println();
}
} catch (Exception e) {
{
List argList = Arrays.asList(args);
String path = null;
-
+
for (Iterator it = argList.iterator(); it.hasNext(); /*anything?*/) {
String arg = (String) it.next();
-
+
if (arg.equals("-w")) {
if (it.hasNext()) {
path = (String) it.next();
System.out.println("Missing directory name on -w command.");
}
} else {
-
-
+
+
try {
//URL jarURL = new URL("jar:file:/dev/eclipse/workspace/icu4j/icu4j.jar!/com/ibm/icu");
//URL fileURL = new URL("file:/dev/eclipse/workspace/icu4j/classes/com/ibm/icu");
URL url = new URL(arg);
URLHandler handler = URLHandler.get(url);
SerializableChecker checker = new SerializableChecker(path);
-
+
System.out.println("Checking classes from " + arg + ":");
handler.guide(checker, true, false);
} catch (Exception e) {
import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder;
import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
* @author markdavis
*/
@RunWith(JUnit4.class)
-public class XLocaleDistanceTest extends TestFmwk {
+public class LocaleDistanceTest extends TestFmwk {
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
DataDrivenTestHelper tfh = new MyTestFileHandler()
.setFramework(this)
- .load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
+ .load(LocaleDistanceTest.class, "data/localeDistanceTest.txt");
static class Arguments {
final ULocale desired;
}
}
+ @Test
+ public void testLoadedDataSameAsBuiltFromScratch() {
+ // Compare the data produced by the builder tool with the data that gets loaded.
+ // A mismatch means the checked-in data was not regenerated after a change.
+ LocaleDistance.Data fromBuilder = LocaleDistanceBuilder.build();
+ LocaleDistance.Data fromLoad = LocaleDistance.Data.load();
+ assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt",
+ fromBuilder, fromLoad);
+ }
+
@SuppressWarnings("unused")
@Ignore("Disabled because of Linux; need to investigate.")
@Test
+++ /dev/null
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-/*
- *******************************************************************************
- * Copyright (C) 2015, Google, Inc., International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package com.ibm.icu.dev.test.util;
-
-import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
-
-/**
- * @author markdavis
- *
- */
-public class LocaleMatcherShim {
- public static LanguageMatcherData load() {
- // In CLDR, has different value
- return null;
- }
-}
package com.ibm.icu.dev.test.util;
-import java.util.Arrays;
-import java.util.LinkedHashSet;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.junit.Test;
import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
+import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
+import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
+import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
+import junitparams.JUnitParamsRunner;
+import junitparams.Parameters;
+
/**
* Test the LocaleMatcher.
*
* @author markdavis
*/
-@SuppressWarnings("deprecation")
-@RunWith(JUnit4.class)
+@RunWith(JUnitParamsRunner.class)
public class LocaleMatcherTest extends TestFmwk {
-
-
private static final ULocale ZH_MO = new ULocale("zh_MO");
private static final ULocale ZH_HK = new ULocale("zh_HK");
- static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
- return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
- }
-
- private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
- return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
- }
-
- private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
- return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
+ return new LocaleMatcher(build);
}
private LocaleMatcher newLocaleMatcher(String string) {
- return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
+ return new LocaleMatcher(LocalePriorityList.add(string).build());
}
- // public LocaleMatcher(LocalePriorityList languagePriorityList,
- // LocaleMatcherData matcherData, double threshold)
-
@Test
public void testParentLocales() {
assertCloser("es_AR", "es_419", "es_ES");
assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
}
- // public void testParentLocales() {
- // // find all the regions that have a closer relation because of an explicit parent
- // Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
- // explicitParents.remove("root");
- // Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
- // for (String locale : explicitParents) {
- // while (true) {
- // locale = LocaleIDParser.getParent(locale);
- // if (locale == null || locale.equals("root")) {
- // break;
- // }
- // otherParents.add(locale);
- // }
- // }
- // otherParents.remove("root");
- //
- // for (String locale : CONFIG.getCldrFactory().getAvailable()) {
- // String parentId = LocaleIDParser.getParent(locale);
- // String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
- // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
- // continue;
- // }
- // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
- // }
- // }
-
@Test
public void testChinese() {
LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
@Test
public void testFallbacks() {
LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
- final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
+ final LocaleMatcher matcher = newLocaleMatcher(lpl);
assertEquals("hi", matcher.getBestMatch("sa").toString());
}
- @Test
- public void testOverrideData() {
- double threshold = 0.05;
- LanguageMatcherData localeMatcherData = new LanguageMatcherData()
- .addDistance("br", "fr", 10, true)
- .addDistance("es", "cy", 10, true);
- logln(localeMatcherData.toString());
-
- final LocaleMatcher matcher = newLocaleMatcher(
- LocalePriorityList
- .add(ULocale.ENGLISH)
- .add(ULocale.FRENCH)
- .add(ULocale.UK)
- .build(), localeMatcherData, threshold);
- logln(matcher.toString());
-
- assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
- assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
- // way
- }
-
@Test
public void testBasics() {
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant_HK"));
- assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT"));
+ assertEquals(new ULocale("iw"), matcher.getBestMatch("iw_IT"));
}
@Test
@Test
public void TestLocaleMatcherCoverage() {
// Add tests for better code coverage
- LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
+ LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build());
logln(matcher.toString());
-
- LanguageMatcherData data = new LanguageMatcherData();
-
- LanguageMatcherData clone = data.cloneAsThawed();
-
- if (clone.equals(data)) {
- errln("Error cloneAsThawed() is equal.");
- }
-
- if (data.isFrozen()) {
- errln("Error LocaleMatcherData is frozen!");
- }
}
private void assertEquals(Object expected, Object string) {
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
+ private static double match(ULocale a, ULocale b) {
+ // A matcher built from an empty locale list; the maximized forms are passed as null.
+ return new LocaleMatcher("").match(a, null, b, null);
+ }
+
@Test
public void testMatch_exact() {
- assertEquals(1.0,
- LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
+ assertEquals(1.0, match(ENGLISH_CANADA, ENGLISH_CANADA));
}
@Test
public void testMatch_none() {
- double match = LocaleMatcher.match(
- new ULocale("ar_MK"),
- ENGLISH_CANADA);
+ double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
assertTrue("Actual < 0: " + match, 0 <= match);
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
}
public void testMatch_matchOnMazimized() {
ULocale undTw = new ULocale("und_TW");
ULocale zhHant = new ULocale("zh_Hant");
- double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
- double matchZhHant = LocaleMatcher.match(undTw, zhHant);
+ double matchZh = match(undTw, new ULocale("zh"));
+ double matchZhHant = match(undTw, zhHant);
assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
") than to zh (" + matchZh + ")",
matchZh < matchZhHant);
- double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
- zhHant);
+ double matchEnHantTw = match(new ULocale("en_Hant_TW"), zhHant);
assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
") than to en_Hant_TW (" + matchEnHantTw + ")",
matchEnHantTw < matchZhHant);
assertEquals("it", matcher.getBestMatch("en").toString());
}
- // public void testGetBestMatch_emptyList() {
- // final LocaleMatcher matcher = newLocaleMatcher(
- // new LocalePriorityList(new HashMap()));
- // assertNull(matcher.getBestMatch(ULocale.ENGLISH));
- // }
-
@Test
public void testGetBestMatch_googlePseudoLocales() {
// Google pseudo locales are primarily based on variant subtags.
- // See http://sites/intl_eng/pseudo_locales.
// (See below for the region code based fall back options.)
final LocaleMatcher matcher = newLocaleMatcher(
"fr, pt");
check2(sorted);
}
+ private static final ULocale posix = new ULocale("en_US_POSIX");
+
/**
* @param sorted
*/
private void check2(Set<ULocale> sorted) {
- // TODO Auto-generated method stub
logln("Checking: " + sorted);
LocaleMatcher matcher = newLocaleMatcher(
LocalePriorityList.add(
sorted.toArray(new ULocale[sorted.size()]))
- .build());
+ .build());
for (ULocale loc : sorted) {
- String stringLoc = loc.toString();
- assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
+ // The result may not be the exact same locale, but it must be equivalent.
+ // Variants and extensions are ignored.
+ if (loc.equals(posix)) { continue; }
+ ULocale max = ULocale.addLikelySubtags(loc);
+ ULocale best = matcher.getBestMatch(loc);
+ ULocale maxBest = ULocale.addLikelySubtags(best);
+ assertEquals(loc.toString(), max, maxBest);
}
}
}
- // public void testComputeDistance_monkeyTest() {
- // RegionCode[] codes = RegionCode.values();
- // Random random = new Random();
- // for (int i = 0; i < 1000; ++i) {
- // RegionCode x = codes[random.nextInt(codes.length)];
- // RegionCode y = codes[random.nextInt(codes.length)];
- // double d = LocaleMatcher.getRegionDistance(x, y, null, null);
- // if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
- // assertEquals(LocaleMatcher.REGION_DISTANCE, d);
- // } else if (x == y) {
- // assertEquals(0.0, d);
- // } else {
- // assertTrue(d > 0);
- // assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
- // }
- // }
- // }
-
@Test
public void testGetBestMatchForList_matchOnMaximized2() {
-// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-// return;
-// }
final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
// ja-JP matches ja on likely subtags, and it's listed first, thus it wins over
// thus it wins over the second preference en-GB.
@Test
public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
-// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-// return;
-// }
final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
@Test
public void testGetBestMatchForPortuguese() {
-
-// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-// return;
-// }
-
final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
// Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
- // European user who prefers Spanish over Brazillian Portuguese as a fallback.
+ // European user who prefers Spanish over Brazilian Portuguese as a fallback.
assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
- assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
+ // The earlier pt_PT vs. pt_BR region mismatch is as good as the later es perfect match
+ // because of the demotion per desired locale.
+ assertEquals("pt_BR", withoutPT.getBestMatch("pt_PT, es, pt").toString());
- // Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
+ // Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
// The asymmetry between this case and above is because it's "pt_PT" that's missing between the
// matchers as "pt_BR" is a much more common language.
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
@Test
public void testVariantWithScriptMatch() {
-// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-// return;
-// }
final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
assertEquals("en", matcher.getBestMatch("en-GB").toString());
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
@Test
public void testVariantWithScriptMatch2() {
-// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
-// return;
-// }
final LocaleMatcher matcher = newLocaleMatcher("en, sv");
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
}
@Test
- public void testPerf() {
- if (LANGUAGE_MATCHER_DATA == null) {
- return; // skip except when testing data
+    public void Test8288() {
+        // Supported "it, en": expects "it" for desired "und", and "en" for desired "und, en".
+        final LocaleMatcher itThenEn = newLocaleMatcher("it, en");
+        final String bestForUnd = itThenEn.getBestMatch("und").toString();
+        assertEquals("it", bestForUnd);
+        final String bestForUndThenEn = itThenEn.getBestMatch("und, en").toString();
+        assertEquals("en", bestForUndThenEn);
+    }
+
+    /**
+     * Checks the effect of the per-desired-locale demotion setting: with the same supported
+     * and desired lists, Demotion.NONE is expected to pick de-CH and Demotion.REGION fr.
+     */
+    @Test
+    public void testDemotion() {
+        final LocalePriorityList supportedList = LocalePriorityList.add("fr, de-CH, it").build();
+        final LocalePriorityList desiredList = LocalePriorityList.add("fr-CH, de-CH, it").build();
+
+        // Matcher configured with Demotion.NONE.
+        LocaleMatcher.Builder plainBuilder = LocaleMatcher.builder();
+        plainBuilder.setSupportedULocales(supportedList.getULocales());
+        plainBuilder.setDemotionPerDesiredLocale(LocaleMatcher.Demotion.NONE);
+        final LocaleMatcher withoutDemotion = plainBuilder.build();
+        final ULocale plainBest = withoutDemotion.getBestMatch(desiredList);
+        assertEquals("no demotion", new ULocale("de-CH"), plainBest);
+
+        // Matcher configured with Demotion.REGION.
+        LocaleMatcher.Builder regionBuilder = LocaleMatcher.builder();
+        regionBuilder.setSupportedULocales(supportedList.getULocales());
+        regionBuilder.setDemotionPerDesiredLocale(LocaleMatcher.Demotion.REGION);
+        final LocaleMatcher withRegionDemotion = regionBuilder.build();
+        final ULocale regionBest = withRegionDemotion.getBestMatch(desiredList);
+        assertEquals("region demotion", ULocale.FRENCH, regionBest);
+    }
+
+ private static final class PerfCase {
+ ULocale desired;
+ ULocale expectedShort;
+ ULocale expectedLong;
+ ULocale expectedVeryLong;
+
+ PerfCase(String des, String expShort, String expLong, String expVeryLong) {
+ desired = new ULocale(des);
+ expectedShort = new ULocale(expShort);
+ expectedLong = new ULocale(expLong);
+ expectedVeryLong = new ULocale(expVeryLong);
}
- final String desired = "sv, en";
-
- final LocaleMatcher matcherShort = newLocaleMatcher(desired);
- final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
- final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, 
fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, 
sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
-
- //LocaleMatcher.DEBUG = true;
- ULocale expected = new ULocale("sv");
- assertEquals(expected, matcherShort.getBestMatch(desired));
- assertEquals(expected, matcherLong.getBestMatch(desired));
- assertEquals(expected, matcherVeryLong.getBestMatch(desired));
- //LocaleMatcher.DEBUG = false;
-
- for (int i = 0; i < 2; ++i) {
- int iterations = i == 0 ? 1000 : 100000;
- boolean showMessage = i != 0;
- long timeShort = timeLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations, 0);
- @SuppressWarnings("unused")
- long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
- @SuppressWarnings("unused")
- long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
+ }
+
+ private static final int WARM_UP_ITERATIONS = 1000;
+ private static final int BENCHMARK_ITERATIONS = 20000;
+
+ /**
+ * Builds matchers over a short, a long and a very long supported-locale list, and for each
+ * PerfCase first asserts the expected best match per matcher, then prints the average
+ * nanoseconds per getBestMatch() call to System.out. Finishes by calling maximizePerf().
+ * Only the best-match results are asserted; the timings are informational output.
+ */
+ @Test
+ public void testPerf() {
+ final String shortList = "en, sv";
+ final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+ "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+ "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+ "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+ "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+ "zh-CN, zh-TW, zu";
+ final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
+ "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
+ "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
+ "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
+ "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
+ "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
+ "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
+ "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
+ "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
+ "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
+ "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
+ // removed en_001 to avoid exact match
+ "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
+ "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
+ "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
+ "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
+ "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
+ "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
+ "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
+ "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
+ "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
+ "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
+ "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
+ "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
+ "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
+ "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
+ "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
+ "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
+ "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
+ "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
+ "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
+ "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
+ "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
+ "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
+ "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
+ "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
+ "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
+ "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
+ "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
+ "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
+ "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
+ "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
+ "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
+ "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
+ "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
+ "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
+ "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
+ "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
+ "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
+ "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
+ "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
+ "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
+ "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
+ "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
+ "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
+ "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
+ "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
+ "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
+ "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
+ "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
+ "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
+ "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
+ "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
+ "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
+
+ final LocaleMatcher matcherShort = newLocaleMatcher(shortList);
+ final LocaleMatcher matcherLong = newLocaleMatcher(longList);
+ final LocaleMatcher matcherVeryLong = newLocaleMatcher(veryLongList);
+
+ // Each case: desired locale, then the expected best match from the short, long and very long list.
+ PerfCase[] pcs = new PerfCase[] {
+ // Exact match in all matchers.
+ new PerfCase("sv", "sv", "sv", "sv"),
+ // Common locale, exact match only in very long list.
+ new PerfCase("fr_CA", "en", "fr", "fr_CA"),
+ // Unusual locale, no exact match.
+ new PerfCase("de_CA", "en", "de", "de"),
+ // World English maps to several region partitions.
+ new PerfCase("en_001", "en", "en", "en"),
+ // Ancient language with interesting subtags.
+ new PerfCase("egy_Copt_CY", "en", "af", "af")
+ };
+
+ for (PerfCase pc : pcs) {
+ final ULocale desired = pc.desired;
+
+ // Check the results before timing anything.
+ assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
+ assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
+ assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
+
+ // Warm-up passes; the returned timings are discarded.
+ timeLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
+ timeLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
+ timeLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
+ // Measured passes; each value is the average nanoseconds per getBestMatch() call.
+ long tns = timeLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
+ long tnl = timeLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
+ long tnv = timeLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
+ System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
}
+
+ // Also print the timing for likely-subtags maximization.
+ maximizePerf();
}
- private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
- boolean showmessage, int iterations, long comparisonTime) {
+ private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
}
long delta = System.nanoTime() - start;
- if (showmessage) warnln(title + (delta / iterations) + " nanos, "
- + (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
- return delta;
+ return (delta / iterations);
+ }
+
+    /**
+     * Prints to System.out how long timeMaximize() takes per locale for a fixed list of tags.
+     *
+     * <p>Runs a warm-up pass and a scale pass of 1000 iterations each, derives from the scale
+     * pass an iteration count aimed at roughly 0.1 s of total work, and then times that many
+     * iterations. The derived count is clamped to [1, Integer.MAX_VALUE] and a zero scale
+     * measurement is treated as 1 ns, so that neither division below can divide by zero and
+     * the narrowing to int cannot overflow.
+     */
+    private void maximizePerf() {
+        final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
+                "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
+                "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
+                "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
+                "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
+                "zh-CN, zh-TW, zu";
+        LocalePriorityList list = LocalePriorityList.add(tags).build();
+        int few = 1000;
+        timeMaximize(list, few);  // warm up
+        long t = timeMaximize(list, few);  // measure for scale
+        long targetTime = 100000000L;  // 10^8 ns = 0.1s
+        // Guard against a 0 ns reading from a coarse clock.
+        long scale = Math.max(1L, t);
+        // Clamp before narrowing: the quotient can be 0 (very slow run) or exceed int range.
+        long wanted = (targetTime * few) / scale;
+        int iterations = (int) Math.min((long) Integer.MAX_VALUE, Math.max(1L, wanted));
+        t = timeMaximize(list, iterations);
+        int length = 0;
+        for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
+        System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
+                t + " ns / " + iterations + " iterations / " + length + " locales");
+    }
+
+    /**
+     * Calls XLikelySubtags.INSTANCE.makeMaximizedLsrFrom() on every locale of {@code list},
+     * repeating the whole pass {@code iterations} times.
+     *
+     * @return the total elapsed nanoseconds for all passes (not a per-iteration average)
+     */
+    private static long timeMaximize(Iterable<ULocale> list, int iterations) {
+        final long begin = System.nanoTime();
+        int remaining = iterations;
+        while (remaining > 0) {
+            for (ULocale tag : list) {
+                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(tag);
+            }
+            --remaining;
+        }
+        final long end = System.nanoTime();
+        return end - begin;
}
@Test
- public void Test8288() {
- final LocaleMatcher matcher = newLocaleMatcher("it, en");
- assertEquals("it", matcher.getBestMatch("und").toString());
- assertEquals("en", matcher.getBestMatch("und, en").toString());
+    public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
+        // Compare what LikelySubtagsBuilder.build() produces now with what Data.load() returns;
+        // the failure message says how to refresh the data when they differ.
+        final XLikelySubtags.Data freshlyBuilt = LikelySubtagsBuilder.build();
+        final XLikelySubtags.Data asLoaded = XLikelySubtags.Data.load();
+        assertEquals(
+                "run LocaleDistanceBuilder and update ICU4C langInfo.txt",
+                freshlyBuilt,
+                asLoaded);
}
- @Test
- public void TestTechPreview() {
- final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
- ULocale und = new ULocale("und");
- ULocale bulgarian = new ULocale("bg");
- ULocale russian = new ULocale("ru");
+    /**
+     * One data-driven test case: the raw lines it was read from (used for toString())
+     * plus the values parsed out of them. Instances are mutated while reading and
+     * snapshotted with clone().
+     */
+    private static final class TestCase implements Cloneable {
+        // System.lineSeparator() yields the same value as the "line.separator" property
+        // without fetching the whole mutable system properties table.
+        private static final String ENDL = System.lineSeparator();
+
+        /** Line number associated with this case; printed by toString(). */
+        int lineNr = 0;
+
+        // Raw input lines, kept verbatim for toString().
+        String nameLine = "";
+        String supportedLine = "";
+        String defaultLine = "";
+        String distanceLine = "";
+        String thresholdLine = "";
+        String matchLine = "";
+
+        // Parsed values.
+        String supported = "";
+        String def = "";
+        String favor = "";
+        String threshold = "";
+        String desired = "";
+        String expMatch = "";
+        String expDesired = "";
+        String expCombined = "";
+
+        /** Returns a field-by-field copy; all fields are Strings or an int. */
+        @Override
+        public TestCase clone() throws CloneNotSupportedException {
+            return (TestCase) super.clone();
+        }
+
+        /**
+         * Starts a new named test: stores the name line and clears the settings lines and
+         * their parsed values. lineNr, matchLine, desired and the exp* fields are left as is.
+         */
+        void reset(String newNameLine) {
+            nameLine = newNameLine;
+            supportedLine = "";
+            defaultLine = "";
+            distanceLine = "";
+            thresholdLine = "";
+
+            supported = "";
+            def = "";
+            favor = "";
+            threshold = "";
+        }
+
+        /** Returns the '+'-joined input values (everything except the expectations). */
+        String toInputsKey() {
+            return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
+        }
+
+        /** Appends a line separator and {@code line} to {@code sb}, unless the line is empty. */
+        private static void appendLine(StringBuilder sb, String line) {
+            if (!line.isEmpty()) {
+                sb.append(ENDL).append(line);
+            }
+        }
+
+        /**
+         * Returns the name line, the non-empty settings lines, a "line N:" marker and the
+         * match line, each on its own line.
+         */
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder(nameLine);
+            appendLine(sb, supportedLine);
+            appendLine(sb, defaultLine);
+            appendLine(sb, distanceLine);
+            appendLine(sb, thresholdLine);
+            sb.append(ENDL).append("line ").append(lineNr).append(':');
+            appendLine(sb, matchLine);
+            return sb.toString();
+        }
+    }
- Output<ULocale> outputBestDesired = new Output<>();
+    /**
+     * Returns the part of {@code s} between the end of {@code prefix} and index {@code limit},
+     * or null if {@code s} does not start with {@code prefix} or the prefix is longer
+     * than {@code limit}.
+     */
+    private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
+        final int prefixLength = prefix.length();
+        if (prefixLength > limit || !s.startsWith(prefix)) {
+            return null;
+        }
+        return s.substring(prefixLength, limit);
+    }
+
+    /** Returns the index of the first '|' in {@code line} within [start, limit), or -1 if none. */
+    private static int indexOfBar(String line, int start, int limit) {
+        int i = line.indexOf('|', start);
+        return (0 <= i && i < limit) ? i : -1;
+    }
+
+    /**
+     * Reads data/localeMatcherTest.txt and returns one TestCase snapshot per match line.
+     *
+     * <p>Recognized lines: "** test: " headers (which reset the per-test settings), the
+     * settings "@supported=", "@default=", "@favor=" and "@threshold=", and match lines of
+     * the form {@code desired >> expMatch [| expDesired [| expCombined]]}. A '#' starts a
+     * comment; lines that are empty once the comment and trailing blanks are removed are
+     * skipped. A '|' that appears only inside the trailing comment is not a field separator.
+     * Cases whose inputs repeat an earlier case are reported on System.out but still returned.
+     *
+     * <p>Used reflectively (see the Parameters annotation on dataDriven); not private to
+     * avoid an unused-warning.
+     *
+     * @throws IllegalArgumentException if a non-empty line matches none of the forms above
+     */
+    static List<TestCase> readTestCases() throws Exception {
+        List<TestCase> tests = new ArrayList<>();
+        Map<String, Integer> uniqueTests = new HashMap<>();
+        TestCase test = new TestCase();
+        String filename = "data/localeMatcherTest.txt";
+        try (BufferedReader in = FileUtilities.openFile(LocaleMatcherTest.class, filename)) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                ++test.lineNr;
+                // Start of comment, or end of line, minus trailing spaces.
+                int limit = line.indexOf('#');
+                if (limit < 0) {
+                    limit = line.length();
+                }
+                char c;
+                while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
+                    --limit;
+                }
+                if (limit == 0) {  // empty line
+                    continue;
+                }
+                String suffix;
+                if (line.startsWith("** test: ")) {
+                    test.reset(line);
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
+                    test.supportedLine = line;
+                    test.supported = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
+                    test.defaultLine = line;
+                    test.def = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
+                    test.distanceLine = line;
+                    test.favor = suffix;
+                } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
+                    test.thresholdLine = line;
+                    test.threshold = suffix;
+                } else {
+                    int matchSep = line.indexOf(">>");
+                    // >> before an inline comment, and followed by more than white space.
+                    if (0 <= matchSep && (matchSep + 2) < limit) {
+                        test.matchLine = line;
+                        test.desired = line.substring(0, matchSep).trim();
+                        test.expDesired = test.expCombined = "";
+                        int start = matchSep + 2;
+                        // Only look for '|' before the comment/trailing blanks; an unbounded
+                        // search could hit a '|' in the comment and make start exceed limit.
+                        int expLimit = indexOfBar(line, start, limit);
+                        if (expLimit < 0) {
+                            test.expMatch = line.substring(start, limit).trim();
+                        } else {
+                            test.expMatch = line.substring(start, expLimit).trim();
+                            start = expLimit + 1;
+                            expLimit = indexOfBar(line, start, limit);
+                            if (expLimit < 0) {
+                                test.expDesired = line.substring(start, limit).trim();
+                            } else {
+                                test.expDesired = line.substring(start, expLimit).trim();
+                                test.expCombined = line.substring(expLimit + 1, limit).trim();
+                            }
+                        }
+                        String inputs = test.toInputsKey();
+                        Integer prevIndex = uniqueTests.get(inputs);
+                        if (prevIndex == null) {
+                            uniqueTests.put(inputs, tests.size());
+                        } else {
+                            System.out.println("Locale matcher test case on line " + test.lineNr
+                                    + " is a duplicate of line " + tests.get(prevIndex).lineNr);
+                        }
+                        // Snapshot: the same TestCase object keeps being mutated for later lines.
+                        tests.add(test.clone());
+                    } else {
+                        throw new IllegalArgumentException("test data syntax error on line "
+                                + test.lineNr + "\n" + line);
+                    }
+                }
+            }
+        }
+        System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
+        return tests;
+    }
- ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
- assertEquals(ULocale.ITALIAN, best);
- assertEquals(null, outputBestDesired.value);
+    /** Maps the literal text "null" to a null reference; any other text becomes a ULocale. */
+    private static ULocale getULocaleOrNull(String s) {
+        final boolean isNullMarker = s.equals("null");
+        return isNullMarker ? null : new ULocale(s);
+    }
- matcher.setDefaultLanguage(ULocale.JAPANESE);
- best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
- assertEquals(ULocale.JAPANESE, best);
+    /**
+     * Runs one case produced by readTestCases().
+     *
+     * <p>Builds a matcher from the case's supported list, going through the builder only when
+     * a default locale, favor setting or threshold is given, and then compares the best match
+     * with the expectation. When the case also gives an expected desired or combined locale,
+     * the full Result is fetched and those are checked too. The text "null" in an expectation
+     * stands for a null locale.
+     *
+     * @throws IllegalArgumentException if the favor value is neither "normal" nor "script"
+     */
+    @Test
+    @Parameters(method = "readTestCases")
+    public void dataDriven(TestCase test) {
+        LocaleMatcher matcher;
+        if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
+            matcher = new LocaleMatcher(test.supported);
+        } else {
+            LocaleMatcher.Builder builder = LocaleMatcher.builder();
+            builder.setSupportedLocales(test.supported);
+            if (!test.def.isEmpty()) {
+                builder.setDefaultULocale(new ULocale(test.def));
+            }
+            if (!test.favor.isEmpty()) {
+                FavorSubtag favor;
+                switch (test.favor) {
+                case "normal":
+                    favor = FavorSubtag.LANGUAGE;
+                    break;
+                case "script":
+                    favor = FavorSubtag.SCRIPT;
+                    break;
+                default:
+                    throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
+                }
+                builder.setFavorSubtag(favor);
+            }
+            if (!test.threshold.isEmpty()) {
+                // parseInt yields the primitive directly; valueOf would box and then unbox.
+                int threshold = Integer.parseInt(test.threshold);
+                builder.internalSetThresholdDistance(threshold);
+            }
+            matcher = builder.build();
+        }
- matcher.setFavorScript(true);
- best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired);
- assertEquals(russian, best);
+        ULocale expMatch = getULocaleOrNull(test.expMatch);
+        if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
+            // Only the supported locale is expected: use the simple String-based lookup.
+            ULocale bestSupported = matcher.getBestMatch(test.desired);
+            assertEquals("bestSupported", expMatch, bestSupported);
+        } else {
+            LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
+            LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
+            assertEquals("bestSupported", expMatch, result.getSupportedULocale());
+            if (!test.expDesired.isEmpty()) {
+                ULocale expDesired = getULocaleOrNull(test.expDesired);
+                assertEquals("bestDesired", expDesired, result.getDesiredULocale());
+            }
+            if (!test.expCombined.isEmpty()) {
+                ULocale expCombined = getULocaleOrNull(test.expCombined);
+                ULocale combined = result.makeServiceULocale();
+                assertEquals("combined", expCombined, combined);
+            }
+        }
}
}
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.dev.test.util;
-
-import java.io.BufferedReader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-
-import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.locale.LocaleDistance;
-import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
-import com.ibm.icu.impl.locale.XLikelySubtags;
-import com.ibm.icu.impl.locale.XLocaleMatcher;
-import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
-import com.ibm.icu.util.LocaleMatcher;
-import com.ibm.icu.util.LocalePriorityList;
-import com.ibm.icu.util.ULocale;
-
-import junitparams.JUnitParamsRunner;
-import junitparams.Parameters;
-
-/**
- * Test the XLocaleMatcher.
- *
- * @author markdavis
- */
-@RunWith(JUnitParamsRunner.class)
-public class XLocaleMatcherTest extends TestFmwk {
- private static final int REGION_DISTANCE = 4;
-
- private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
-
- private XLocaleMatcher newXLocaleMatcher() {
- return new XLocaleMatcher("");
- }
-
- private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
- return new XLocaleMatcher(build);
- }
-
- private XLocaleMatcher newXLocaleMatcher(String string) {
- return new XLocaleMatcher(LocalePriorityList.add(string).build());
- }
-
- @SuppressWarnings("unused")
- private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
- return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
- internalSetThresholdDistance(d).build();
- }
-
- // public void testParentLocales() {
- // // find all the regions that have a closer relation because of an explicit parent
- // Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
- // explicitParents.remove("root");
- // Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
- // for (String locale : explicitParents) {
- // while (true) {
- // locale = LocaleIDParser.getParent(locale);
- // if (locale == null || locale.equals("root")) {
- // break;
- // }
- // otherParents.add(locale);
- // }
- // }
- // otherParents.remove("root");
- //
- // for (String locale : CONFIG.getCldrFactory().getAvailable()) {
- // String parentId = LocaleIDParser.getParent(locale);
- // String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
- // if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
- // continue;
- // }
- // System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
- // }
- // }
-
-
-// TBD reenable with override data
-// public void testOverrideData() {
-// double threshold = 0.05;
-// XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
-// .addDistance("br", "fr", 10, true)
-// .addDistance("es", "cy", 10, true);
-// logln(XLocaleMatcherData.toString());
-//
-// final XLocaleMatcher matcher = newXLocaleMatcher(
-// LocalePriorityList
-// .add(ULocale.ENGLISH)
-// .add(ULocale.FRENCH)
-// .add(ULocale.UK)
-// .build(), XLocaleMatcherData, threshold);
-// logln(matcher.toString());
-//
-// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
-// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
-// // way
-// }
-
-
- /**
- * If all the base languages are the same, then each sublocale matches
- * itself most closely
- */
- @Test
- public void testExactMatches() {
- String lastBase = "";
- TreeSet<ULocale> sorted = new TreeSet<>();
- for (ULocale loc : ULocale.getAvailableLocales()) {
- String language = loc.getLanguage();
- if (!lastBase.equals(language)) {
- check(sorted);
- sorted.clear();
- lastBase = language;
- }
- sorted.add(loc);
- }
- check(sorted);
- }
-
- private void check(Set<ULocale> sorted) {
- if (sorted.isEmpty()) {
- return;
- }
- check2(sorted);
- ULocale first = sorted.iterator().next();
- ULocale max = ULocale.addLikelySubtags(first);
- sorted.add(max);
- check2(sorted);
- }
-
- private static final ULocale posix = new ULocale("en_US_POSIX");
-
- /**
- * @param sorted
- */
- private void check2(Set<ULocale> sorted) {
- logln("Checking: " + sorted);
- XLocaleMatcher matcher = newXLocaleMatcher(
- LocalePriorityList.add(
- sorted.toArray(new ULocale[sorted.size()]))
- .build());
- for (ULocale loc : sorted) {
- // The result may not be the exact same locale, but it must be equivalent.
- // Variants and extensions are ignored.
- if (loc.equals(posix)) { continue; }
- ULocale max = ULocale.addLikelySubtags(loc);
- ULocale best = matcher.getBestMatch(loc);
- ULocale maxBest = ULocale.addLikelySubtags(best);
- assertEquals(loc.toString(), max, maxBest);
- }
- }
-
- @Test
- public void testDemotion() {
- LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
- LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
- XLocaleMatcher noDemotion = XLocaleMatcher.builder().
- setSupportedULocales(supported.getULocales()).
- setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build();
- assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
-
- XLocaleMatcher regionDemotion = XLocaleMatcher.builder().
- setSupportedULocales(supported.getULocales()).
- setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build();
- assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
- }
-
- private static final class PerfCase {
- ULocale desired;
- ULocale expectedShort;
- ULocale expectedLong;
- ULocale expectedVeryLong;
-
- PerfCase(String des, String expShort, String expLong, String expVeryLong) {
- desired = new ULocale(des);
- expectedShort = new ULocale(expShort);
- expectedLong = new ULocale(expLong);
- expectedVeryLong = new ULocale(expVeryLong);
- }
- }
-
- private static final int WARM_UP_ITERATIONS = 1000;
- private static final int BENCHMARK_ITERATIONS = 20000;
- private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
- private static final int AVG_PCT_LONG_NEW_OLD = 80;
-
- @Test
- public void testPerf() {
- if (LANGUAGE_MATCHER_DATA == null) {
- return; // skip except when testing data
- }
-
- final String shortList = "en, sv";
- final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
- "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
- "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
- "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
- "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
- "zh-CN, zh-TW, zu";
- final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
- "ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
- "ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
- "ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
- "ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
- "bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
- "bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
- "bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
- "ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
- "cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
- "de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
- // removed en_001 to avoid exact match
- "ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
- "en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
- "en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
- "en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
- "en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
- "en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
- "en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
- "en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
- "en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
- "en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
- "en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
- "es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
- "es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
- "et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
- "ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
- "fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
- "fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
- "fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
- "fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
- "fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
- "gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
- "he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
- "id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
- "jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
- "kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
- "kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
- "ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
- "lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
- "lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
- "lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
- "mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
- "ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
- "nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
- "nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
- "nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
- "pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
- "pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
- "rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
- "ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
- "sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
- "shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
- "smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
- "sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
- "sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
- "sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
- "teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
- "tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
- "uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
- "vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
- "wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
- "zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
- "zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
-
- final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
- final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
- final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
-
- final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
- final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
- final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
-
- long timeShortNew=0;
- long timeMediumNew=0;
- long timeLongNew=0;
-
- long timeShortOld=0;
- long timeMediumOld=0;
- long timeLongOld=0;
-
- PerfCase[] pcs = new PerfCase[] {
- // Exact match in all matchers.
- new PerfCase("sv", "sv", "sv", "sv"),
- // Common locale, exact match only in very long list.
- new PerfCase("fr_CA", "en", "fr", "fr_CA"),
- // Unusual locale, no exact match.
- new PerfCase("de_CA", "en", "de", "de"),
- // World English maps to several region partitions.
- new PerfCase("en_001", "en", "en", "en"),
- // Ancient language with interesting subtags.
- new PerfCase("egy_Copt_CY", "en", "af", "af")
- };
-
- for (PerfCase pc : pcs) {
- final ULocale desired = pc.desired;
-
- assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
- assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
- assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
-
- timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
- timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
- timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
- long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
- System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
- timeShortNew += tns;
- long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
- System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
- timeMediumNew += tnl;
- long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
- System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
- timeLongNew += tnv;
-
- timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
- timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
- timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
- long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
- System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n",
- desired, tos, (100 * tns) / tos);
- timeShortOld += tos;
- long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
- System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n",
- desired, tol, (100 * tnl) / tol);
- timeMediumOld += tol;
- long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
- System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n",
- desired, tov, (100 * tnv) / tov);
- timeLongOld += tov;
- }
-
- assertTrue(
- String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
- timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
- timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
- assertTrue(
- String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
- timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
- timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
- assertTrue(
- String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
- timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
- timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
-
- maximizePerf();
- }
-
- private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
- long start = System.nanoTime();
- for (int i = iterations; i > 0; --i) {
- matcher.getBestMatch(desired);
- }
- long delta = System.nanoTime() - start;
- return (delta / iterations);
- }
-
- private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
- long start = System.nanoTime();
- for (int i = iterations; i > 0; --i) {
- matcher.getBestMatch(desired);
- }
- long delta = System.nanoTime() - start;
- return (delta / iterations);
- }
-
- private void maximizePerf() {
- final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
- "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
- "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
- "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
- "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
- "zh-CN, zh-TW, zu";
- LocalePriorityList list = LocalePriorityList.add(tags).build();
- int few = 1000;
- long t = timeMaximize(list, few); // warm up
- t = timeMaximize(list, few); // measure for scale
- long targetTime = 100000000L; // 10^8 ns = 0.1s
- int iterations = (int)((targetTime * few) / t);
- t = timeMaximize(list, iterations);
- int length = 0;
- for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
- System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
- t + " ns / " + iterations + " iterations / " + length + " locales");
- }
-
- // returns total ns not per iteration
- private static long timeMaximize(Iterable<ULocale> list, int iterations) {
- long start = System.nanoTime();
- for (int i = iterations; i > 0; --i) {
- for (ULocale locale : list) {
- XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
- }
- }
- return System.nanoTime() - start;
- }
-
- private static final class TestCase implements Cloneable {
- private static final String ENDL = System.getProperties().getProperty("line.separator");
-
- int lineNr = 0;
-
- String nameLine = "";
- String supportedLine = "";
- String defaultLine = "";
- String distanceLine = "";
- String thresholdLine = "";
- String matchLine = "";
-
- String supported = "";
- String def = "";
- String favor = "";
- String threshold = "";
- String desired = "";
- String expMatch = "";
- String expDesired = "";
- String expCombined = "";
-
- @Override
- public TestCase clone() throws CloneNotSupportedException {
- return (TestCase) super.clone();
- }
-
- void reset(String newNameLine) {
- nameLine = newNameLine;
- supportedLine = "";
- defaultLine = "";
- distanceLine = "";
- thresholdLine = "";
-
- supported = "";
- def = "";
- favor = "";
- threshold = "";
- }
-
- String toInputsKey() {
- return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
- }
-
- private static void appendLine(StringBuilder sb, String line) {
- if (!line.isEmpty()) {
- sb.append(ENDL).append(line);
- }
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder(nameLine);
- appendLine(sb, supportedLine);
- appendLine(sb, defaultLine);
- appendLine(sb, distanceLine);
- appendLine(sb, thresholdLine);
- sb.append(ENDL).append("line ").append(lineNr).append(':');
- appendLine(sb, matchLine);
- return sb.toString();
- }
- }
-
- private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
- if (prefix.length() <= limit && s.startsWith(prefix)) {
- return s.substring(prefix.length(), limit);
- } else {
- return null;
- }
- }
-
- // UsedReflectively, not private to avoid unused-warning
- static List<TestCase> readTestCases() throws Exception {
- List<TestCase> tests = new ArrayList<>();
- Map<String, Integer> uniqueTests = new HashMap<>();
- TestCase test = new TestCase();
- String filename = "data/localeMatcherTest.txt";
- try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) {
- String line;
- while ((line = in.readLine()) != null) {
- ++test.lineNr;
- // Start of comment, or end of line, minus trailing spaces.
- int limit = line.indexOf('#');
- if (limit < 0) {
- limit = line.length();
- }
- char c;
- while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
- --limit;
- }
- if (limit == 0) { // empty line
- continue;
- }
- String suffix;
- if (line.startsWith("** test: ")) {
- test.reset(line);
- } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
- test.supportedLine = line;
- test.supported = suffix;
- } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
- test.defaultLine = line;
- test.def = suffix;
- } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
- test.distanceLine = line;
- test.favor = suffix;
- } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
- test.thresholdLine = line;
- test.threshold = suffix;
- } else {
- int matchSep = line.indexOf(">>");
- // >> before an inline comment, and followed by more than white space.
- if (0 <= matchSep && (matchSep + 2) < limit) {
- test.matchLine = line;
- test.desired = line.substring(0, matchSep).trim();
- test.expDesired = test.expCombined = "";
- int start = matchSep + 2;
- int expLimit = line.indexOf('|', start);
- if (expLimit < 0) {
- test.expMatch = line.substring(start, limit).trim();
- } else {
- test.expMatch = line.substring(start, expLimit).trim();
- start = expLimit + 1;
- expLimit = line.indexOf('|', start);
- if (expLimit < 0) {
- test.expDesired = line.substring(start, limit).trim();
- } else {
- test.expDesired = line.substring(start, expLimit).trim();
- test.expCombined = line.substring(expLimit + 1, limit).trim();
- }
- }
- String inputs = test.toInputsKey();
- Integer prevIndex = uniqueTests.get(inputs);
- if (prevIndex == null) {
- uniqueTests.put(inputs, tests.size());
- } else {
- System.out.println("Locale matcher test case on line " + test.lineNr
- + " is a duplicate of line " + tests.get(prevIndex).lineNr);
- }
- tests.add(test.clone());
- } else {
- throw new IllegalArgumentException("test data syntax error on line "
- + test.lineNr + "\n" + line);
- }
- }
- }
- }
- System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
- return tests;
- }
-
- private static ULocale getULocaleOrNull(String s) {
- if (s.equals("null")) {
- return null;
- } else {
- return new ULocale(s);
- }
- }
-
- @Test
- @Parameters(method = "readTestCases")
- public void dataDriven(TestCase test) {
- XLocaleMatcher matcher;
- if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
- matcher = new XLocaleMatcher(test.supported);
- } else {
- XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
- builder.setSupportedLocales(test.supported);
- if (!test.def.isEmpty()) {
- builder.setDefaultULocale(new ULocale(test.def));
- }
- if (!test.favor.isEmpty()) {
- FavorSubtag favor;
- switch (test.favor) {
- case "normal":
- favor = FavorSubtag.LANGUAGE;
- break;
- case "script":
- favor = FavorSubtag.SCRIPT;
- break;
- default:
- throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
- }
- builder.setFavorSubtag(favor);
- }
- if (!test.threshold.isEmpty()) {
- int threshold = Integer.valueOf(test.threshold);
- builder.internalSetThresholdDistance(threshold);
- }
- matcher = builder.build();
- }
-
- ULocale expMatch = getULocaleOrNull(test.expMatch);
- if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
- ULocale bestSupported = matcher.getBestMatch(test.desired);
- assertEquals("bestSupported", expMatch, bestSupported);
- } else {
- LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
- XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
- assertEquals("bestSupported", expMatch, result.getSupportedULocale());
- if (!test.expDesired.isEmpty()) {
- ULocale expDesired = getULocaleOrNull(test.expDesired);
- assertEquals("bestDesired", expDesired, result.getDesiredULocale());
- }
- if (!test.expCombined.isEmpty()) {
- ULocale expCombined = getULocaleOrNull(test.expCombined);
- ULocale combined = result.makeServiceULocale();
- assertEquals("combined", expCombined, combined);
- }
- }
- }
-}
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core"/>
- <classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
- <classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
<classpathentry kind="output" path="out/bin"/>
</classpath>
<name>icu4j-tools</name>
<comment></comment>
<projects>
- <project>icu4j-core</project>
- <project>icu4j-core-tests</project>
- <project>icu4j-shared</project>
- <project>icu4j-test-framework</project>
</projects>
<buildSpec>
<buildCommand>
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
+package com.ibm.icu.dev.tool.locale;
import java.nio.ByteBuffer;
import java.util.Collection;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.BytesTrieBuilder;
import com.ibm.icu.util.ICUException;
* Builds data for XLikelySubtags.
* Reads source data from ICU resource bundles.
*/
-class LikelySubtagsBuilder {
+public class LikelySubtagsBuilder {
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static ICUResourceBundle getSupplementalDataBundle(String name) {
UResource.Key key = new UResource.Key();
for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
String aliasFrom = key.toString();
- if (aliasFrom.contains("_")) {
+ if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
continue; // only simple aliasing
}
UResource.Table table = value.getTable();
}
}
- BytesTrie build() {
+ byte[] build() {
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
// Allocate an array with just the necessary capacity,
// so that we do not hold on to a larger array for a long time.
if (DEBUG_OUTPUT) {
System.out.println("likely subtags trie size: " + bytes.length + " bytes");
}
- return new BytesTrie(bytes, 0);
+ return bytes;
}
}
- static XLikelySubtags.Data build() {
+ // VisibleForTesting
+ public static XLikelySubtags.Data build() {
AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
}
}
}
- BytesTrie trie = trieBuilder.build();
+ byte[] trie = trieBuilder.build();
LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
return new XLikelySubtags.Data(
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.locale;
-
+package com.ibm.icu.dev.tool.locale;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
+import com.ibm.icu.impl.locale.LSR;
+import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XCldrStub.Predicate;
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
-import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.BytesTrieBuilder;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
}
}
- BytesTrie build() {
+ byte[] build() {
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
// Allocate an array with just the necessary capacity,
// so that we do not hold on to a larger array for a long time.
if (DEBUG_OUTPUT) {
System.out.println("distance trie size: " + bytes.length + " bytes");
}
- return new BytesTrie(bytes, 0);
+ return bytes;
}
}
return result;
}
- static LocaleDistance build() {
+ // VisibleForTesting
+ public static LocaleDistance.Data build() {
// From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
// and then paradigmLocales, matchVariable, and the last languageMatch items.
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
TrieBuilder trieBuilder = new TrieBuilder();
defaultDistanceTable.toTrie(trieBuilder);
- BytesTrie trie = trieBuilder.build();
- return new LocaleDistance(
+ byte[] trie = trieBuilder.build();
+ return new LocaleDistance.Data(
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
paradigmLSRs, distances);
}
}
}
}
+
+ private static final String TXT_PATH = "/tmp";
+ private static final String TXT_FILE_BASE_NAME = "langInfo";
+ private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt";
+
+ /**
+  * Opens the output file TXT_FILE_NAME inside the TXT_PATH directory for writing.
+  *
+  * @return a PrintWriter that encodes its output as UTF-8 and buffers it (4096 chars)
+  * @throws IOException if the file cannot be opened for writing
+  */
+ private static PrintWriter openWriter() throws IOException {
+     FileOutputStream fileStream = new FileOutputStream(new File(TXT_PATH, TXT_FILE_NAME));
+     OutputStreamWriter utf8Writer = new OutputStreamWriter(fileStream, StandardCharsets.UTF_8);
+     return new PrintWriter(new BufferedWriter(utf8Writer, 4096));
+ }
+
+ /**
+  * Prints the bytes as two-digit lowercase hex values without separators,
+  * starting a new line after every 16 bytes and ending with a line break.
+  * An empty array produces just one line break.
+  *
+  * @param out the writer that receives the hex text
+  * @param bytes the bytes to print
+  */
+ private static void printManyHexBytes(PrintWriter out, byte[] bytes) {
+     int column = 0;
+     for (byte b : bytes) {
+         if (column == 16) {
+             // The current line is full: break it before printing the next byte.
+             out.println();
+             column = 0;
+         }
+         out.format("%02x", b & 0xff);
+         ++column;
+     }
+     // Terminate the last (possibly empty) line.
+     out.println();
+ }
+
+ public static final void main(String[] args) throws IOException {
+ XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
+ LocaleDistance.Data distanceData = build();
+ System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
+ try (PrintWriter out = openWriter()) {
+ out.println("// © 2019 and later: Unicode, Inc. and others.\n" +
+ "// License & terms of use: http://www.unicode.org/copyright.html#License\n" +
+ "// Generated by ICU4J LocaleDistanceBuilder.\n" +
+ TXT_FILE_BASE_NAME + ":table(nofallback){");
+ out.println(" likely{");
+ out.println(" languageAliases{ // " + likelyData.languageAliases.size());
+ for (Map.Entry<String, String> entry :
+ new TreeMap<>(likelyData.languageAliases).entrySet()) {
+ out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
+ }
+ out.println(" } // languageAliases");
+
+ out.println(" regionAliases{ // " + likelyData.regionAliases.size());
+ for (Map.Entry<String, String> entry :
+ new TreeMap<>(likelyData.regionAliases).entrySet()) {
+ out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
+ }
+ out.println(" } // regionAliases");
+
+ out.println(" trie:bin{ // BytesTrie: " + likelyData.trie.length + " bytes");
+ printManyHexBytes(out, likelyData.trie);
+ out.println(" } // trie");
+
+ out.println(" lsrs{ // " + likelyData.lsrs.length);
+ for (LSR lsr : likelyData.lsrs) {
+ out.println(" \"" + lsr.language + "\",\"" +
+ lsr.script + "\",\"" + lsr.region + "\",");
+ }
+ out.println(" } // lsrs");
+ out.println(" } // likely");
+
+ out.println(" match{");
+ out.println(" trie:bin{ // BytesTrie: " + distanceData.trie.length + " bytes");
+ printManyHexBytes(out, distanceData.trie);
+ out.println(" } // trie");
+
+ out.println(" regionToPartitions:bin{ // " +
+ distanceData.regionToPartitionsIndex.length + " bytes");
+ printManyHexBytes(out, distanceData.regionToPartitionsIndex);
+ out.println(" } // regionToPartitions");
+
+ out.print(" partitions{");
+ boolean first = true;
+ for (String p : distanceData.partitionArrays) {
+ if (first) {
+ first = false;
+ } else {
+ out.append(',');
+ }
+ out.append('"').print(p);
+ out.append('"');
+ }
+ out.println("}");
+
+ out.println(" paradigms{");
+ for (LSR lsr : distanceData.paradigmLSRs) {
+ out.println(" \"" + lsr.language + "\",\"" +
+ lsr.script + "\",\"" + lsr.region + "\",");
+ }
+ out.println(" }");
+
+ out.print(" distances:intvector{");
+ first = true;
+ for (int d : distanceData.distances) {
+ if (first) {
+ first = false;
+ } else {
+ out.append(',');
+ }
+ out.print(d);
+ }
+ out.println("}");
+
+ out.println(" } // match");
+ out.println("}");
+ }
+ }
}