- Added code to use the parentLocales data in supplementalData.xml to determine the "parent locale ID" to use when
the requested resource bundle is not present (ICU-21126).
- Added code to change the parent-chain search path to handle the script better (ICU-21125; algorithm was described
in CLDR-15265):
- The base search path is now ll_Ssss_RR -> ll_RR -> ll_Ssss -> ll -> root
- If the requested script is not the default script for the requested language and region, we automatically
avoid fallbacks that will implicitly change the script.
- Added new code to the CLDR-to-ICU data generation tool to generate source code, and used it to generate the lookup
tables for the new resource-fallback logic (we can't use the existing resource files for this, since that would
involve opening a resource bundle while trying to open another resource bundle). The data-generation code is
intended to be generic enough to let us generate more static data tables in the future.
- Commented out a few collator tests, and changed one resource bundle test, because they're incompatible with the
new fallback logic (specifically, the default-script logic).
--- /dev/null
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Internal static data tables used by uresbund.cpp
+// WARNING: This file is mechanically generated by the CLDR-to-ICU tool
+// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourceFallbackCodeGenerator.java).
+// DO NOT HAND EDIT!!!
+
+#ifdef INCLUDED_FROM_URESBUND_CPP
+
+//======================================================================
+// Default script table
+const char scriptCodeChars[] =
+ "Aghb\0Ahom\0Arab\0Armi\0Armn\0Avst\0Bamu\0Bass\0Beng\0Brah\0Cakm\0"
+ "Cans\0Cari\0Cham\0Cher\0Chrs\0Copt\0Cprt\0Cyrl\0Deva\0Egyp\0Ethi\0"
+ "Geor\0Gong\0Gonm\0Goth\0Grek\0Gujr\0Guru\0Hans\0Hant\0Hebr\0Hluw\0"
+ "Hmnp\0Ital\0Jpan\0Kali\0Kana\0Kawi\0Khar\0Khmr\0Kits\0Knda\0Kore\0"
+ "Lana\0Laoo\0Lepc\0Lina\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Medf\0Merc\0"
+ "Mlym\0Mong\0Mroo\0Mymr\0Narb\0Nkoo\0Nshu\0Ogam\0Olck\0Orkh\0Orya\0"
+ "Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rohg\0Runr\0Samr\0Sarb\0"
+ "Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tale\0Talu\0Taml\0Tang\0"
+ "Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0Tnsa\0Toto\0Ugar\0Vaii\0Wcho\0"
+ "Xpeo\0Xsux\0Yiii\0";
+
+const char dsLocaleIDChars[] =
+ "ab\0abq\0adp\0ady\0ae\0aeb\0aho\0akk\0alt\0am\0apc\0apd\0ar\0"
+ "arc\0arq\0ars\0ary\0arz\0as\0ase\0av\0avl\0awa\0az_IQ\0az_IR\0"
+ "az_RU\0ba\0bal\0bap\0bax\0bcq\0be\0bej\0bfq\0bft\0bfy\0bg\0bgc\0"
+ "bgn\0bgx\0bhb\0bhi\0bho\0bji\0bjj\0blt\0bn\0bo\0bpy\0bqi\0bra\0"
+ "brh\0brx\0bsq\0bst\0btv\0bua\0byn\0ccp\0ce\0chm\0chr\0cja\0cjm\0"
+ "ckb\0cmg\0cop\0cr\0crh\0crk\0crl\0csw\0ctd\0cu\0cv\0dar\0dcc\0"
+ "dgl\0dmf\0doi\0drh\0drs\0dty\0dv\0dz\0egy\0eky\0el\0esg\0ett\0"
+ "fa\0fia\0fub\0gan\0gbm\0gbz\0gez\0ggn\0gjk\0gju\0glk\0gmv\0gof\0"
+ "gom\0gon\0got\0grc\0grt\0gu\0gvr\0gwc\0gwt\0ha_CM\0ha_SD\0hak\0"
+ "haz\0hdy\0he\0hi\0hlu\0hmd\0hnd\0hne\0hnj\0hno\0hoc\0hoj\0hsn\0"
+ "hy\0ii\0inh\0iu\0iw\0ja\0ji\0jml\0ka\0kaa\0kaw\0kbd\0kby\0kdt\0"
+ "kfr\0kfy\0khb\0khn\0kht\0khw\0kjg\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0"
+ "km\0kn\0ko\0koi\0kok\0kqy\0krc\0kru\0ks\0ktb\0ku_LB\0kum\0kv\0"
+ "kvx\0kxc\0kxl\0kxm\0kxp\0ky\0ky_CN\0kzh\0lab\0lad\0lah\0lbe\0"
+ "lcp\0lep\0lez\0lif\0lis\0lki\0lmn\0lo\0lrc\0luz\0lwl\0lzh\0mag\0"
+ "mai\0man_GN\0mde\0mdf\0mdx\0mfa\0mgp\0mk\0mki\0ml\0mn\0mn_CN\0"
+ "mni\0mnw\0mr\0mrd\0mrj\0mro\0ms_CC\0mtr\0mvy\0mwr\0mww\0my\0mym\0"
+ "myv\0myz\0mzn\0nan\0ne\0new\0nnp\0nod\0noe\0non\0nqo\0nsk\0nst\0"
+ "oj\0ojs\0or\0oru\0os\0osa\0ota\0otk\0oui\0pa\0pa_PK\0pal\0peo\0"
+ "phl\0phn\0pka\0pnt\0ppa\0pra\0prd\0ps\0raj\0rhg\0rif\0rjs\0rkt\0"
+ "rmt\0ru\0rue\0ryu\0sa\0sah\0sat\0saz\0sck\0scl\0sd\0sd_IN\0sdh\0"
+ "sga\0sgw\0shi\0shn\0shu\0si\0skr\0smp\0sog\0sou\0sr\0srb\0srx\0"
+ "swb\0swv\0syl\0syr\0ta\0taj\0tcy\0tdd\0tdg\0tdh\0te\0tg\0tg_PK\0"
+ "th\0thl\0thq\0thr\0ti\0tig\0tkt\0trw\0tsd\0tsf\0tsj\0tt\0tts\0"
+ "txg\0txo\0tyv\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0uk\0unr\0unr_NP\0"
+ "unx\0ur\0uz_AF\0uz_CN\0vai\0wal\0wbq\0wbr\0wni\0wsg\0wtm\0wuu\0"
+ "xco\0xcr\0xlc\0xld\0xmf\0xmn\0xmr\0xna\0xnr\0xpr\0xsa\0xsr\0yi\0"
+ "yue\0yue_CN\0zdj\0zgh\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0zh_HK\0"
+ "zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0zh_US\0"
+ "zh_VN\0zhx\0zkt\0";
+
+const int32_t defaultScriptTable[] = {
+ 0, 90, // ab -> Cyrl
+ 3, 90, // abq -> Cyrl
+ 7, 465, // adp -> Tibt
+ 11, 90, // ady -> Cyrl
+ 15, 25, // ae -> Avst
+ 18, 10, // aeb -> Arab
+ 22, 5, // aho -> Ahom
+ 26, 500, // akk -> Xsux
+ 30, 90, // alt -> Cyrl
+ 34, 105, // am -> Ethi
+ 37, 10, // apc -> Arab
+ 41, 10, // apd -> Arab
+ 45, 10, // ar -> Arab
+ 48, 15, // arc -> Armi
+ 52, 10, // arq -> Arab
+ 56, 10, // ars -> Arab
+ 60, 10, // ary -> Arab
+ 64, 10, // arz -> Arab
+ 68, 40, // as -> Beng
+ 71, 390, // ase -> Sgnw
+ 75, 90, // av -> Cyrl
+ 78, 10, // avl -> Arab
+ 82, 95, // awa -> Deva
+ 86, 10, // az_IQ -> Arab
+ 92, 10, // az_IR -> Arab
+ 98, 90, // az_RU -> Cyrl
+ 104, 90, // ba -> Cyrl
+ 107, 10, // bal -> Arab
+ 111, 95, // bap -> Deva
+ 115, 30, // bax -> Bamu
+ 119, 105, // bcq -> Ethi
+ 123, 90, // be -> Cyrl
+ 126, 10, // bej -> Arab
+ 130, 430, // bfq -> Taml
+ 134, 10, // bft -> Arab
+ 138, 95, // bfy -> Deva
+ 142, 90, // bg -> Cyrl
+ 145, 95, // bgc -> Deva
+ 149, 10, // bgn -> Arab
+ 153, 130, // bgx -> Grek
+ 157, 95, // bhb -> Deva
+ 161, 95, // bhi -> Deva
+ 165, 95, // bho -> Deva
+ 169, 105, // bji -> Ethi
+ 173, 95, // bjj -> Deva
+ 177, 440, // blt -> Tavt
+ 181, 40, // bn -> Beng
+ 184, 465, // bo -> Tibt
+ 187, 40, // bpy -> Beng
+ 191, 10, // bqi -> Arab
+ 195, 95, // bra -> Deva
+ 199, 10, // brh -> Arab
+ 203, 95, // brx -> Deva
+ 207, 35, // bsq -> Bass
+ 211, 105, // bst -> Ethi
+ 215, 95, // btv -> Deva
+ 219, 90, // bua -> Cyrl
+ 223, 105, // byn -> Ethi
+ 227, 50, // ccp -> Cakm
+ 231, 90, // ce -> Cyrl
+ 234, 90, // chm -> Cyrl
+ 238, 70, // chr -> Cher
+ 242, 10, // cja -> Arab
+ 246, 65, // cjm -> Cham
+ 250, 10, // ckb -> Arab
+ 254, 410, // cmg -> Soyo
+ 258, 80, // cop -> Copt
+ 262, 55, // cr -> Cans
+ 265, 90, // crh -> Cyrl
+ 269, 55, // crk -> Cans
+ 273, 55, // crl -> Cans
+ 277, 55, // csw -> Cans
+ 281, 340, // ctd -> Pauc
+ 285, 90, // cu -> Cyrl
+ 288, 90, // cv -> Cyrl
+ 291, 90, // dar -> Cyrl
+ 295, 10, // dcc -> Arab
+ 299, 10, // dgl -> Arab
+ 303, 265, // dmf -> Medf
+ 307, 95, // doi -> Deva
+ 311, 280, // drh -> Mong
+ 315, 105, // drs -> Ethi
+ 319, 95, // dty -> Deva
+ 323, 455, // dv -> Thaa
+ 326, 465, // dz -> Tibt
+ 329, 100, // egy -> Egyp
+ 333, 180, // eky -> Kali
+ 337, 130, // el -> Grek
+ 340, 120, // esg -> Gonm
+ 344, 170, // ett -> Ital
+ 348, 10, // fa -> Arab
+ 351, 10, // fia -> Arab
+ 355, 10, // fub -> Arab
+ 359, 145, // gan -> Hans
+ 363, 95, // gbm -> Deva
+ 367, 10, // gbz -> Arab
+ 371, 105, // gez -> Ethi
+ 375, 95, // ggn -> Deva
+ 379, 10, // gjk -> Arab
+ 383, 10, // gju -> Arab
+ 387, 10, // glk -> Arab
+ 391, 105, // gmv -> Ethi
+ 395, 105, // gof -> Ethi
+ 399, 95, // gom -> Deva
+ 403, 445, // gon -> Telu
+ 407, 125, // got -> Goth
+ 411, 85, // grc -> Cprt
+ 415, 40, // grt -> Beng
+ 419, 135, // gu -> Gujr
+ 422, 95, // gvr -> Deva
+ 426, 10, // gwc -> Arab
+ 430, 10, // gwt -> Arab
+ 434, 10, // ha_CM -> Arab
+ 440, 10, // ha_SD -> Arab
+ 446, 145, // hak -> Hans
+ 450, 10, // haz -> Arab
+ 454, 105, // hdy -> Ethi
+ 458, 155, // he -> Hebr
+ 461, 95, // hi -> Deva
+ 464, 160, // hlu -> Hluw
+ 468, 355, // hmd -> Plrd
+ 472, 10, // hnd -> Arab
+ 476, 95, // hne -> Deva
+ 480, 165, // hnj -> Hmnp
+ 484, 10, // hno -> Arab
+ 488, 95, // hoc -> Deva
+ 492, 95, // hoj -> Deva
+ 496, 145, // hsn -> Hans
+ 500, 20, // hy -> Armn
+ 503, 505, // ii -> Yiii
+ 506, 90, // inh -> Cyrl
+ 510, 55, // iu -> Cans
+ 513, 155, // iw -> Hebr
+ 516, 175, // ja -> Jpan
+ 519, 155, // ji -> Hebr
+ 522, 95, // jml -> Deva
+ 526, 110, // ka -> Geor
+ 529, 90, // kaa -> Cyrl
+ 533, 190, // kaw -> Kawi
+ 537, 90, // kbd -> Cyrl
+ 541, 10, // kby -> Arab
+ 545, 460, // kdt -> Thai
+ 549, 95, // kfr -> Deva
+ 553, 95, // kfy -> Deva
+ 557, 425, // khb -> Talu
+ 561, 95, // khn -> Deva
+ 565, 290, // kht -> Mymr
+ 569, 10, // khw -> Arab
+ 573, 225, // kjg -> Laoo
+ 577, 90, // kk -> Cyrl
+ 580, 10, // kk_AF -> Arab
+ 586, 10, // kk_CN -> Arab
+ 592, 10, // kk_IR -> Arab
+ 598, 10, // kk_MN -> Arab
+ 604, 200, // km -> Khmr
+ 607, 210, // kn -> Knda
+ 610, 215, // ko -> Kore
+ 613, 90, // koi -> Cyrl
+ 617, 95, // kok -> Deva
+ 621, 105, // kqy -> Ethi
+ 625, 90, // krc -> Cyrl
+ 629, 95, // kru -> Deva
+ 633, 10, // ks -> Arab
+ 636, 105, // ktb -> Ethi
+ 640, 10, // ku_LB -> Arab
+ 646, 90, // kum -> Cyrl
+ 650, 90, // kv -> Cyrl
+ 653, 10, // kvx -> Arab
+ 657, 105, // kxc -> Ethi
+ 661, 95, // kxl -> Deva
+ 665, 460, // kxm -> Thai
+ 669, 10, // kxp -> Arab
+ 673, 90, // ky -> Cyrl
+ 676, 10, // ky_CN -> Arab
+ 682, 10, // kzh -> Arab
+ 686, 235, // lab -> Lina
+ 690, 155, // lad -> Hebr
+ 694, 10, // lah -> Arab
+ 698, 90, // lbe -> Cyrl
+ 702, 460, // lcp -> Thai
+ 706, 230, // lep -> Lepc
+ 710, 90, // lez -> Cyrl
+ 714, 95, // lif -> Deva
+ 718, 240, // lis -> Lisu
+ 722, 10, // lki -> Arab
+ 726, 445, // lmn -> Telu
+ 730, 225, // lo -> Laoo
+ 733, 10, // lrc -> Arab
+ 737, 10, // luz -> Arab
+ 741, 460, // lwl -> Thai
+ 745, 145, // lzh -> Hans
+ 749, 95, // mag -> Deva
+ 753, 95, // mai -> Deva
+ 757, 300, // man_GN -> Nkoo
+ 764, 10, // mde -> Arab
+ 768, 90, // mdf -> Cyrl
+ 772, 105, // mdx -> Ethi
+ 776, 10, // mfa -> Arab
+ 780, 95, // mgp -> Deva
+ 784, 90, // mk -> Cyrl
+ 787, 10, // mki -> Arab
+ 791, 275, // ml -> Mlym
+ 794, 90, // mn -> Cyrl
+ 797, 280, // mn_CN -> Mong
+ 803, 40, // mni -> Beng
+ 807, 290, // mnw -> Mymr
+ 811, 95, // mr -> Deva
+ 814, 95, // mrd -> Deva
+ 818, 90, // mrj -> Cyrl
+ 822, 285, // mro -> Mroo
+ 826, 10, // ms_CC -> Arab
+ 832, 95, // mtr -> Deva
+ 836, 10, // mvy -> Arab
+ 840, 95, // mwr -> Deva
+ 844, 165, // mww -> Hmnp
+ 848, 290, // my -> Mymr
+ 851, 105, // mym -> Ethi
+ 855, 90, // myv -> Cyrl
+ 859, 255, // myz -> Mand
+ 863, 10, // mzn -> Arab
+ 867, 145, // nan -> Hans
+ 871, 95, // ne -> Deva
+ 874, 95, // new -> Deva
+ 878, 490, // nnp -> Wcho
+ 882, 220, // nod -> Lana
+ 886, 95, // noe -> Deva
+ 890, 370, // non -> Runr
+ 894, 300, // nqo -> Nkoo
+ 898, 55, // nsk -> Cans
+ 902, 470, // nst -> Tnsa
+ 906, 55, // oj -> Cans
+ 909, 55, // ojs -> Cans
+ 913, 325, // or -> Orya
+ 916, 10, // oru -> Arab
+ 920, 90, // os -> Cyrl
+ 923, 330, // osa -> Osge
+ 927, 10, // ota -> Arab
+ 931, 320, // otk -> Orkh
+ 935, 335, // oui -> Ougr
+ 939, 140, // pa -> Guru
+ 942, 10, // pa_PK -> Arab
+ 948, 345, // pal -> Phli
+ 952, 495, // peo -> Xpeo
+ 956, 10, // phl -> Arab
+ 960, 350, // phn -> Phnx
+ 964, 45, // pka -> Brah
+ 968, 130, // pnt -> Grek
+ 972, 95, // ppa -> Deva
+ 976, 195, // pra -> Khar
+ 980, 10, // prd -> Arab
+ 984, 10, // ps -> Arab
+ 987, 95, // raj -> Deva
+ 991, 365, // rhg -> Rohg
+ 995, 450, // rif -> Tfng
+ 999, 95, // rjs -> Deva
+ 1003, 40, // rkt -> Beng
+ 1007, 10, // rmt -> Arab
+ 1011, 90, // ru -> Cyrl
+ 1014, 90, // rue -> Cyrl
+ 1018, 185, // ryu -> Kana
+ 1022, 95, // sa -> Deva
+ 1025, 90, // sah -> Cyrl
+ 1029, 315, // sat -> Olck
+ 1033, 385, // saz -> Saur
+ 1037, 95, // sck -> Deva
+ 1041, 10, // scl -> Arab
+ 1045, 10, // sd -> Arab
+ 1048, 95, // sd_IN -> Deva
+ 1054, 10, // sdh -> Arab
+ 1058, 310, // sga -> Ogam
+ 1062, 105, // sgw -> Ethi
+ 1066, 450, // shi -> Tfng
+ 1070, 290, // shn -> Mymr
+ 1074, 10, // shu -> Arab
+ 1078, 395, // si -> Sinh
+ 1081, 10, // skr -> Arab
+ 1085, 375, // smp -> Samr
+ 1089, 400, // sog -> Sogd
+ 1093, 460, // sou -> Thai
+ 1097, 90, // sr -> Cyrl
+ 1100, 405, // srb -> Sora
+ 1104, 95, // srx -> Deva
+ 1108, 10, // swb -> Arab
+ 1112, 95, // swv -> Deva
+ 1116, 40, // syl -> Beng
+ 1120, 415, // syr -> Syrc
+ 1124, 430, // ta -> Taml
+ 1127, 95, // taj -> Deva
+ 1131, 210, // tcy -> Knda
+ 1135, 420, // tdd -> Tale
+ 1139, 95, // tdg -> Deva
+ 1143, 95, // tdh -> Deva
+ 1147, 445, // te -> Telu
+ 1150, 90, // tg -> Cyrl
+ 1153, 10, // tg_PK -> Arab
+ 1159, 460, // th -> Thai
+ 1162, 95, // thl -> Deva
+ 1166, 95, // thq -> Deva
+ 1170, 95, // thr -> Deva
+ 1174, 105, // ti -> Ethi
+ 1177, 105, // tig -> Ethi
+ 1181, 95, // tkt -> Deva
+ 1185, 10, // trw -> Arab
+ 1189, 130, // tsd -> Grek
+ 1193, 95, // tsf -> Deva
+ 1197, 465, // tsj -> Tibt
+ 1201, 90, // tt -> Cyrl
+ 1204, 460, // tts -> Thai
+ 1208, 435, // txg -> Tang
+ 1212, 475, // txo -> Toto
+ 1216, 90, // tyv -> Cyrl
+ 1220, 0, // udi -> Aghb
+ 1224, 90, // udm -> Cyrl
+ 1228, 10, // ug -> Arab
+ 1231, 90, // ug_KZ -> Cyrl
+ 1237, 90, // ug_MN -> Cyrl
+ 1243, 480, // uga -> Ugar
+ 1247, 90, // uk -> Cyrl
+ 1250, 40, // unr -> Beng
+ 1254, 95, // unr_NP -> Deva
+ 1261, 40, // unx -> Beng
+ 1265, 10, // ur -> Arab
+ 1268, 10, // uz_AF -> Arab
+ 1274, 90, // uz_CN -> Cyrl
+ 1280, 485, // vai -> Vaii
+ 1284, 105, // wal -> Ethi
+ 1288, 445, // wbq -> Telu
+ 1292, 95, // wbr -> Deva
+ 1296, 10, // wni -> Arab
+ 1300, 115, // wsg -> Gong
+ 1304, 95, // wtm -> Deva
+ 1308, 145, // wuu -> Hans
+ 1312, 75, // xco -> Chrs
+ 1316, 60, // xcr -> Cari
+ 1320, 245, // xlc -> Lyci
+ 1324, 250, // xld -> Lydi
+ 1328, 110, // xmf -> Geor
+ 1332, 260, // xmn -> Mani
+ 1336, 270, // xmr -> Merc
+ 1340, 295, // xna -> Narb
+ 1344, 95, // xnr -> Deva
+ 1348, 360, // xpr -> Prti
+ 1352, 380, // xsa -> Sarb
+ 1356, 95, // xsr -> Deva
+ 1360, 155, // yi -> Hebr
+ 1363, 150, // yue -> Hant
+ 1367, 145, // yue_CN -> Hans
+ 1374, 10, // zdj -> Arab
+ 1378, 450, // zgh -> Tfng
+ 1382, 145, // zh -> Hans
+ 1385, 150, // zh_AU -> Hant
+ 1391, 150, // zh_BN -> Hant
+ 1397, 150, // zh_GB -> Hant
+ 1403, 150, // zh_GF -> Hant
+ 1409, 150, // zh_HK -> Hant
+ 1415, 150, // zh_ID -> Hant
+ 1421, 150, // zh_MO -> Hant
+ 1427, 150, // zh_PA -> Hant
+ 1433, 150, // zh_PF -> Hant
+ 1439, 150, // zh_PH -> Hant
+ 1445, 150, // zh_SR -> Hant
+ 1451, 150, // zh_TH -> Hant
+ 1457, 150, // zh_TW -> Hant
+ 1463, 150, // zh_US -> Hant
+ 1469, 150, // zh_VN -> Hant
+ 1475, 305, // zhx -> Nshu
+ 1479, 205, // zkt -> Kits
+};
+
+//======================================================================
+// Parent locale table
+const char parentLocaleChars[] =
+ "az_Arab\0az_Cyrl\0bal_Latn\0blt_Latn\0bm_Nkoo\0bs_Cyrl\0byn_Latn\0"
+ "cu_Glag\0dje_Arab\0dyo_Arab\0en_001\0en_150\0en_AG\0en_AI\0en_AT\0"
+ "en_AU\0en_BB\0en_BE\0en_BM\0en_BS\0en_BW\0en_BZ\0en_CC\0en_CH\0"
+ "en_CK\0en_CM\0en_CX\0en_CY\0en_DE\0en_DG\0en_DK\0en_DM\0en_Dsrt\0"
+ "en_ER\0en_FI\0en_FJ\0en_FK\0en_FM\0en_GB\0en_GD\0en_GG\0en_GH\0"
+ "en_GI\0en_GM\0en_GY\0en_HK\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0"
+ "en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0"
+ "en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0en_NA\0"
+ "en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0en_PN\0"
+ "en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0en_SI\0"
+ "en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0en_TT\0"
+ "en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0en_ZM\0"
+ "en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0es_CR\0"
+ "es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_MX\0es_NI\0es_PA\0es_PE\0"
+ "es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0"
+ "ha_Arab\0hi_Latn\0ht\0iu_Latn\0kk_Arab\0ks_Deva\0ku_Arab\0ky_Arab\0"
+ "ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0pa_Arab\0"
+ "pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0pt_MZ\0"
+ "pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0sd_Sind\0"
+ "shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0uz_Arab\0"
+ "uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0zh_Hant_HK\0"
+ "zh_Hant_MO\0";
+
+const int32_t parentLocaleTable[] = {
+ 0, 1017, // az_Arab -> root
+ 8, 1017, // az_Cyrl -> root
+ 16, 1017, // bal_Latn -> root
+ 25, 1017, // blt_Latn -> root
+ 34, 1017, // bm_Nkoo -> root
+ 42, 1017, // bs_Cyrl -> root
+ 50, 1017, // byn_Latn -> root
+ 59, 1017, // cu_Glag -> root
+ 67, 1017, // dje_Arab -> root
+ 76, 1017, // dyo_Arab -> root
+ 92, 85, // en_150 -> en_001
+ 99, 85, // en_AG -> en_001
+ 105, 85, // en_AI -> en_001
+ 111, 92, // en_AT -> en_150
+ 117, 85, // en_AU -> en_001
+ 123, 85, // en_BB -> en_001
+ 129, 92, // en_BE -> en_150
+ 135, 85, // en_BM -> en_001
+ 141, 85, // en_BS -> en_001
+ 147, 85, // en_BW -> en_001
+ 153, 85, // en_BZ -> en_001
+ 159, 85, // en_CC -> en_001
+ 165, 92, // en_CH -> en_150
+ 171, 85, // en_CK -> en_001
+ 177, 85, // en_CM -> en_001
+ 183, 85, // en_CX -> en_001
+ 189, 85, // en_CY -> en_001
+ 195, 92, // en_DE -> en_150
+ 201, 85, // en_DG -> en_001
+ 207, 92, // en_DK -> en_150
+ 213, 85, // en_DM -> en_001
+ 219, 1017, // en_Dsrt -> root
+ 227, 85, // en_ER -> en_001
+ 233, 92, // en_FI -> en_150
+ 239, 85, // en_FJ -> en_001
+ 245, 85, // en_FK -> en_001
+ 251, 85, // en_FM -> en_001
+ 257, 85, // en_GB -> en_001
+ 263, 85, // en_GD -> en_001
+ 269, 85, // en_GG -> en_001
+ 275, 85, // en_GH -> en_001
+ 281, 85, // en_GI -> en_001
+ 287, 85, // en_GM -> en_001
+ 293, 85, // en_GY -> en_001
+ 299, 85, // en_HK -> en_001
+ 305, 85, // en_IE -> en_001
+ 311, 85, // en_IL -> en_001
+ 317, 85, // en_IM -> en_001
+ 323, 85, // en_IN -> en_001
+ 329, 85, // en_IO -> en_001
+ 335, 85, // en_JE -> en_001
+ 341, 85, // en_JM -> en_001
+ 347, 85, // en_KE -> en_001
+ 353, 85, // en_KI -> en_001
+ 359, 85, // en_KN -> en_001
+ 365, 85, // en_KY -> en_001
+ 371, 85, // en_LC -> en_001
+ 377, 85, // en_LR -> en_001
+ 383, 85, // en_LS -> en_001
+ 389, 85, // en_MG -> en_001
+ 395, 85, // en_MO -> en_001
+ 401, 85, // en_MS -> en_001
+ 407, 85, // en_MT -> en_001
+ 413, 85, // en_MU -> en_001
+ 419, 85, // en_MV -> en_001
+ 425, 85, // en_MW -> en_001
+ 431, 85, // en_MY -> en_001
+ 437, 85, // en_NA -> en_001
+ 443, 85, // en_NF -> en_001
+ 449, 85, // en_NG -> en_001
+ 455, 92, // en_NL -> en_150
+ 461, 85, // en_NR -> en_001
+ 467, 85, // en_NU -> en_001
+ 473, 85, // en_NZ -> en_001
+ 479, 85, // en_PG -> en_001
+ 485, 85, // en_PK -> en_001
+ 491, 85, // en_PN -> en_001
+ 497, 85, // en_PW -> en_001
+ 503, 85, // en_RW -> en_001
+ 509, 85, // en_SB -> en_001
+ 515, 85, // en_SC -> en_001
+ 521, 85, // en_SD -> en_001
+ 527, 92, // en_SE -> en_150
+ 533, 85, // en_SG -> en_001
+ 539, 85, // en_SH -> en_001
+ 545, 92, // en_SI -> en_150
+ 551, 85, // en_SL -> en_001
+ 557, 85, // en_SS -> en_001
+ 563, 85, // en_SX -> en_001
+ 569, 85, // en_SZ -> en_001
+ 575, 1017, // en_Shaw -> root
+ 583, 85, // en_TC -> en_001
+ 589, 85, // en_TK -> en_001
+ 595, 85, // en_TO -> en_001
+ 601, 85, // en_TT -> en_001
+ 607, 85, // en_TV -> en_001
+ 613, 85, // en_TZ -> en_001
+ 619, 85, // en_UG -> en_001
+ 625, 85, // en_VC -> en_001
+ 631, 85, // en_VG -> en_001
+ 637, 85, // en_VU -> en_001
+ 643, 85, // en_WS -> en_001
+ 649, 85, // en_ZA -> en_001
+ 655, 85, // en_ZM -> en_001
+ 661, 85, // en_ZW -> en_001
+ 674, 667, // es_AR -> es_419
+ 680, 667, // es_BO -> es_419
+ 686, 667, // es_BR -> es_419
+ 692, 667, // es_BZ -> es_419
+ 698, 667, // es_CL -> es_419
+ 704, 667, // es_CO -> es_419
+ 710, 667, // es_CR -> es_419
+ 716, 667, // es_CU -> es_419
+ 722, 667, // es_DO -> es_419
+ 728, 667, // es_EC -> es_419
+ 734, 667, // es_GT -> es_419
+ 740, 667, // es_HN -> es_419
+ 746, 667, // es_MX -> es_419
+ 752, 667, // es_NI -> es_419
+ 758, 667, // es_PA -> es_419
+ 764, 667, // es_PE -> es_419
+ 770, 667, // es_PR -> es_419
+ 776, 667, // es_PY -> es_419
+ 782, 667, // es_SV -> es_419
+ 788, 667, // es_US -> es_419
+ 794, 667, // es_UY -> es_419
+ 800, 667, // es_VE -> es_419
+ 806, 1017, // ff_Adlm -> root
+ 814, 1017, // ff_Arab -> root
+ 828, 1017, // ha_Arab -> root
+ 836, 323, // hi_Latn -> en_IN
+ 844, 822, // ht -> fr_HT
+ 847, 1017, // iu_Latn -> root
+ 855, 1017, // kk_Arab -> root
+ 863, 1017, // ks_Deva -> root
+ 871, 1017, // ku_Arab -> root
+ 879, 1017, // ky_Arab -> root
+ 887, 1017, // ky_Latn -> root
+ 895, 1017, // ml_Arab -> root
+ 903, 1017, // mn_Mong -> root
+ 911, 1017, // mni_Mtei -> root
+ 920, 1017, // ms_Arab -> root
+ 928, 934, // nb -> no
+ 931, 934, // nn -> no
+ 937, 1017, // pa_Arab -> root
+ 945, 999, // pt_AO -> pt_PT
+ 951, 999, // pt_CH -> pt_PT
+ 957, 999, // pt_CV -> pt_PT
+ 963, 999, // pt_FR -> pt_PT
+ 969, 999, // pt_GQ -> pt_PT
+ 975, 999, // pt_GW -> pt_PT
+ 981, 999, // pt_LU -> pt_PT
+ 987, 999, // pt_MO -> pt_PT
+ 993, 999, // pt_MZ -> pt_PT
+ 1005, 999, // pt_ST -> pt_PT
+ 1011, 999, // pt_TL -> pt_PT
+ 1022, 1017, // sat_Deva -> root
+ 1031, 1017, // sd_Deva -> root
+ 1039, 1017, // sd_Khoj -> root
+ 1047, 1017, // sd_Sind -> root
+ 1055, 1017, // shi_Latn -> root
+ 1064, 1017, // so_Arab -> root
+ 1072, 1017, // sr_Latn -> root
+ 1080, 1017, // sw_Arab -> root
+ 1088, 1017, // tg_Arab -> root
+ 1096, 1017, // ug_Cyrl -> root
+ 1104, 1017, // uz_Arab -> root
+ 1112, 1017, // uz_Cyrl -> root
+ 1120, 1017, // vai_Latn -> root
+ 1129, 1017, // wo_Arab -> root
+ 1137, 1017, // yo_Arab -> root
+ 1145, 1017, // yue_Hans -> root
+ 1154, 1017, // zh_Hant -> root
+ 1173, 1162, // zh_Hant_MO -> zh_Hant_HK
+};
+
+
+#endif // INCLUDED_FROM_URESBUND_CPP
return false;
}
+// Reports whether the given locale ID contains a variant subtag.
+static UBool hasVariant(const char* localeID) {
+    // No output buffer is supplied: only the length that uloc_getVariant() reports is of
+    // interest here, so the status it sets is deliberately ignored.
+    UErrorCode status = U_ZERO_ERROR;
+    return uloc_getVariant(localeID, NULL, 0, &status) != 0;
+}
+
+// This file contains the tables for doing locale fallback, which are generated
+// by the CLDR-to-ICU process directly from the CLDR data. This file should only
+// ever be included from here.
+#define INCLUDED_FROM_URESBUND_CPP
+#include "localefallback_data.h"
+
+// Binary-searches a table of (key offset, value offset) pairs.
+// - key:               the string to look for
+// - keyStrs:           character pool that the first element of each pair indexes into
+// - valueStrs:         character pool that the second element of each pair indexes into
+// - lookupTable:       the pairs, laid out flat and ordered by uprv_strcmp() of their keys
+// - lookupTableLength: number of int32_t slots in lookupTable (two per pair)
+// Returns a pointer to the matching value string inside valueStrs, or nullptr if key is absent.
+static const char* performFallbackLookup(const char* key,
+                                         const char* keyStrs,
+                                         const char* valueStrs,
+                                         const int32_t* lookupTable,
+                                         int32_t lookupTableLength) {
+    const int32_t* lo = lookupTable;
+    const int32_t* hi = lookupTable + lookupTableLength;
+
+    while (lo < hi) {
+        // Halve the remaining span, rounding down to an even slot so that we always land
+        // on the first element of a pair
+        const int32_t* entry = lo + (((hi - lo) / 4) * 2);
+        int32_t order = uprv_strcmp(key, keyStrs + entry[0]);
+        if (order < 0) {
+            hi = entry;
+            continue;
+        }
+        if (order > 0) {
+            lo = entry + 2;
+            continue;
+        }
+        return valueStrs + entry[1];
+    }
+    return nullptr;
+}
+
+// Returns the default script code for a language (and, optionally, a region).
+// The "language_region" ID is looked up in the default script table first (when a region was
+// supplied), then the bare language; if neither is listed, the answer is "Latn".
+static CharString getDefaultScript(const CharString& language, const CharString& region) {
+    UErrorCode status = U_ZERO_ERROR;
+    const char* found = nullptr;
+
+    // "Latn" is what we hand back whenever the table has nothing for this locale
+    CharString script("Latn", status);
+
+    // most specific key first: language + "_" + region
+    if (!region.isEmpty()) {
+        CharString languageAndRegion;
+        languageAndRegion.append(language, status).append("_", status).append(region, status);
+        if (U_FAILURE(status)) {
+            return script;
+        }
+        found = performFallbackLookup(languageAndRegion.data(), dsLocaleIDChars, scriptCodeChars, defaultScriptTable, UPRV_LENGTHOF(defaultScriptTable));
+    }
+
+    // no region, or no entry for the language/region pair: try the language on its own
+    if (found == nullptr) {
+        found = performFallbackLookup(language.data(), dsLocaleIDChars, scriptCodeChars, defaultScriptTable, UPRV_LENGTHOF(defaultScriptTable));
+    }
+
+    // still nothing: keep the "Latn" default
+    if (found == nullptr) {
+        return script;
+    }
+    script.clear();
+    script.append(found, status);
+    return script;
+}
+
+// Selects which fallback steps are permitted when a resource bundle is opened for a requested name.
+// Defined ahead of getParentLocaleID() and findFirstExisting(), both of which take it as a parameter.
+enum UResOpenType {
+ /**
+ * Open a resource bundle for the locale;
+ * if there is not even a base language bundle, then fall back to the default locale;
+ * if there is no bundle for that either, then load the root bundle.
+ *
+ * This is the default bundle loading behavior.
+ */
+ URES_OPEN_LOCALE_DEFAULT_ROOT,
+ // TODO: ICU ticket #11271 "consistent default locale across locale trees"
+ // Add an option to look at the main locale tree for whether to
+ // fall back to root directly (if the locale has main data) or
+ // fall back to the default locale first (if the locale does not even have main data).
+ /**
+ * Open a resource bundle for the locale;
+ * if there is not even a base language bundle, then load the root bundle;
+ * never fall back to the default locale.
+ *
+ * This is used for algorithms that have good pan-Unicode default behavior,
+ * such as case mappings, collation, and segmentation (BreakIterator).
+ */
+ URES_OPEN_LOCALE_ROOT,
+ /**
+ * Open a resource bundle for the exact bundle name as requested;
+ * no fallbacks, do not load parent bundles.
+ *
+ * This is used for supplemental (non-locale) data.
+ */
+ URES_OPEN_DIRECT
+};
+typedef enum UResOpenType UResOpenType;
+
+/**
+ * Internal function, determines the search path for resource bundle files.
+ * Currently, this function is used only by findFirstExisting() to help search for resource bundle files when a bundle for the specified
+ * locale doesn't exist.  The code that supports inheritance of resources between existing resource bundle files continues to
+ * use chopLocale().
+ * @param name In-out parameter: On input, the locale ID to get a parent locale ID for (this is a locale's base name, without keywords); on output, the
+ * requested parent locale ID.
+ * @param origName The original locale ID the caller of findFirstExisting() requested.  This is the same as `name` on the first call to this function,
+ * but as findFirstExisting() ascends the resource bundle's parent tree, this parameter will continue to be the original locale ID requested.
+ * @param openType The open type the bundle was requested with.  The parent locale table and the default-script check for script-only
+ * locale IDs are only applied for URES_OPEN_LOCALE_DEFAULT_ROOT; for other open types those steps use plain truncation.
+ * @return true if `name` now holds a parent locale ID to try next; false if no parent was produced, so the calling code stops ascending.
+ * On the early-out paths (empty ID, trailing '_', variant present, or parse failure) the result is whatever chopLocale() reports.
+ */
+static bool getParentLocaleID(char *name, const char *origName, UResOpenType openType) {
+    // early out if the locale ID is empty, has a variant code, or ends with _
+    // (the length must be tested first: name[length - 1] on an empty string would read outside the buffer)
+    size_t nameLen = uprv_strlen(name);
+    if (nameLen == 0 || name[nameLen - 1] == '_' || hasVariant(name)) {
+        return chopLocale(name);
+    }
+
+    UErrorCode err = U_ZERO_ERROR;
+    const char* tempNamePtr = name;
+    CharString language = ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err);
+    if (*tempNamePtr == '_') {
+        ++tempNamePtr;
+    }
+    CharString script = ulocimp_getScript(tempNamePtr, &tempNamePtr, err);
+    if (*tempNamePtr == '_') {
+        ++tempNamePtr;
+    }
+    CharString region = ulocimp_getCountry(tempNamePtr, &tempNamePtr, err);
+    CharString workingLocale;
+    if (U_FAILURE(err)) {
+        // hopefully this never happens...
+        return chopLocale(name);
+    }
+
+    // if the open type is URES_OPEN_LOCALE_DEFAULT_ROOT, first look the locale ID up in the parent locale table;
+    // if that table specifies a parent for it, return that  (we don't do this for the other open types-- if we're not
+    // falling back through the system default locale, we also want to do straight truncation fallback instead
+    // of looking things up in the parent locale table-- see https://www.unicode.org/reports/tr35/tr35.html#Parent_Locales:
+    // "Collation data, however, is an exception...")
+    if (openType == URES_OPEN_LOCALE_DEFAULT_ROOT) {
+        const char* parentID = performFallbackLookup(name, parentLocaleChars, parentLocaleChars, parentLocaleTable, UPRV_LENGTHOF(parentLocaleTable));
+        if (parentID != nullptr) {
+            uprv_strcpy(name, parentID);
+            return true;
+        }
+    }
+
+    // if it's not in the parent locale table, figure out the fallback script algorithmically
+    // (see CLDR-15265 for an explanation of the algorithm)
+    if (!script.isEmpty() && !region.isEmpty()) {
+        // if "name" has both script and region, is the script the default script?
+        // - if so, remove it and keep the region
+        // - if not, remove the region and keep the script
+        if (getDefaultScript(language, region) == script.toStringPiece()) {
+            workingLocale.append(language, err).append("_", err).append(region, err);
+        } else {
+            workingLocale.append(language, err).append("_", err).append(script, err);
+        }
+    } else if (!region.isEmpty()) {
+        // if "name" has region but not script, did the original locale ID specify a script?
+        // - if yes, replace the region with the script from the original locale ID
+        // - if no, replace the region with the default script for that language and region
+        // (this branch shares the function-level "err" so that a failure here is seen by the final check below)
+        tempNamePtr = origName;
+        // parse past the original ID's language subtag; only the position after it is needed
+        ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err);
+        if (*tempNamePtr == '_') {
+            ++tempNamePtr;
+        }
+        // the script must be read from the position after the language, not from the start of origName
+        // (otherwise a four-letter language subtag would be mistaken for a script, and real scripts never found)
+        CharString origNameScript = ulocimp_getScript(tempNamePtr, nullptr, err);
+        if (!origNameScript.isEmpty()) {
+            workingLocale.append(language, err).append("_", err).append(origNameScript, err);
+        } else {
+            workingLocale.append(language, err).append("_", err).append(getDefaultScript(language, region), err);
+        }
+    } else if (!script.isEmpty()) {
+        // if "name" has script but not region (and our open type is URES_OPEN_LOCALE_DEFAULT_ROOT), is the script
+        // the default script for the language?
+        // - if so, remove it from the locale ID
+        // - if not, return false to continue up the chain
+        // (we don't do this for other open types for the same reason we don't look things up in the parent
+        // locale table for other open types-- see the reference to UTS #35 above)
+        if (openType != URES_OPEN_LOCALE_DEFAULT_ROOT || getDefaultScript(language, CharString()) == script.toStringPiece()) {
+            workingLocale.append(language, err);
+        } else {
+            return false;
+        }
+    } else {
+        // if "name" just contains a language code, return false so the calling code falls back to "root"
+        return false;
+    }
+    if (U_SUCCESS(err) && !workingLocale.isEmpty()) {
+        uprv_strcpy(name, workingLocale.data());
+        return true;
+    } else {
+        return false;
+    }
+}
+
/**
* Called to check whether a name without '_' needs to be checked for a parent.
* Some code had assumed that locale IDs with '_' could not have a non-root parent.
/* INTERNAL: */
/* CAUTION: resbMutex must be locked when calling this function! */
static UResourceDataEntry *
-findFirstExisting(const char* path, char* name, const char* defaultLocale,
- UBool *isRoot, UBool *hasChopped, UBool *isDefault, UErrorCode* status) {
+findFirstExisting(const char* path, char* name, const char* defaultLocale, UResOpenType openType,
+ UBool *isRoot, UBool *foundParent, UBool *isDefault, UErrorCode* status) {
UResourceDataEntry *r = NULL;
UBool hasRealData = false;
- *hasChopped = true; /* we're starting with a fresh name */
+ *foundParent = true; /* we're starting with a fresh name */
+ char origName[ULOC_FULLNAME_CAPACITY];
- while(*hasChopped && !hasRealData) {
+ uprv_strcpy(origName, name);
+ while(*foundParent && !hasRealData) {
r = init_entry(name, path, status);
/* Null pointer test */
if (U_FAILURE(*status)) {
*isRoot = (UBool)(uprv_strcmp(name, kRootLocaleName) == 0);
/*Fallback data stuff*/
- *hasChopped = chopLocale(name);
- if (*hasChopped && *name == '\0') {
+ if (!hasRealData) {
+ *foundParent = getParentLocaleID(name, origName, openType);
+ } else {
+ // we've already found a real resource file; what we return to the caller is the parent
+ // locale ID for inheritance, which should come from chopLocale(), not getParentLocaleID()
+ *foundParent = chopLocale(name);
+ }
+ if (*foundParent && *name == '\0') {
uprv_strcpy(name, "und");
}
}
return true;
}
-enum UResOpenType {
- /**
- * Open a resource bundle for the locale;
- * if there is not even a base language bundle, then fall back to the default locale;
- * if there is no bundle for that either, then load the root bundle.
- *
- * This is the default bundle loading behavior.
- */
- URES_OPEN_LOCALE_DEFAULT_ROOT,
- // TODO: ICU ticket #11271 "consistent default locale across locale trees"
- // Add an option to look at the main locale tree for whether to
- // fall back to root directly (if the locale has main data) or
- // fall back to the default locale first (if the locale does not even have main data).
- /**
- * Open a resource bundle for the locale;
- * if there is not even a base language bundle, then load the root bundle;
- * never fall back to the default locale.
- *
- * This is used for algorithms that have good pan-Unicode default behavior,
- * such as case mappings, collation, and segmentation (BreakIterator).
- */
- URES_OPEN_LOCALE_ROOT,
- /**
- * Open a resource bundle for the exact bundle name as requested;
- * no fallbacks, do not load parent bundles.
- *
- * This is used for supplemental (non-locale) data.
- */
- URES_OPEN_DIRECT
-};
-typedef enum UResOpenType UResOpenType;
-
static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
UResOpenType openType, UErrorCode* status) {
U_ASSERT(openType != URES_OPEN_DIRECT);
Mutex lock(&resbMutex); // Lock resbMutex until the end of this function.
/* We're going to skip all the locales that do not have any data */
- r = findFirstExisting(path, name, defaultLocale, &isRoot, &hasChopped, &isDefault, &intStatus);
+ r = findFirstExisting(path, name, defaultLocale, openType, &isRoot, &hasChopped, &isDefault, &intStatus);
// If we failed due to out-of-memory, report the failure and exit early.
if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) {
/* insert default locale */
uprv_strcpy(name, defaultLocale);
- r = findFirstExisting(path, name, defaultLocale, &isRoot, &hasChopped, &isDefault, &intStatus);
+ r = findFirstExisting(path, name, defaultLocale, openType, &isRoot, &hasChopped, &isDefault, &intStatus);
// If we failed due to out-of-memory, report the failure and exit early.
if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
*status = intStatus;
/* present */
if(r == NULL) {
uprv_strcpy(name, kRootLocaleName);
- r = findFirstExisting(path, name, defaultLocale, &isRoot, &hasChopped, &isDefault, &intStatus);
+ r = findFirstExisting(path, name, defaultLocale, openType, &isRoot, &hasChopped, &isDefault, &intStatus);
// If we failed due to out-of-memory, report the failure and exit early.
if (intStatus == U_MEMORY_ALLOCATION_ERROR) {
*status = intStatus;
#include "filestrm.h"
#include <stdbool.h>
#include <stdlib.h>
+#include <stdio.h> // for sprintf()
#define RESTEST_HEAP_CHECK 0
static void TestFallback(void);
static void TestTable32(void);
static void TestFileStream(void);
+static void TestAlgorithmicParentFallback(void);
+
/*****************************************************************************/
const UChar kERROR[] = { 0x0045 /*E*/, 0x0052 /*'R'*/, 0x0052 /*'R'*/,
#endif
addTest(root, &TestFallback, "tsutil/crestst/TestFallback");
addTest(root, &TestAliasConflict, "tsutil/crestst/TestAliasConflict");
+ addTest(root, &TestAlgorithmicParentFallback, "tsutil/crestst/TestAlgorithmicParentFallback");
}
}
ures_close(res);
}
+
+static void TestAlgorithmicParentFallback(void) {
+    // Test for ICU-21125 and ICU-21126 -- cases where resource fallback isn't determined by lopping fields off
+    // the end of the locale ID (or following a %%Parent directive in a resource bundle).
+    //
+    // Each test case is a triple:
+    //   1. the locale ID to open
+    //   2. the expected actual locale when opened with ures_open() (URES_OPEN_LOCALE_DEFAULT_ROOT);
+    //      "root" here is a placeholder meaning "the actual locale must be a prefix of the system default
+    //      locale ID" (see the check in the loop below)
+    //   3. the expected actual locale when opened with ures_openNoDefault() (URES_OPEN_LOCALE_ROOT)
+    const char* testCases[] = {
+        "de_Latn_LI", "de_LI", "de_LI",
+//        "en_VA", "en_150", "en", // TODO: put this back in after https://unicode-org.atlassian.net/browse/CLDR-15893 is fixed
+        "yi_Latn_DE", "root", "yi",
+        "yi_Hebr_DE", "yi", "yi",
+        "zh_Hant_SG", "zh_Hant", "zh_Hant"
+        // would be nice to test that sr_Latn_ME falls back to sr_Latn, or sr_ME to sr_Latn_ME,
+        // or sr_Latn to root, but all of these resource bundle files actually exist in the project
+    };
+
+    for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); i += 3) {
+        const char* testLocale = testCases[i];
+        const char* regularExpected = testCases[i + 1];
+        const char* noDefaultExpected = testCases[i + 2];
+
+        UErrorCode err = U_ZERO_ERROR;
+        UResourceBundle* regularRB = ures_open(NULL, testLocale, &err);
+        // All messages are formatted with snprintf() so they can never write past the end of this buffer.
+        char errorMessage[200];
+
+        snprintf(errorMessage, sizeof(errorMessage), "Error %s opening resource bundle for locale %s and URES_OPEN_LOCALE_DEFAULT_ROOT", u_errorName(err), testLocale);
+        if (assertSuccess(errorMessage, &err)) {
+            const char* resourceLocale = ures_getLocaleByType(regularRB, ULOC_ACTUAL_LOCALE, &err);
+
+            snprintf(errorMessage, sizeof(errorMessage), "Error %s getting resource locale for locale %s and URES_OPEN_LOCALE_DEFAULT_ROOT", u_errorName(err), testLocale);
+            if (assertSuccess(errorMessage, &err)) {
+                snprintf(errorMessage, sizeof(errorMessage), "Mismatch for locale %s and URES_OPEN_LOCALE_DEFAULT_ROOT", testLocale);
+                if (uprv_strcmp(regularExpected, "root") == 0) {
+                    // (the system default locale may have keywords-- just check if the resource locale (which won't) is a prefix of the system default)
+                    assertTrue(errorMessage, uprv_strncmp(uloc_getDefault(), resourceLocale, uprv_strlen(resourceLocale)) == 0);
+                } else {
+                    assertEquals(errorMessage, regularExpected, resourceLocale);
+                }
+            }
+        }
+        ures_close(regularRB);
+
+        // Same locale again, this time without going through the system default locale.
+        err = U_ZERO_ERROR;
+        UResourceBundle* noDefaultRB = ures_openNoDefault(NULL, testLocale, &err);
+
+        snprintf(errorMessage, sizeof(errorMessage), "Error %s opening resource bundle for locale %s and URES_OPEN_LOCALE_ROOT", u_errorName(err), testLocale);
+        if (assertSuccess(errorMessage, &err)) {
+            const char* resourceLocale = ures_getLocaleByType(noDefaultRB, ULOC_ACTUAL_LOCALE, &err);
+
+            snprintf(errorMessage, sizeof(errorMessage), "Error %s getting resource locale for locale %s and URES_OPEN_LOCALE_ROOT", u_errorName(err), testLocale);
+            if (assertSuccess(errorMessage, &err)) {
+                snprintf(errorMessage, sizeof(errorMessage), "Mismatch for locale %s and URES_OPEN_LOCALE_ROOT", testLocale);
+                assertEquals(errorMessage, noDefaultExpected, resourceLocale);
+            }
+        }
+        ures_close(noDefaultRB);
+    }
+}
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.Comparator;
import java.util.EnumMap;
import java.util.Enumeration;
import java.util.HashMap;
localeID = ULocale.getBaseName(localeID);
ICUResourceBundle b;
if (openType == OpenType.LOCALE_DEFAULT_ROOT) {
- b = instantiateBundle(baseName, localeID, ULocale.getDefault().getBaseName(),
+ b = instantiateBundle(baseName, localeID, null, ULocale.getDefault().getBaseName(),
root, openType);
} else {
- b = instantiateBundle(baseName, localeID, null, root, openType);
+ b = instantiateBundle(baseName, localeID, null, null, root, openType);
}
if(b==null){
throw new MissingResourceException(
(localeID.length() == lang.length() || localeID.charAt(lang.length()) == '_');
}
+    /**
+     * Orders {@code String[]} entries by the natural ordering of their first element.
+     * NOTE(review): no use of this comparator is visible in this part of the patch -- confirm it is still needed.
+     */
+    private static final Comparator<String[]> COMPARE_FIRST_ELEMENT = new Comparator<String[]>() {
+        @Override
+        public int compare(String[] left, String[] right) {
+            final String leftKey = left[0];
+            final String rightKey = right[0];
+            return leftKey.compareTo(rightKey);
+        }
+    };
+
+    /**
+     * Looks {@code localeID} up in {@code LocaleFallbackData.PARENT_LOCALE_TABLE}.
+     *
+     * @param localeID the locale ID to look up
+     * @return the parent locale ID recorded in the table (which may be {@code "root"}),
+     *         or {@code null} if the table has no entry for {@code localeID}
+     */
+    private static String getExplicitParent(String localeID) {
+        final String explicitParent = LocaleFallbackData.PARENT_LOCALE_TABLE.get(localeID);
+        return explicitParent;
+    }
+
+    /**
+     * Returns the default script code for a language, optionally refined by region.
+     *
+     * The lookup order in {@code LocaleFallbackData.DEFAULT_SCRIPT_TABLE} is: the
+     * {@code language_region} key (only when a region is supplied), then the bare
+     * {@code language} key, and finally the constant {@code "Latn"}.
+     *
+     * @param language the language code
+     * @param region the region code; may be {@code null} or empty when the locale ID has no region
+     * @return the script code found in the table, or {@code "Latn"} if neither key has an entry
+     */
+    private static String getDefaultScript(String language, String region) {
+        String result = null;
+        // getParentLocaleID() passes null for script-only locale IDs; skip the region-specific lookup
+        // in that case instead of probing the table with a meaningless "<language>_null" key.
+        if (region != null && !region.isEmpty()) {
+            result = LocaleFallbackData.DEFAULT_SCRIPT_TABLE.get(language + "_" + region);
+        }
+        if (result == null) {
+            result = LocaleFallbackData.DEFAULT_SCRIPT_TABLE.get(language);
+        }
+        return (result != null) ? result : "Latn";
+    }
+
+ /**
+ * Computes the locale ID to try next when looking for a resource bundle for {@code name}.
+ *
+ * @param name the locale ID whose parent is wanted
+ * @param origName the locale ID the fallback chain started from; its script is reused when
+ * {@code name} has a region but no script
+ * @param openType the open type of the lookup; only LOCALE_DEFAULT_ROOT consults the parent locale
+ * table and the default-script check for script-only IDs
+ * @return the parent locale ID, or {@code null} when there is no further locale-ID parent (a
+ * language-only ID, a table entry of "root", or a non-default script under LOCALE_DEFAULT_ROOT)
+ */
+ private static String getParentLocaleID(String name, String origName, OpenType openType) {
+ // early out if the locale ID has a variant code or ends with _
+ if (name.endsWith("_") || !ULocale.getVariant(name).isEmpty()) {
+ int lastUnderbarPos = name.lastIndexOf('_');
+ if (lastUnderbarPos >= 0) {
+ return name.substring(0, lastUnderbarPos);
+ } else {
+ return null;
+ }
+ }
+
+ // TODO: Is there a better way to break the locale ID up into its constituent parts?
+ ULocale nameLocale = new ULocale(name);
+ String language = nameLocale.getLanguage();
+ String script = nameLocale.getScript();
+ String region = nameLocale.getCountry();
+
+ // if our open type is LOCALE_DEFAULT_ROOT, first look the locale ID up in the parent locale table; if that
+ // table specifies a parent for it, return that (we don't do this for the other open types-- if we're not
+ // falling back through the system default locale, we also want to do straight truncation fallback instead
+ // of looking things up in the parent locale table-- see https://www.unicode.org/reports/tr35/tr35.html#Parent_Locales:
+ // "Collation data, however, is an exception...")
+ if (openType == OpenType.LOCALE_DEFAULT_ROOT) {
+ String parentID = getExplicitParent(name);
+ if (parentID != null) {
+ // a table entry of "root" is reported as null, like the other "no parent" results below
+ return parentID.equals("root") ? null : parentID;
+ }
+ }
+
+ // if it's not in the parent locale table, figure out the fallback script algorithmically
+ // (see CLDR-15265 for an explanation of the algorithm)
+ if (!script.isEmpty() && !region.isEmpty()) {
+ // if "name" has both script and region, is the script the default script?
+ // - if so, remove it and keep the region
+ // - if not, remove the region and keep the script
+ if (getDefaultScript(language, region).equals(script)) {
+ return language + "_" + region;
+ } else {
+ return language + "_" + script;
+ }
+ } else if (!region.isEmpty()) {
+ // if "name" has region but not script, did the original locale ID specify a script?
+ // - if yes, replace the region with the script from the original locale ID
+ // - if no, replace the region with the default script for that language and region
+ String origNameScript = ULocale.getScript(origName);
+ if (!origNameScript.isEmpty()) {
+ return language + "_" + origNameScript;
+ } else {
+ return language + "_" + getDefaultScript(language, region);
+ }
+ } else if (!script.isEmpty()) {
+ // if "name" has script but not region (and our open type is LOCALE_DEFAULT_ROOT), is the script the
+ // default script for the language?
+ // - if so, remove it from the locale ID
+ // - if not, return "root" (bypassing the system default locale ID)
+ // (we don't do this for other open types for the same reason we don't look things up in the parent
+ // locale table for other open types-- see the reference to UTS #35 above)
+ if (openType != OpenType.LOCALE_DEFAULT_ROOT || getDefaultScript(language, null).equals(script)) {
+ return language;
+ } else {
+ return /*"root"*/null;
+ }
+ } else {
+ // if "name" just contains a language code, return null so the calling code falls back to "root"
+ return null;
+ }
+ }
+
private static ICUResourceBundle instantiateBundle(
- final String baseName, final String localeID, final String defaultID,
+ final String baseName, final String localeID, final String origLocaleID, final String defaultID,
final ClassLoader root, final OpenType openType) {
assert localeID.indexOf('@') < 0;
assert defaultID == null || defaultID.indexOf('@') < 0;
// fallback to locale ID parent
if(b == null){
- int i = localeName.lastIndexOf('_');
- if (i != -1) {
- // Chop off the last underscore and the subtag after that.
- String temp = localeName.substring(0, i);
- b = instantiateBundle(baseName, temp, defaultID, root, openType);
+ String origLocaleName = (origLocaleID != null) ? origLocaleID : localeName;
+ String fallbackLocaleID = getParentLocaleID(localeName, origLocaleName, openType);
+ if (fallbackLocaleID != null) {
+ b = instantiateBundle(baseName, fallbackLocaleID, origLocaleName, defaultID, root, openType);
}else{
- // No underscore, only a base language subtag.
if(openType == OpenType.LOCALE_DEFAULT_ROOT &&
!localeIDStartsWithLangSubtag(defaultID, localeName)) {
// Go to the default locale before root.
- b = instantiateBundle(baseName, defaultID, defaultID, root, openType);
+ b = instantiateBundle(baseName, defaultID, null, defaultID, root, openType);
} else if(openType != OpenType.LOCALE_ONLY && !rootLocale.isEmpty()) {
// Ultimately go to root.
b = ICUResourceBundle.createBundle(baseName, rootLocale, root);
// TODO: C++ uresbund.cpp also checks for %%ParentIsRoot. Why not Java?
String parentLocaleName = ((ICUResourceBundleImpl.ResourceTable)b).findString("%%Parent");
if (parentLocaleName != null) {
- parent = instantiateBundle(baseName, parentLocaleName, defaultID, root, openType);
+ parent = instantiateBundle(baseName, parentLocaleName, null, defaultID, root, openType);
} else if (i != -1) {
- parent = instantiateBundle(baseName, localeName.substring(0, i), defaultID, root, openType);
+ parent = instantiateBundle(baseName, localeName.substring(0, i), null, defaultID, root, openType);
} else if (!localeName.equals(rootLocale)){
- parent = instantiateBundle(baseName, rootLocale, defaultID, root, openType);
+ parent = instantiateBundle(baseName, rootLocale, null, defaultID, root, openType);
}
if (!b.equals(parent)){
--- /dev/null
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+//
+// Internal static data tables used by ICUResourceBundle.java
+// WARNING: This file is mechanically generated by the CLDR-to-ICU tool
+// (see tools/cldr/cldr-to-icu/src/main/java/org/unicode/tool/cldrtoicu/generator/ResourcFallbackCodeGenerator.java).
+// DO NOT HAND EDIT!!!
+
+package com.ibm.icu.impl;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+class LocaleFallbackData {
+ //======================================================================
+ // Default script table
+ public static final Map<String, String> DEFAULT_SCRIPT_TABLE = buildDefaultScriptTable();
+
+ private static Map<String, String> buildDefaultScriptTable() {
+ Map<String, String> t = new HashMap<>();
+ t.put("ab", "Cyrl");
+ t.put("abq", "Cyrl");
+ t.put("adp", "Tibt");
+ t.put("ady", "Cyrl");
+ t.put("ae", "Avst");
+ t.put("aeb", "Arab");
+ t.put("aho", "Ahom");
+ t.put("akk", "Xsux");
+ t.put("alt", "Cyrl");
+ t.put("am", "Ethi");
+ t.put("apc", "Arab");
+ t.put("apd", "Arab");
+ t.put("ar", "Arab");
+ t.put("arc", "Armi");
+ t.put("arq", "Arab");
+ t.put("ars", "Arab");
+ t.put("ary", "Arab");
+ t.put("arz", "Arab");
+ t.put("as", "Beng");
+ t.put("ase", "Sgnw");
+ t.put("av", "Cyrl");
+ t.put("avl", "Arab");
+ t.put("awa", "Deva");
+ t.put("az_IQ", "Arab");
+ t.put("az_IR", "Arab");
+ t.put("az_RU", "Cyrl");
+ t.put("ba", "Cyrl");
+ t.put("bal", "Arab");
+ t.put("bap", "Deva");
+ t.put("bax", "Bamu");
+ t.put("bcq", "Ethi");
+ t.put("be", "Cyrl");
+ t.put("bej", "Arab");
+ t.put("bfq", "Taml");
+ t.put("bft", "Arab");
+ t.put("bfy", "Deva");
+ t.put("bg", "Cyrl");
+ t.put("bgc", "Deva");
+ t.put("bgn", "Arab");
+ t.put("bgx", "Grek");
+ t.put("bhb", "Deva");
+ t.put("bhi", "Deva");
+ t.put("bho", "Deva");
+ t.put("bji", "Ethi");
+ t.put("bjj", "Deva");
+ t.put("blt", "Tavt");
+ t.put("bn", "Beng");
+ t.put("bo", "Tibt");
+ t.put("bpy", "Beng");
+ t.put("bqi", "Arab");
+ t.put("bra", "Deva");
+ t.put("brh", "Arab");
+ t.put("brx", "Deva");
+ t.put("bsq", "Bass");
+ t.put("bst", "Ethi");
+ t.put("btv", "Deva");
+ t.put("bua", "Cyrl");
+ t.put("byn", "Ethi");
+ t.put("ccp", "Cakm");
+ t.put("ce", "Cyrl");
+ t.put("chm", "Cyrl");
+ t.put("chr", "Cher");
+ t.put("cja", "Arab");
+ t.put("cjm", "Cham");
+ t.put("ckb", "Arab");
+ t.put("cmg", "Soyo");
+ t.put("cop", "Copt");
+ t.put("cr", "Cans");
+ t.put("crh", "Cyrl");
+ t.put("crk", "Cans");
+ t.put("crl", "Cans");
+ t.put("csw", "Cans");
+ t.put("ctd", "Pauc");
+ t.put("cu", "Cyrl");
+ t.put("cv", "Cyrl");
+ t.put("dar", "Cyrl");
+ t.put("dcc", "Arab");
+ t.put("dgl", "Arab");
+ t.put("dmf", "Medf");
+ t.put("doi", "Deva");
+ t.put("drh", "Mong");
+ t.put("drs", "Ethi");
+ t.put("dty", "Deva");
+ t.put("dv", "Thaa");
+ t.put("dz", "Tibt");
+ t.put("egy", "Egyp");
+ t.put("eky", "Kali");
+ t.put("el", "Grek");
+ t.put("esg", "Gonm");
+ t.put("ett", "Ital");
+ t.put("fa", "Arab");
+ t.put("fia", "Arab");
+ t.put("fub", "Arab");
+ t.put("gan", "Hans");
+ t.put("gbm", "Deva");
+ t.put("gbz", "Arab");
+ t.put("gez", "Ethi");
+ t.put("ggn", "Deva");
+ t.put("gjk", "Arab");
+ t.put("gju", "Arab");
+ t.put("glk", "Arab");
+ t.put("gmv", "Ethi");
+ t.put("gof", "Ethi");
+ t.put("gom", "Deva");
+ t.put("gon", "Telu");
+ t.put("got", "Goth");
+ t.put("grc", "Cprt");
+ t.put("grt", "Beng");
+ t.put("gu", "Gujr");
+ t.put("gvr", "Deva");
+ t.put("gwc", "Arab");
+ t.put("gwt", "Arab");
+ t.put("ha_CM", "Arab");
+ t.put("ha_SD", "Arab");
+ t.put("hak", "Hans");
+ t.put("haz", "Arab");
+ t.put("hdy", "Ethi");
+ t.put("he", "Hebr");
+ t.put("hi", "Deva");
+ t.put("hlu", "Hluw");
+ t.put("hmd", "Plrd");
+ t.put("hnd", "Arab");
+ t.put("hne", "Deva");
+ t.put("hnj", "Hmnp");
+ t.put("hno", "Arab");
+ t.put("hoc", "Deva");
+ t.put("hoj", "Deva");
+ t.put("hsn", "Hans");
+ t.put("hy", "Armn");
+ t.put("ii", "Yiii");
+ t.put("inh", "Cyrl");
+ t.put("iu", "Cans");
+ t.put("iw", "Hebr");
+ t.put("ja", "Jpan");
+ t.put("ji", "Hebr");
+ t.put("jml", "Deva");
+ t.put("ka", "Geor");
+ t.put("kaa", "Cyrl");
+ t.put("kaw", "Kawi");
+ t.put("kbd", "Cyrl");
+ t.put("kby", "Arab");
+ t.put("kdt", "Thai");
+ t.put("kfr", "Deva");
+ t.put("kfy", "Deva");
+ t.put("khb", "Talu");
+ t.put("khn", "Deva");
+ t.put("kht", "Mymr");
+ t.put("khw", "Arab");
+ t.put("kjg", "Laoo");
+ t.put("kk", "Cyrl");
+ t.put("kk_AF", "Arab");
+ t.put("kk_CN", "Arab");
+ t.put("kk_IR", "Arab");
+ t.put("kk_MN", "Arab");
+ t.put("km", "Khmr");
+ t.put("kn", "Knda");
+ t.put("ko", "Kore");
+ t.put("koi", "Cyrl");
+ t.put("kok", "Deva");
+ t.put("kqy", "Ethi");
+ t.put("krc", "Cyrl");
+ t.put("kru", "Deva");
+ t.put("ks", "Arab");
+ t.put("ktb", "Ethi");
+ t.put("ku_LB", "Arab");
+ t.put("kum", "Cyrl");
+ t.put("kv", "Cyrl");
+ t.put("kvx", "Arab");
+ t.put("kxc", "Ethi");
+ t.put("kxl", "Deva");
+ t.put("kxm", "Thai");
+ t.put("kxp", "Arab");
+ t.put("ky", "Cyrl");
+ t.put("ky_CN", "Arab");
+ t.put("kzh", "Arab");
+ t.put("lab", "Lina");
+ t.put("lad", "Hebr");
+ t.put("lah", "Arab");
+ t.put("lbe", "Cyrl");
+ t.put("lcp", "Thai");
+ t.put("lep", "Lepc");
+ t.put("lez", "Cyrl");
+ t.put("lif", "Deva");
+ t.put("lis", "Lisu");
+ t.put("lki", "Arab");
+ t.put("lmn", "Telu");
+ t.put("lo", "Laoo");
+ t.put("lrc", "Arab");
+ t.put("luz", "Arab");
+ t.put("lwl", "Thai");
+ t.put("lzh", "Hans");
+ t.put("mag", "Deva");
+ t.put("mai", "Deva");
+ t.put("man_GN", "Nkoo");
+ t.put("mde", "Arab");
+ t.put("mdf", "Cyrl");
+ t.put("mdx", "Ethi");
+ t.put("mfa", "Arab");
+ t.put("mgp", "Deva");
+ t.put("mk", "Cyrl");
+ t.put("mki", "Arab");
+ t.put("ml", "Mlym");
+ t.put("mn", "Cyrl");
+ t.put("mn_CN", "Mong");
+ t.put("mni", "Beng");
+ t.put("mnw", "Mymr");
+ t.put("mr", "Deva");
+ t.put("mrd", "Deva");
+ t.put("mrj", "Cyrl");
+ t.put("mro", "Mroo");
+ t.put("ms_CC", "Arab");
+ t.put("mtr", "Deva");
+ t.put("mvy", "Arab");
+ t.put("mwr", "Deva");
+ t.put("mww", "Hmnp");
+ t.put("my", "Mymr");
+ t.put("mym", "Ethi");
+ t.put("myv", "Cyrl");
+ t.put("myz", "Mand");
+ t.put("mzn", "Arab");
+ t.put("nan", "Hans");
+ t.put("ne", "Deva");
+ t.put("new", "Deva");
+ t.put("nnp", "Wcho");
+ t.put("nod", "Lana");
+ t.put("noe", "Deva");
+ t.put("non", "Runr");
+ t.put("nqo", "Nkoo");
+ t.put("nsk", "Cans");
+ t.put("nst", "Tnsa");
+ t.put("oj", "Cans");
+ t.put("ojs", "Cans");
+ t.put("or", "Orya");
+ t.put("oru", "Arab");
+ t.put("os", "Cyrl");
+ t.put("osa", "Osge");
+ t.put("ota", "Arab");
+ t.put("otk", "Orkh");
+ t.put("oui", "Ougr");
+ t.put("pa", "Guru");
+ t.put("pa_PK", "Arab");
+ t.put("pal", "Phli");
+ t.put("peo", "Xpeo");
+ t.put("phl", "Arab");
+ t.put("phn", "Phnx");
+ t.put("pka", "Brah");
+ t.put("pnt", "Grek");
+ t.put("ppa", "Deva");
+ t.put("pra", "Khar");
+ t.put("prd", "Arab");
+ t.put("ps", "Arab");
+ t.put("raj", "Deva");
+ t.put("rhg", "Rohg");
+ t.put("rif", "Tfng");
+ t.put("rjs", "Deva");
+ t.put("rkt", "Beng");
+ t.put("rmt", "Arab");
+ t.put("ru", "Cyrl");
+ t.put("rue", "Cyrl");
+ t.put("ryu", "Kana");
+ t.put("sa", "Deva");
+ t.put("sah", "Cyrl");
+ t.put("sat", "Olck");
+ t.put("saz", "Saur");
+ t.put("sck", "Deva");
+ t.put("scl", "Arab");
+ t.put("sd", "Arab");
+ t.put("sd_IN", "Deva");
+ t.put("sdh", "Arab");
+ t.put("sga", "Ogam");
+ t.put("sgw", "Ethi");
+ t.put("shi", "Tfng");
+ t.put("shn", "Mymr");
+ t.put("shu", "Arab");
+ t.put("si", "Sinh");
+ t.put("skr", "Arab");
+ t.put("smp", "Samr");
+ t.put("sog", "Sogd");
+ t.put("sou", "Thai");
+ t.put("sr", "Cyrl");
+ t.put("srb", "Sora");
+ t.put("srx", "Deva");
+ t.put("swb", "Arab");
+ t.put("swv", "Deva");
+ t.put("syl", "Beng");
+ t.put("syr", "Syrc");
+ t.put("ta", "Taml");
+ t.put("taj", "Deva");
+ t.put("tcy", "Knda");
+ t.put("tdd", "Tale");
+ t.put("tdg", "Deva");
+ t.put("tdh", "Deva");
+ t.put("te", "Telu");
+ t.put("tg", "Cyrl");
+ t.put("tg_PK", "Arab");
+ t.put("th", "Thai");
+ t.put("thl", "Deva");
+ t.put("thq", "Deva");
+ t.put("thr", "Deva");
+ t.put("ti", "Ethi");
+ t.put("tig", "Ethi");
+ t.put("tkt", "Deva");
+ t.put("trw", "Arab");
+ t.put("tsd", "Grek");
+ t.put("tsf", "Deva");
+ t.put("tsj", "Tibt");
+ t.put("tt", "Cyrl");
+ t.put("tts", "Thai");
+ t.put("txg", "Tang");
+ t.put("txo", "Toto");
+ t.put("tyv", "Cyrl");
+ t.put("udi", "Aghb");
+ t.put("udm", "Cyrl");
+ t.put("ug", "Arab");
+ t.put("ug_KZ", "Cyrl");
+ t.put("ug_MN", "Cyrl");
+ t.put("uga", "Ugar");
+ t.put("uk", "Cyrl");
+ t.put("unr", "Beng");
+ t.put("unr_NP", "Deva");
+ t.put("unx", "Beng");
+ t.put("ur", "Arab");
+ t.put("uz_AF", "Arab");
+ t.put("uz_CN", "Cyrl");
+ t.put("vai", "Vaii");
+ t.put("wal", "Ethi");
+ t.put("wbq", "Telu");
+ t.put("wbr", "Deva");
+ t.put("wni", "Arab");
+ t.put("wsg", "Gong");
+ t.put("wtm", "Deva");
+ t.put("wuu", "Hans");
+ t.put("xco", "Chrs");
+ t.put("xcr", "Cari");
+ t.put("xlc", "Lyci");
+ t.put("xld", "Lydi");
+ t.put("xmf", "Geor");
+ t.put("xmn", "Mani");
+ t.put("xmr", "Merc");
+ t.put("xna", "Narb");
+ t.put("xnr", "Deva");
+ t.put("xpr", "Prti");
+ t.put("xsa", "Sarb");
+ t.put("xsr", "Deva");
+ t.put("yi", "Hebr");
+ t.put("yue", "Hant");
+ t.put("yue_CN", "Hans");
+ t.put("zdj", "Arab");
+ t.put("zgh", "Tfng");
+ t.put("zh", "Hans");
+ t.put("zh_AU", "Hant");
+ t.put("zh_BN", "Hant");
+ t.put("zh_GB", "Hant");
+ t.put("zh_GF", "Hant");
+ t.put("zh_HK", "Hant");
+ t.put("zh_ID", "Hant");
+ t.put("zh_MO", "Hant");
+ t.put("zh_PA", "Hant");
+ t.put("zh_PF", "Hant");
+ t.put("zh_PH", "Hant");
+ t.put("zh_SR", "Hant");
+ t.put("zh_TH", "Hant");
+ t.put("zh_TW", "Hant");
+ t.put("zh_US", "Hant");
+ t.put("zh_VN", "Hant");
+ t.put("zhx", "Nshu");
+ t.put("zkt", "Kits");
+ return Collections.unmodifiableMap(t);
+ }
+
+ //======================================================================
+ // Parent locale table
+ public static final Map<String, String> PARENT_LOCALE_TABLE = buildParentLocaleTable();
+
+ private static Map<String, String> buildParentLocaleTable() {
+ Map<String, String> t = new HashMap<>();
+ t.put("az_Arab", "root");
+ t.put("az_Cyrl", "root");
+ t.put("bal_Latn", "root");
+ t.put("blt_Latn", "root");
+ t.put("bm_Nkoo", "root");
+ t.put("bs_Cyrl", "root");
+ t.put("byn_Latn", "root");
+ t.put("cu_Glag", "root");
+ t.put("dje_Arab", "root");
+ t.put("dyo_Arab", "root");
+ t.put("en_150", "en_001");
+ t.put("en_AG", "en_001");
+ t.put("en_AI", "en_001");
+ t.put("en_AT", "en_150");
+ t.put("en_AU", "en_001");
+ t.put("en_BB", "en_001");
+ t.put("en_BE", "en_150");
+ t.put("en_BM", "en_001");
+ t.put("en_BS", "en_001");
+ t.put("en_BW", "en_001");
+ t.put("en_BZ", "en_001");
+ t.put("en_CC", "en_001");
+ t.put("en_CH", "en_150");
+ t.put("en_CK", "en_001");
+ t.put("en_CM", "en_001");
+ t.put("en_CX", "en_001");
+ t.put("en_CY", "en_001");
+ t.put("en_DE", "en_150");
+ t.put("en_DG", "en_001");
+ t.put("en_DK", "en_150");
+ t.put("en_DM", "en_001");
+ t.put("en_Dsrt", "root");
+ t.put("en_ER", "en_001");
+ t.put("en_FI", "en_150");
+ t.put("en_FJ", "en_001");
+ t.put("en_FK", "en_001");
+ t.put("en_FM", "en_001");
+ t.put("en_GB", "en_001");
+ t.put("en_GD", "en_001");
+ t.put("en_GG", "en_001");
+ t.put("en_GH", "en_001");
+ t.put("en_GI", "en_001");
+ t.put("en_GM", "en_001");
+ t.put("en_GY", "en_001");
+ t.put("en_HK", "en_001");
+ t.put("en_IE", "en_001");
+ t.put("en_IL", "en_001");
+ t.put("en_IM", "en_001");
+ t.put("en_IN", "en_001");
+ t.put("en_IO", "en_001");
+ t.put("en_JE", "en_001");
+ t.put("en_JM", "en_001");
+ t.put("en_KE", "en_001");
+ t.put("en_KI", "en_001");
+ t.put("en_KN", "en_001");
+ t.put("en_KY", "en_001");
+ t.put("en_LC", "en_001");
+ t.put("en_LR", "en_001");
+ t.put("en_LS", "en_001");
+ t.put("en_MG", "en_001");
+ t.put("en_MO", "en_001");
+ t.put("en_MS", "en_001");
+ t.put("en_MT", "en_001");
+ t.put("en_MU", "en_001");
+ t.put("en_MV", "en_001");
+ t.put("en_MW", "en_001");
+ t.put("en_MY", "en_001");
+ t.put("en_NA", "en_001");
+ t.put("en_NF", "en_001");
+ t.put("en_NG", "en_001");
+ t.put("en_NL", "en_150");
+ t.put("en_NR", "en_001");
+ t.put("en_NU", "en_001");
+ t.put("en_NZ", "en_001");
+ t.put("en_PG", "en_001");
+ t.put("en_PK", "en_001");
+ t.put("en_PN", "en_001");
+ t.put("en_PW", "en_001");
+ t.put("en_RW", "en_001");
+ t.put("en_SB", "en_001");
+ t.put("en_SC", "en_001");
+ t.put("en_SD", "en_001");
+ t.put("en_SE", "en_150");
+ t.put("en_SG", "en_001");
+ t.put("en_SH", "en_001");
+ t.put("en_SI", "en_150");
+ t.put("en_SL", "en_001");
+ t.put("en_SS", "en_001");
+ t.put("en_SX", "en_001");
+ t.put("en_SZ", "en_001");
+ t.put("en_Shaw", "root");
+ t.put("en_TC", "en_001");
+ t.put("en_TK", "en_001");
+ t.put("en_TO", "en_001");
+ t.put("en_TT", "en_001");
+ t.put("en_TV", "en_001");
+ t.put("en_TZ", "en_001");
+ t.put("en_UG", "en_001");
+ t.put("en_VC", "en_001");
+ t.put("en_VG", "en_001");
+ t.put("en_VU", "en_001");
+ t.put("en_WS", "en_001");
+ t.put("en_ZA", "en_001");
+ t.put("en_ZM", "en_001");
+ t.put("en_ZW", "en_001");
+ t.put("es_AR", "es_419");
+ t.put("es_BO", "es_419");
+ t.put("es_BR", "es_419");
+ t.put("es_BZ", "es_419");
+ t.put("es_CL", "es_419");
+ t.put("es_CO", "es_419");
+ t.put("es_CR", "es_419");
+ t.put("es_CU", "es_419");
+ t.put("es_DO", "es_419");
+ t.put("es_EC", "es_419");
+ t.put("es_GT", "es_419");
+ t.put("es_HN", "es_419");
+ t.put("es_MX", "es_419");
+ t.put("es_NI", "es_419");
+ t.put("es_PA", "es_419");
+ t.put("es_PE", "es_419");
+ t.put("es_PR", "es_419");
+ t.put("es_PY", "es_419");
+ t.put("es_SV", "es_419");
+ t.put("es_US", "es_419");
+ t.put("es_UY", "es_419");
+ t.put("es_VE", "es_419");
+ t.put("ff_Adlm", "root");
+ t.put("ff_Arab", "root");
+ t.put("ha_Arab", "root");
+ t.put("hi_Latn", "en_IN");
+ t.put("ht", "fr_HT");
+ t.put("iu_Latn", "root");
+ t.put("kk_Arab", "root");
+ t.put("ks_Deva", "root");
+ t.put("ku_Arab", "root");
+ t.put("ky_Arab", "root");
+ t.put("ky_Latn", "root");
+ t.put("ml_Arab", "root");
+ t.put("mn_Mong", "root");
+ t.put("mni_Mtei", "root");
+ t.put("ms_Arab", "root");
+ t.put("nb", "no");
+ t.put("nn", "no");
+ t.put("pa_Arab", "root");
+ t.put("pt_AO", "pt_PT");
+ t.put("pt_CH", "pt_PT");
+ t.put("pt_CV", "pt_PT");
+ t.put("pt_FR", "pt_PT");
+ t.put("pt_GQ", "pt_PT");
+ t.put("pt_GW", "pt_PT");
+ t.put("pt_LU", "pt_PT");
+ t.put("pt_MO", "pt_PT");
+ t.put("pt_MZ", "pt_PT");
+ t.put("pt_ST", "pt_PT");
+ t.put("pt_TL", "pt_PT");
+ t.put("sat_Deva", "root");
+ t.put("sd_Deva", "root");
+ t.put("sd_Khoj", "root");
+ t.put("sd_Sind", "root");
+ t.put("shi_Latn", "root");
+ t.put("so_Arab", "root");
+ t.put("sr_Latn", "root");
+ t.put("sw_Arab", "root");
+ t.put("tg_Arab", "root");
+ t.put("ug_Cyrl", "root");
+ t.put("uz_Arab", "root");
+ t.put("uz_Cyrl", "root");
+ t.put("vai_Latn", "root");
+ t.put("wo_Arab", "root");
+ t.put("yo_Arab", "root");
+ t.put("yue_Hans", "root");
+ t.put("zh_Hant", "root");
+ t.put("zh_Hant_MO", "zh_Hant_HK");
+ return Collections.unmodifiableMap(t);
+ }
+}
public void TestNameList() {
String[][][] tests = {
/* name in French, name in self, minimized, modified */
- {{"fr-Cyrl-BE", "fr-Cyrl-CA"},
- {"Français (cyrillique, Belgique)", "Français (cyrillique, Belgique)", "fr_Cyrl_BE", "fr_Cyrl_BE"},
- {"Français (cyrillique, Canada)", "Français (cyrillique, Canada)", "fr_Cyrl_CA", "fr_Cyrl_CA"},
- },
+// {{"fr-Cyrl-BE", "fr-Cyrl-CA"},
+// {"Français (cyrillique, Belgique)", "Français (cyrillique, Belgique)", "fr_Cyrl_BE", "fr_Cyrl_BE"},
+// {"Français (cyrillique, Canada)", "Français (cyrillique, Canada)", "fr_Cyrl_CA", "fr_Cyrl_CA"},
+// },
{{"en", "de", "fr", "zh"},
{"Allemand", "Deutsch", "de", "de"},
{"Anglais", "English", "en", "en"},
{"Français", "Français", "fr", "fr"},
},
// some non-canonical names
- {{"iw", "iw-US", "no", "no-Cyrl", "in", "in-YU"},
- {"Hébreu (États-Unis)", "עברית (ארצות הברית)", "iw_US", "iw_US"},
- {"Hébreu (Israël)", "עברית (ישראל)", "iw", "iw_IL"},
- {"Indonésien (Indonésie)", "Indonesia (Indonesia)", "in", "in_ID"},
- {"Indonésien (Serbie)", "Indonesia (Serbia)", "in_YU", "in_YU"},
- {"Norvégien (cyrillique)", "Norsk (kyrillisk)", "no_Cyrl", "no_Cyrl"},
- {"Norvégien (latin)", "Norsk (latinsk)", "no", "no_Latn"},
- },
+// {{"iw", "iw-US", "no", "no-Cyrl", "in", "in-YU"},
+// {"Hébreu (États-Unis)", "עברית (ארצות הברית)", "iw_US", "iw_US"},
+// {"Hébreu (Israël)", "עברית (ישראל)", "iw", "iw_IL"},
+// {"Indonésien (Indonésie)", "Indonesia (Indonesia)", "in", "in_ID"},
+// {"Indonésien (Serbie)", "Indonesia (Serbia)", "in_YU", "in_YU"},
+// {"Norvégien (cyrillique)", "Norsk (kyrillisk)", "no_Cyrl", "no_Cyrl"},
+// {"Norvégien (latin)", "Norsk (latinsk)", "no", "no_Latn"},
+// },
{{"zh-Hant-TW", "en", "en-gb", "fr", "zh-Hant", "de", "de-CH", "zh-TW"},
{"Allemand (Allemagne)", "Deutsch (Deutschland)", "de", "de_DE"},
{"Allemand (Suisse)", "Deutsch (Schweiz)", "de_CH", "de_CH"},
{"Serbe (cyrillique)", "Српски (ћирилица)", "sr", "sr_Cyrl"},
{"Serbe (latin)", "Srpski (latinica)", "sr_Latn", "sr_Latn"},
},
- {{"fr-Cyrl", "fr-Arab"},
- {"Français (arabe)", "Français (arabe)", "fr_Arab", "fr_Arab"},
- {"Français (cyrillique)", "Français (cyrillique)", "fr_Cyrl", "fr_Cyrl"},
- },
- {{"fr-Cyrl-BE", "fr-Arab-CA"},
- {"Français (arabe, Canada)", "Français (arabe, Canada)", "fr_Arab_CA", "fr_Arab_CA"},
- {"Français (cyrillique, Belgique)", "Français (cyrillique, Belgique)", "fr_Cyrl_BE", "fr_Cyrl_BE"},
- }
+// {{"fr-Cyrl", "fr-Arab"},
+// {"Français (arabe)", "Français (arabe)", "fr_Arab", "fr_Arab"},
+// {"Français (cyrillique)", "Français (cyrillique)", "fr_Cyrl", "fr_Cyrl"},
+// },
+// {{"fr-Cyrl-BE", "fr-Arab-CA"},
+// {"Français (arabe, Canada)", "Français (arabe, Canada)", "fr_Arab_CA", "fr_Arab_CA"},
+// {"Français (cyrillique, Belgique)", "Français (cyrillique, Belgique)", "fr_Cyrl_BE", "fr_Cyrl_BE"},
+// }
};
ULocale french = ULocale.FRENCH;
LocaleDisplayNames names = LocaleDisplayNames.getInstance(french,
} catch (NoSuchElementException ex) {
}
}
+
+ @Test
+ public void TestAlgorithmicParentFallback() {
+     // Test for ICU-21125 and ICU-21126 -- cases where resource fallback isn't determined by lopping fields off
+     // the end of the locale ID (or following a %%Parent directive in a resource bundle).
+     // Each row holds three values:
+     //   [0] the requested locale ID,
+     //   [1] the locale expected when the bundle is opened with LOCALE_DEFAULT_ROOT
+     //       ("" means the lookup is expected to land on the system default locale),
+     //   [2] the locale expected when the bundle is opened with LOCALE_ROOT.
+     final String[][] fallbackCases = {
+         { "de_Latn_LI", "de_LI", "de_LI" },
+//         { "en_VA", "en_150", "en" }, // TODO: put this back in after https://unicode-org.atlassian.net/browse/CLDR-15893 is fixed
+         { "yi_Latn_DE", "", "yi" }, // "" is just "root"-- or, actually, the system default locale
+         { "yi_Hebr_DE", "yi", "yi" },
+         { "zh_Hant_SG", "zh_Hant", "zh_Hant" },
+         // would be nice to test that sr_Latn_ME falls back to sr_Latn, or sr_ME to sr_Latn_ME,
+         // or sr_Latn to root, but all of these resource bundle files actually exist in the project
+     };
+
+     for (String[] row : fallbackCases) {
+         ULocale requested = new ULocale(row[0]);
+
+         // Open the same locale with both fallback policies before inspecting either result.
+         ICUResourceBundle defaultRootBundle = ICUResourceBundle.getBundleInstance(
+                 ICUData.ICU_BASE_NAME, requested, ICUResourceBundle.OpenType.LOCALE_DEFAULT_ROOT);
+         ICUResourceBundle rootBundle = ICUResourceBundle.getBundleInstance(
+                 ICUData.ICU_BASE_NAME, requested, ICUResourceBundle.OpenType.LOCALE_ROOT);
+         String actualWithDefaultRoot = defaultRootBundle.getULocale().toString();
+         String actualWithRoot = rootBundle.getULocale().toString();
+
+         // An empty expectation stands for "whatever the system default locale is".
+         String expectedWithDefaultRoot = row[1].isEmpty() ? ULocale.getDefault().toString() : row[1];
+
+         assertEquals("Got wrong locale with LOCALE_DEFAULT_ROOT", expectedWithDefaultRoot, actualWithDefaultRoot);
+         assertEquals("Got wrong locale with LOCALE_ROOT", row[2], actualWithRoot);
+     }
+ }
}
$ ./install-cldr-jars.sh "$CLDR_DIR"
-Generating all ICU data
------------------------
+Generating all ICU data and source code
+---------------------------------------
$ cd "$TOOLS_ROOT/cldr/cldr-to-icu"
$ ant -f build-icu-data.xml
* Outputting a subset of the supplemental data into a specified directory:
- $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DoutputTypes=plurals,dayPeriods
+ $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DoutputTypes=plurals,dayPeriods -DdontGenCode=true
Note: Output types can be listed with mixedCase, lower_underscore or UPPER_UNDERSCORE.
Pass '-DoutputTypes=help' to see the full list.
* Outputting only a subset of locale IDs (and all the supplemental data):
- $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DlocaleIdFilter='(zh|yue).*'
+ $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DlocaleIdFilter='(zh|yue).*' -DdontGenCode=true
* Overriding the default CLDR version string (which normally matches the CLDR library code):
<!-- TODO: Add things like copying of a template directory and deleting previous files
(perhaps always generate into a temporary directory and copy back to avoid having
inconsistent state when the conversion is cancelled). -->
-<project name="Convert" default="all" basedir=".">
+<project name="Convert" default="all" basedir="." xmlns:if="ant:if" xmlns:unless="ant:unless">
<target name="all" depends="init-args, prepare-jar, clean, convert"/>
so it is recommended that for testing, it be set to another value. -->
<property name="outDir" value="${basedir}/../../../icu4c/source/data/"/>
+ <!-- The output directory into which to write generated C/C++ code. By default
+ this will overwrite (without deletion) the generated C/C++ files in this
+ ICU release, so it is recommended that for testing, it be set to another value. -->
+ <property name="genCCodeDir" value="${basedir}/../../../icu4c/source/"/>
+
+ <!-- The output directory into which to write generated Java code. By default
+ this will overwrite (without deletion) the generated Java files in this
+ ICU release, so it is recommended that for testing, it be set to another value. -->
+ <property name="genJavaCodeDir" value="${basedir}/../../../icu4j/main/classes/core"/>
+
+ <!-- Set this to true to make build-icu-data.xml skip the code-generation step, so that
+ the generated ICU source files are left untouched -->
+ <property name="dontGenCode" value="false" />
+
<!-- The directory in which the additional ICU XML data is stored. -->
<property name="specialsDir" value="${basedir}/../../../icu4c/source/data/xml"/>
<arg line="-f build-icu-data.xml convert-impl -DcldrDir=${cldrDataDir}"/>
<!-- List all properties in the "convert-impl" task (except cldrDir). -->
<arg value="-DoutDir=${outDir}"/>
+ <arg value="-DgenCCodeDir=${genCCodeDir}"/>
+ <arg value="-DgenJavaCodeDir=${genJavaCodeDir}"/>
+ <arg value="-DdontGenCode=${dontGenCode}"/>
<arg value="-DspecialsDir=${specialsDir}"/>
<arg value="-DoutputTypes=${outputTypes}"/>
<arg value="-DicuVersion=${icuVersion}"/>
<pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
</classpath>
</taskdef>
+ <taskdef name="generateCode" classname="org.unicode.icu.tool.cldrtoicu.ant.GenerateCodeTask">
+ <classpath>
+ <pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
+ </classpath>
+ </taskdef>
<convert cldrDir="${cldrDir}" outputDir="${outDir}" specialsDir="${specialsDir}"
outputTypes="${outputTypes}" cldrVersion="${cldrVersion}"
icuVersion="${icuVersion}" icuDataVersion="${icuDataVersion}"
source="//path/to/value[@attr='bar']"
locales="xx,yy_ZZ"/> -->
</convert>
+
+ <generateCode cldrDir="${cldrDir}" cOutDir="${genCCodeDir}" javaOutDir="${genJavaCodeDir}" unless:true="${dontGenCode}" />
</target>
<target name="clean" depends="init-args, prepare-jar">
<pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
</classpath>
</taskdef>
+ <taskdef name="generateCode" classname="org.unicode.icu.tool.cldrtoicu.ant.GenerateCodeTask">
+ <classpath>
+ <pathelement path="target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar"/>
+ </classpath>
+ </taskdef>
<!-- If a directory is listed here, then every file in it is assumed to be automatically
generated by the conversion tool, unless it is explicitly listed in a <retain> element.
<retain path="tzdbNames.txt"/>
</dir>
</outputDirectories>
+
+ <generateCode cOutDir="${genCCodeDir}" javaOutDir="${genJavaCodeDir}" action="clean" />
</target>
</project>
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package main.java.org.unicode.icu.tool.cldrtoicu;
+
+import java.io.PrintWriter;
+import java.nio.file.Path;
+
+/**
+ * Contract for a generator that produces a matching pair of source files (one C/C++, one Java)
+ * from CLDR data.
+ */
+public interface CodeGenerator {
+    /**
+     * Generates the source code and writes it to the supplied writers.
+     *
+     * @param cldrPath the location of the CLDR data to read
+     * @param cFileOut the writer that receives the generated C/C++ source
+     * @param javaFileOut the writer that receives the generated Java source
+     */
+    void generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut);
+}
--- /dev/null
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+package org.unicode.icu.tool.cldrtoicu.ant;
+
+import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator;
+import main.java.org.unicode.icu.tool.cldrtoicu.generator.ResourceFallbackCodeGenerator;
+import org.apache.tools.ant.BuildException;
+import org.apache.tools.ant.Task;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
+public final class GenerateCodeTask extends Task {
+ private Path cldrPath;
+ private Path cOutDir;
+ private Path javaOutDir;
+ private String action;
+
+ private class GeneratedFileDef {
+ String cRelativePath;
+ String javaRelativePath;
+ CodeGenerator generator;
+
+ public GeneratedFileDef(String cRelativePath, String javaRelativePath, CodeGenerator generator) {
+ this.cRelativePath = cRelativePath;
+ this.javaRelativePath = javaRelativePath;
+ this.generator = generator;
+ }
+ }
+
+ private GeneratedFileDef[] generatedFileDefs = {
+ new GeneratedFileDef("common/localefallback_data.h", "src/com/ibm/icu/impl/LocaleFallbackData.java", new ResourceFallbackCodeGenerator()),
+ };
+
+ @SuppressWarnings("unused")
+ public void setCldrDir(String path) {
+ // Use String here since on some systems Ant doesn't support automatically converting Path instances.
+ this.cldrPath = checkNotNull(Paths.get(path));
+ }
+
+ @SuppressWarnings("unused")
+ public void setCOutDir(String path) {
+ // Use String here since on some systems Ant doesn't support automatically converting Path instances.
+ this.cOutDir = Paths.get(path);
+ }
+
+ @SuppressWarnings("unused")
+ public void setJavaOutDir(String path) {
+ // Use String here since on some systems Ant doesn't support automatically converting Path instances.
+ this.javaOutDir = Paths.get(path);
+ }
+
+ @SuppressWarnings("unused")
+ public void setAction(String action) {
+ // Use String here since on some systems Ant doesn't support automatically converting Path instances.
+ this.action = action;
+ }
+
+ @SuppressWarnings("unused")
+ public void execute() throws BuildException {
+ for (GeneratedFileDef task : generatedFileDefs) {
+ Path cOutPath = cOutDir.resolve(task.cRelativePath);
+ Path javaOutPath = javaOutDir.resolve(task.javaRelativePath);
+
+ try {
+ if (this.action != null && this.action.equals("clean")) {
+ log("Deleting " + cOutPath + " and " + javaOutPath + "...");
+ Files.deleteIfExists(cOutPath);
+ Files.deleteIfExists(javaOutPath);
+ } else {
+ Files.createDirectories(cOutPath.getParent());
+ Files.createDirectories(javaOutPath.getParent());
+
+ try (PrintWriter cOut = new PrintWriter(new OutputStreamWriter(new FileOutputStream(cOutPath.toFile())));
+ PrintWriter javaOut = new PrintWriter(new OutputStreamWriter(new FileOutputStream(javaOutPath.toFile())))) {
+
+ log("Generating " + cOutPath + " and " + javaOutPath + "...");
+ task.generator.generateCode(cldrPath, cOut, javaOut);
+ }
+ }
+ } catch (IOException ioException) {
+ throw new BuildException("IOException: " + ioException.toString());
+ }
+ }
+ }
+
+}
--- /dev/null
+package main.java.org.unicode.icu.tool.cldrtoicu.generator;
+
+import com.google.common.base.Splitter;
+import main.java.org.unicode.icu.tool.cldrtoicu.CodeGenerator;
+import org.unicode.cldr.api.*;
+
+import java.io.PrintWriter;
+import java.nio.file.Path;
+import java.util.*;
+
+import static com.google.common.base.CharMatcher.whitespace;
+
+/**
+ * Generates the static lookup tables for resource fallback: a "default script" table derived from
+ * the likelySubtags data and a "parent locale" table derived from the parentLocales data. The same
+ * data is written twice: as C arrays for uresbund.cpp and as Java maps for ICUResourceBundle.java.
+ */
+public class ResourceFallbackCodeGenerator implements CodeGenerator {
+    /** Repository path of this generator, quoted in the header comment of every generated file. */
+    private static final String GENERATOR_SOURCE_PATH =
+        "tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/generator/ResourceFallbackCodeGenerator.java";
+
+    /** Splits a locale ID into its underscore-separated fields. */
+    private static final Splitter LOCALE_ID_SPLITTER = Splitter.on('_');
+
+    /** Splits the whitespace-separated list of child locales in a parentLocale element. */
+    private static final Splitter CHILD_LOCALE_SPLITTER = Splitter.on(whitespace()).omitEmptyStrings();
+
+    // Both maps are sorted so that the generated output is stable from run to run.
+    private Map<String, String> defaultScripts;
+    private Map<String, String> parentLocales;
+
+    /**
+     * Reads the supplemental CLDR data and writes both generated files.
+     *
+     * @param cldrPath the location of the CLDR data to read
+     * @param cFileOut the writer that receives the generated C/C++ tables
+     * @param javaFileOut the writer that receives the generated Java tables
+     */
+    @Override
+    public void generateCode(Path cldrPath, PrintWriter cFileOut, PrintWriter javaFileOut) {
+        defaultScripts = new TreeMap<>();
+        parentLocales = new TreeMap<>();
+
+        CldrDataSupplier supplier = CldrDataSupplier.forCldrFilesIn(cldrPath);
+        CldrData supplementalData = supplier.getDataForType(CldrDataType.SUPPLEMENTAL);
+        supplementalData.accept(CldrData.PathOrder.NESTED_GROUPING, new CldrData.PrefixVisitor() {
+            @Override
+            public void visitPrefixStart(CldrPath prefix, Context context) {
+                // Only the two sub-trees that feed the tables are of interest; everything else is ignored.
+                if (prefix.getName().endsWith("likelySubtags")) {
+                    context.install(cldrValue -> handleLikelySubtag(cldrValue));
+                } else if (prefix.getName().endsWith("parentLocales")) {
+                    context.install(cldrValue -> handleParentLocale(cldrValue));
+                }
+            }
+        });
+
+        generateCFile(cFileOut);
+        generateJavaFile(javaFileOut);
+    }
+
+    /**
+     * Records the default script implied by one likelySubtag entry, if the entry carries any.
+     *
+     * @throws IllegalArgumentException if the "to" value does not have exactly three fields, or if a
+     *     non-"und" "from" value has three or more fields
+     */
+    private void handleLikelySubtag(CldrValue value) {
+        String from = value.get(AttributeKey.keyOf("likelySubtag", "from"));
+        String to = value.get(AttributeKey.keyOf("likelySubtag", "to"));
+
+        List<String> fromPieces = LOCALE_ID_SPLITTER.splitToList(from);
+        List<String> toPieces = LOCALE_ID_SPLITTER.splitToList(to);
+
+        if (toPieces.size() != 3) {
+            throw new IllegalArgumentException("Didn't get 3 segments in 'to' value: from=" + from + ", to=" + to);
+        }
+        if (fromPieces.get(0).equals("und")) {
+            // ignore "und" entries-- they don't yield useful default-script information
+            return;
+        }
+        if (fromPieces.size() >= 3) {
+            throw new IllegalArgumentException("'from' entry has a non-'und' language and also has a script code: from=" + from + ", to=" + to);
+        }
+        if (fromPieces.size() == 2 && fromPieces.get(1).length() > 3) {
+            // the locale ID consists of just a language and a script-- the script code is redundant and doesn't
+            // supply any default-script info
+            return;
+        }
+
+        String defaultScript = toPieces.get(1); // toPieces is always three elements, so the second one is always the script
+        if (!defaultScript.equals("Latn")) {
+            // to save room, don't include all the entries where the default script is Latn
+            defaultScripts.put(from, defaultScript);
+        }
+    }
+
+    /** Records a child -> parent mapping for every locale listed in one parentLocale entry. */
+    private void handleParentLocale(CldrValue value) {
+        String parent = value.get(AttributeKey.keyOf("parentLocale", "parent"));
+        String childrenStr = value.get(AttributeKey.keyOf("parentLocale", "locales"));
+
+        for (String child : CHILD_LOCALE_SPLITTER.split(childrenStr)) {
+            parentLocales.put(child, parent);
+        }
+    }
+
+    /**
+     * Writes the C/C++ form of the tables: for each table, the strings are packed into one
+     * NUL-separated char array, and an int32_t array holds (key offset, value offset) pairs.
+     */
+    private void generateCFile(PrintWriter out) {
+        out.println("// © 2022 and later: Unicode, Inc. and others.");
+        out.println("// License & terms of use: http://www.unicode.org/copyright.html");
+        out.println("//");
+        out.println("// Internal static data tables used by uresbund.cpp");
+        out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool");
+        out.println("// (see " + GENERATOR_SOURCE_PATH + ").");
+        out.println("// DO NOT HAND EDIT!!!");
+        out.println();
+        out.println("#ifdef INCLUDED_FROM_URESBUND_CPP");
+        out.println();
+
+        out.println("//======================================================================");
+        out.println("// Default script table");
+        Map<String, Integer> scriptIndex = buildCompositeString(defaultScripts.values(), "scriptCodeChars", out);
+        Map<String, Integer> localeIDIndex = buildCompositeString(defaultScripts.keySet(), "dsLocaleIDChars", out);
+        writeStringToStringIndex(defaultScripts, localeIDIndex, scriptIndex, "defaultScriptTable", out);
+
+        out.println("//======================================================================");
+        out.println("// Parent locale table");
+        // Children and parents share a single string pool, so one index serves both columns.
+        TreeSet<String> combinedLocaleIDs = new TreeSet<>();
+        combinedLocaleIDs.addAll(parentLocales.keySet());
+        combinedLocaleIDs.addAll(parentLocales.values());
+        localeIDIndex = buildCompositeString(combinedLocaleIDs, "parentLocaleChars", out);
+        writeStringToStringIndex(parentLocales, localeIDIndex, localeIDIndex, "parentLocaleTable", out);
+
+        out.println();
+        out.println("#endif // INCLUDED_FROM_URESBUND_CPP");
+    }
+
+    /**
+     * Writes a C char array containing the distinct strings in sorted order, each followed by
+     * "\0", and returns the offset of each string within that array.
+     *
+     * @param strings the strings to pack; duplicates are written once
+     * @param variableName the name of the C array to declare
+     * @param out the writer receiving the C source
+     * @return a map from each distinct string to its offset in the generated array
+     */
+    private Map<String, Integer> buildCompositeString(Collection<String> strings, String variableName, PrintWriter out) {
+        Map<String, Integer> stringIndex = new TreeMap<>();
+        for (String string : strings) {
+            stringIndex.putIfAbsent(string, 0);
+        }
+        out.println("const char " + variableName + "[] =");
+        out.print(" \"");
+        int nextStringOffset = 0;
+        int charsOnLine = 0;
+        for (Map.Entry<String, Integer> entry : stringIndex.entrySet()) {
+            String string = entry.getKey();
+            out.print(string);
+            out.print("\\0");
+            // Update through the entry rather than calling put() on the map being iterated.
+            entry.setValue(nextStringOffset);
+            nextStringOffset += string.length() + 1; // +1 for the NUL in the compiled array
+            charsOnLine += string.length() + 2;      // +2 for the two source characters of "\0"
+
+            if (charsOnLine > 60) {
+                // Close this literal and open a new one on the next line; C concatenates adjacent literals.
+                out.println("\"");
+                out.print(" \"");
+                charsOnLine = 0;
+            }
+        }
+        out.println("\";");
+        out.println();
+        return stringIndex;
+    }
+
+    /**
+     * Writes a C int32_t array holding, for each entry of {@code index}, the offset of its key
+     * followed by the offset of its value, with the original strings in a trailing comment.
+     */
+    private void writeStringToStringIndex(Map<String, String> index, Map<String, Integer> keyIndex, Map<String, Integer> valueIndex, String variableName, PrintWriter out) {
+        out.println("const int32_t " + variableName + "[] = {");
+        for (Map.Entry<String, String> entry : index.entrySet()) {
+            String key = entry.getKey();
+            String value = entry.getValue();
+            out.println(" " + keyIndex.get(key) + ", " + valueIndex.get(value) + ", // " + key + " -> " + value);
+        }
+        out.println("};");
+        out.println();
+    }
+
+    /** Writes the Java form of the tables: a class exposing two unmodifiable String-to-String maps. */
+    private void generateJavaFile(PrintWriter out) {
+        out.println("// © 2022 and later: Unicode, Inc. and others.");
+        out.println("// License & terms of use: http://www.unicode.org/copyright.html");
+        out.println("//");
+        out.println("// Internal static data tables used by ICUResourceBundle.java");
+        out.println("// WARNING: This file is mechanically generated by the CLDR-to-ICU tool");
+        out.println("// (see " + GENERATOR_SOURCE_PATH + ").");
+        out.println("// DO NOT HAND EDIT!!!");
+        out.println();
+
+        out.println("package com.ibm.icu.impl;");
+        out.println();
+        out.println("import java.util.Collections;");
+        out.println("import java.util.HashMap;");
+        out.println("import java.util.Map;");
+        out.println();
+        out.println("class LocaleFallbackData {");
+
+        out.println(" //======================================================================");
+        out.println(" // Default script table");
+        out.println(" public static final Map<String, String> DEFAULT_SCRIPT_TABLE = buildDefaultScriptTable();");
+        out.println();
+        out.println(" private static Map<String, String> buildDefaultScriptTable() {");
+        out.println(" Map<String, String> t = new HashMap<>();");
+        for (Map.Entry<String, String> entry : defaultScripts.entrySet()) {
+            out.println(" t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");");
+        }
+        out.println(" return Collections.unmodifiableMap(t);");
+        out.println(" }");
+        out.println();
+
+        out.println(" //======================================================================");
+        out.println(" // Parent locale table");
+        out.println(" public static final Map<String, String> PARENT_LOCALE_TABLE = buildParentLocaleTable();");
+        out.println();
+        out.println(" private static Map<String, String> buildParentLocaleTable() {");
+        out.println(" Map<String, String> t = new HashMap<>();");
+        for (Map.Entry<String, String> entry : parentLocales.entrySet()) {
+            out.println(" t.put(\"" + entry.getKey() + "\", \"" + entry.getValue() + "\");");
+        }
+        out.println(" return Collections.unmodifiableMap(t);");
+        out.println(" }");
+        out.println("}");
+    }
+}