granicus.if.org Git - icu/commitdiff
ICU-20627 Adding alias locales to new structure in res_index.
author Shane Carr <shane@unicode.org>
Wed, 5 Jun 2019 21:03:55 +0000 (21:03 +0000)
committer Shane F. Carr <shane@unicode.org>
Thu, 6 Jun 2019 22:58:06 +0000 (15:58 -0700)
icu4c/source/data/BUILDRULES.py
icu4c/source/python/icutools/databuilder/__main__.py
icu4c/source/python/icutools/databuilder/request_types.py

index 686e0feebdc3924d016fa8050a1bb7f1405f0b38..56eb0e48fea1f15a410b5ba63587e82cfe0cbe7b 100644 (file)
@@ -12,7 +12,6 @@ from icutools.databuilder.request_types import *
 
 import os
 import sys
-import xml.etree.ElementTree as ET
 
 
 def generate(config, glob, common_vars):
@@ -517,26 +516,28 @@ def generate_tree(
     ]
 
     # Generate res_index file
-    synthetic_locales = set()
-    if not config.ignore_xml_deprecates:
-        deprecates_xml_path = os.path.join(
-            os.path.dirname(__file__), xml_filename)
-        deprecates_xml = ET.parse(deprecates_xml_path)
-        for child in deprecates_xml.getroot():
-            if child.tag == "alias":
-                synthetic_locales.add(child.attrib["from"])
-            elif child.tag == "emptyLocale":
-                synthetic_locales.add(child.attrib["locale"])
-            else:
-                raise ValueError("Unknown tag in deprecates XML: %s" % child.tag)
-    index_input_files = []
+    # Exclude the deprecated locale variants and root; see ICU-20628. This
+    # could be data-driven, but we do not want to perform I/O in this script
+    # (for example, we do not want to read from an XML file).
+    excluded_locales = set([
+        "ja_JP_TRADITIONAL",
+        "th_TH_TRADITIONAL",
+        "de_",
+        "de__PHONEBOOK",
+        "es_",
+        "es__TRADITIONAL",
+        "root",
+    ])
+    # Put alias locales in a separate structure; see ICU-20627
+    alias_locales = set(locale_dependencies.data["aliases"].keys())
+    alias_files = []
+    installed_files = []
     for f in input_files:
-        file_stem = f.filename[f.filename.rfind("/")+1:-4]
-        if file_stem == "root":
+        file_stem = IndexRequest.locale_file_stem(f)
+        if file_stem in excluded_locales:
             continue
-        if file_stem in synthetic_locales:
-            continue
-        index_input_files.append(f)
+        destination = alias_files if file_stem in alias_locales else installed_files
+        destination.append(f)
     cldr_version = locale_dependencies.data["cldrVersion"] if sub_dir == "locales" else None
     index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
         IN_SUB_DIR = sub_dir,
@@ -551,7 +552,8 @@ def generate_tree(
         IndexRequest(
             name = index_file_target_name,
             category = category,
-            input_files = index_input_files,
+            installed_files = installed_files,
+            alias_files = alias_files,
             txt_file = index_file_txt,
             output_file = index_res_file,
             cldr_version = cldr_version,
index 0b043f0f101a4a0043c36f17a863df24e1a7f0ac..95d70a0ac078517a52e5963ac2c943fd22172b5f 100644 (file)
@@ -83,12 +83,6 @@ flag_parser.add_argument(
     default = False,
     action = "store_true"
 )
-flag_parser.add_argument(
-    "--ignore_xml_deprecates",
-    help = "Whether to ignore XML deprecates files for building res_index.",
-    default = False,
-    action = "store_true"
-)
 flag_parser.add_argument(
     "--seqmode",
     help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
@@ -134,9 +128,6 @@ class Config(object):
         # Boolean: Whether to include core Unicode data files in the .dat file
         self.include_uni_core_data = args.include_uni_core_data
 
-        # Boolean: Whether to ignore the XML files
-        self.ignore_xml_deprecates = args.ignore_xml_deprecates
-
         # Default fields before processing filter file
         self.filters_json_data = {}
 
index 72b3991fbae2093e2946194d19e3e65aa84a39c9..14c2edb87c8c3e444302019edb4b17eff231aa56 100644 (file)
@@ -100,8 +100,8 @@ class AbstractExecutionRequest(AbstractRequest):
         while i < len(self.input_files):
             if filter.match(self.input_files[i]):
                 i += 1
-                continue
-            self._del_at(i)
+            else:
+                self._del_at(i)
         return i > 0
 
     def _del_at(self, i):
@@ -286,7 +286,8 @@ class ListRequest(AbstractRequest):
 
 class IndexRequest(AbstractRequest):
     def __init__(self, **kwargs):
-        self.input_files = []
+        self.installed_files = []
+        self.alias_files = []
         self.txt_file = None
         self.output_file = None
         self.cldr_version = ""
@@ -296,12 +297,18 @@ class IndexRequest(AbstractRequest):
 
     def apply_file_filter(self, filter):
         i = 0
-        while i < len(self.input_files):
-            if filter.match(self.input_files[i]):
+        while i < len(self.installed_files):
+            if filter.match(self.installed_files[i]):
                 i += 1
-                continue
-            del self.input_files[i]
-        return i > 0
+            else:
+                del self.installed_files[i]
+        j = 0
+        while j < len(self.alias_files):
+            if filter.match(self.alias_files[i]):
+                j += 1
+            else:
+                del self.alias_files[j]
+        return i + j > 0
 
     def flatten(self, config, all_requests, common_vars):
         return (
@@ -322,24 +329,34 @@ class IndexRequest(AbstractRequest):
         )
 
     def _generate_index_file(self, common_vars):
-        locales = [f.filename[f.filename.rfind("/")+1:-4] for f in self.input_files]
+        installed_locales = [IndexRequest.locale_file_stem(f) for f in self.installed_files]
+        alias_locales = [IndexRequest.locale_file_stem(f) for f in self.alias_files]
         formatted_version = "    CLDRVersion { \"%s\" }\n" % self.cldr_version if self.cldr_version else ""
-        formatted_locales = "\n".join(["        %s {\"\"}" % v for v in locales])
+        formatted_installed_locales = "\n".join(["        %s {\"\"}" % v for v in installed_locales])
+        formatted_alias_locales = "\n".join(["        %s {\"\"}" % v for v in alias_locales])
         # TODO: CLDRVersion is required only in the base file
         return ("// Warning this file is automatically generated\n"
                 "{INDEX_NAME}:table(nofallback) {{\n"
                 "{FORMATTED_VERSION}"
                 "    InstalledLocales {{\n"
-                "{FORMATTED_LOCALES}\n"
+                "{FORMATTED_INSTALLED_LOCALES}\n"
+                "    }}\n"
+                "    AliasLocales {{\n"
+                "{FORMATTED_ALIAS_LOCALES}\n"
                 "    }}\n"
                 "}}").format(
                     FORMATTED_VERSION = formatted_version,
-                    FORMATTED_LOCALES = formatted_locales,
+                    FORMATTED_INSTALLED_LOCALES = formatted_installed_locales,
+                    FORMATTED_ALIAS_LOCALES = formatted_alias_locales,
                     **common_vars
                 )
 
     def all_input_files(self):
-        return self.input_files
+        return self.installed_files + self.alias_files
 
     def all_output_files(self):
         return [self.output_file]
+
+    @staticmethod
+    def locale_file_stem(f):
+        return f.filename[f.filename.rfind("/")+1:-4]