granicus.if.org Git - icu/commitdiff
ICU-20298 Adding improved locale filtering to buildtool.
author Shane Carr <shane@unicode.org>
Thu, 24 Jan 2019 04:49:33 +0000 (20:49 -0800)
committer Shane F. Carr <shane@unicode.org>
Mon, 28 Jan 2019 21:43:17 +0000 (13:43 -0800)
- Integrates changes from cldrbug 11802 to ICU.
- Adds test suite for buildtool.
- Adds new filter type "union".

17 files changed:
icu4c/source/configure
icu4c/source/configure.ac
icu4c/source/data/Makefile.in
icu4c/source/data/build.xml
icu4c/source/data/buildtool/__main__.py
icu4c/source/data/buildtool/filtration.py
icu4c/source/data/buildtool/filtration_schema.json
icu4c/source/data/buildtool/locale_dependencies.py [new file with mode: 0644]
icu4c/source/data/buildtool/test/__init__.py [new file with mode: 0644]
icu4c/source/data/buildtool/test/__main__.py [new file with mode: 0644]
icu4c/source/data/buildtool/test/filtration_test.py [new file with mode: 0644]
icu4c/source/data/curr/resfiles.mk
icu4c/source/data/lang/resfiles.mk
icu4c/source/data/locales/resfiles.mk
icu4c/source/data/region/resfiles.mk
icu4c/source/data/unit/resfiles.mk
icu4c/source/data/zone/resfiles.mk

index 29bcec408d5d8a37c76e8a73280f185429a17cd6..0853e07149ac0449b4a14f34199a9797925c39db 100755 (executable)
@@ -4244,6 +4244,7 @@ fi
 done
 
 
+
 # Check for the platform make
 for ac_prog in gmake gnumake
 do
index b2f8446a054f74f42ce4ae2d2202b9d9a35c1edc..170e86909f95df2f4f97c08244215cfc7da9f444 100644 (file)
@@ -197,6 +197,7 @@ fi
 
 # TODO(ICU-20301): Remove fallback to Python 2.
 AC_CHECK_PROGS(PYTHON, python3 "py -3" python "py")
+AC_SUBST(PYTHON)
 
 # Check for the platform make
 AC_PATH_PROGS(U_MAKE, gmake gnumake, make)
index d96351164f93c7b54a5540787eeca6b2590de091..0ec265d6c7e57407e3a70d1a1983fd789a93bab1 100644 (file)
@@ -135,7 +135,7 @@ install: all-local install-local
 clean: clean-local
 distclean : distclean-local
 dist: 
-check: all
+check: all check-local
 
 check-exhaustive: check
 
@@ -163,6 +163,7 @@ cleanpackage:
        $(RMV) $(LIBDIR)/*$(LIB_ICUDATA_NAME)*.$(SO)* $(LIBDIR)/$(LIB_STATIC_ICUDATA_NAME).$(A)
 
 check-local:
+       @PYTHON@ -m buildtool.test
 
 # Find out if we have a source archive.
 # If we have that, then use that instead of building everything from scratch.
index 20f7b5be2de43562bf8a60ae731a80d57e1fe936..5fd1a993a12236e3d26b89d680631791049aefad 100644 (file)
                     <arg name="--supplementaldir" value="${env.CLDR_DIR}/common/supplemental" />
                     <arg name="--type" value="locales"/>
                     <arg name="--makefile" value="resfiles.mk"/>
+                    <arg name="--depgraphfile" value="../buildtool/locale_dependencies.py"/>
                 </args>
                <remapper>
                  <remap sourcePath="/Keys" targetDir="lang" />
             <fileset id="resfiles" dir="${env.ICU4C_DIR}/source/data/locales">
                 <include name="resfiles.mk" />
             </fileset>
+            <fileset id="dependencies_py" dir="${env.ICU4C_DIR}/source/data/buildtool">
+                <include name="locale_dependencies.py" />
+            </fileset>
             <fileset id="locales_split" dir="${env.ICU4C_DIR}/source/data">
                <include name="curr/*.txt" /> 
                <include name="curr/resfiles.mk" /> 
index f30e6ff7593dfa278d8e1d4435ae23491216c60b..40c33a8f14803324f74833a3c3e1b5d30bb472f9 100644 (file)
@@ -159,6 +159,7 @@ class Config(object):
                     ),
                     file=sys.stderr)
         except ImportError:
+            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
             pass
 
 
index 26530581fbf51389e4362f2467c981276f79b3e5..1e5563561577f552fb305b1763ee39322c246d6e 100644 (file)
@@ -12,6 +12,7 @@ import sys
 
 from . import *
 from . import utils
+from .locale_dependencies import data as DEPENDENCY_DATA
 from .request_types import *
 
 
@@ -34,6 +35,10 @@ class Filter(object):
             return RegexFilter(json_data)
         elif filter_type == "exclude":
             return ExclusionFilter()
+        elif filter_type == "union":
+            return UnionFilter(json_data)
+        elif filter_type == "locale":
+            return LocaleFilter(json_data)
         else:
             print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
             return None
@@ -45,6 +50,12 @@ class Filter(object):
             assert self.match(file)
         return [request]
 
+    @classmethod
+    def _file_to_file_stem(cls, file):
+        start = file.filename.rfind("/")
+        limit = file.filename.rfind(".")
+        return file.filename[start+1:limit]
+
     @abstractmethod
     def match(self, file):
         pass
@@ -65,7 +76,8 @@ class WhitelistBlacklistFilter(Filter):
         if "whitelist" in json_data:
             self.is_whitelist = True
             self.whitelist = json_data["whitelist"]
-        elif "blacklist" in json_data:
+        else:
+            assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
             self.is_whitelist = False
             self.blacklist = json_data["blacklist"]
 
@@ -73,12 +85,6 @@ class WhitelistBlacklistFilter(Filter):
         file_stem = self._file_to_file_stem(file)
         return self._should_include(file_stem)
 
-    @classmethod
-    def _file_to_file_stem(cls, file):
-        start = file.filename.rfind("/")
-        limit = file.filename.rfind(".")
-        return file.filename[start+1:limit]
-
     @abstractmethod
     def _should_include(self, file_stem):
         pass
@@ -126,6 +132,92 @@ class RegexFilter(WhitelistBlacklistFilter):
             return True
 
 
+class UnionFilter(Filter):
+    def __init__(self, json_data):
+        # Collect the sub-filters.
+        self.sub_filters = []
+        for filter_json in json_data["unionOf"]:
+            self.sub_filters.append(Filter.create_from_json(filter_json))
+
+    def match(self, file):
+        """Match iff any of the sub-filters match."""
+        for filter in self.sub_filters:
+            if filter.match(file):
+                return True
+        return False
+
+
+LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
+LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")
+
+class LocaleFilter(Filter):
+    def __init__(self, json_data):
+        self.locales_requested = set()
+        self.locales_required = set()
+        self.include_children = json_data.get("includeChildren", True)
+        self.include_scripts = json_data.get("includeScripts", False)
+
+        # Compute the requested and required locales.
+        for locale in json_data["whitelist"]:
+            self._add_locale_and_parents(locale)
+
+    def _add_locale_and_parents(self, locale):
+        # Store the locale as *requested*
+        self.locales_requested.add(locale)
+        # Store the locale and its dependencies as *required*
+        while locale is not None:
+            self.locales_required.add(locale)
+            locale = self._get_parent_locale(locale)
+
+    def match(self, file):
+        locale = self._file_to_file_stem(file)
+
+        # A locale is *required* if it is *requested* or an ancestor of a
+        # *requested* locale.
+        if locale in self.locales_required:
+            return True
+
+        # Resolve include_scripts and include_children.
+        return self._match_recursive(locale)
+
+    def _match_recursive(self, locale):
+        # Base case: return True if we reached a *requested* locale,
+        # or False if we ascend out of the locale tree.
+        if locale is None:
+            return False
+        if locale in self.locales_requested:
+            return True
+
+        # Check for alternative scripts.
+        # This causes sr_Latn to check sr instead of going directly to root.
+        if self.include_scripts:
+            match = LANGUAGE_SCRIPT_REGEX.match(locale)
+            if match and self._match_recursive(match.group(1)):
+                return True
+
+        # Check if we are a descendant of a *requested* locale.
+        if self.include_children:
+            parent = self._get_parent_locale(locale)
+            if self._match_recursive(parent):
+                return True
+
+        # No matches.
+        return False
+
+    @classmethod
+    def _get_parent_locale(cls, locale):
+        if locale in DEPENDENCY_DATA["parents"]:
+            return DEPENDENCY_DATA["parents"][locale]
+        if locale in DEPENDENCY_DATA["aliases"]:
+            return DEPENDENCY_DATA["aliases"][locale]
+        if LANGUAGE_ONLY_REGEX.match(locale):
+            return "root"
+        i = locale.rfind("_")
+        if i < 0:
+            return None
+        return locale[:i]
+
+
 def apply_filters(requests, config):
     """Runs the filters and returns a new list of requests."""
     requests = _apply_file_filters(requests, config)
index 619ae2afc4b9bc627f19f2c463bd9028db3e5e30..48e674e640a5f95f4814177344e1f6191fc4a388 100644 (file)
@@ -42,7 +42,9 @@
             "oneOf": [
                 {
                     "properties": {
-                        "filterType": { "$ref": "#/definitions/filterType" },
+                        "filterType": {
+                            "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+                        },
                         "whitelist": { "$ref": "#/definitions/stringList" }
                     },
                     "required": ["whitelist"],
@@ -50,7 +52,9 @@
                 },
                 {
                     "properties": {
-                        "filterType": { "$ref": "#/definitions/filterType" },
+                        "filterType": {
+                            "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+                        },
                         "blacklist": { "$ref": "#/definitions/stringList" }
                     },
                     "required": ["blacklist"],
                 },
                 {
                     "properties": {
-                        "filterType": { "$ref": "#/definitions/filterType" }
+                        "filterType": {
+                            "type": "string",
+                            "enum": ["exclude"]
+                        }
+                    },
+                    "required": ["filterType"],
+                    "additionalProperties": false
+                },
+                {
+                    "properties": {
+                        "filterType": {
+                            "type": "string",
+                            "enum": ["locale"]
+                        },
+                        "includeChildren": {
+                            "type": "boolean"
+                        },
+                        "includeScripts": {
+                            "type": "boolean"
+                        },
+                        "whitelist": { "$ref": "#/definitions/stringList" }
+                    },
+                    "required": ["filterType", "whitelist"],
+                    "additionalProperties": false
+                },
+                {
+                    "properties": {
+                        "filterType": {
+                            "type": "string",
+                            "enum": ["union"]
+                        },
+                        "unionOf": {
+                            "type": "array",
+                            "items": { "$ref": "#/definitions/filter" }
+                        }
                     },
+                    "required": ["filterType", "unionOf"],
                     "additionalProperties": false
                 }
             ]
         },
-        "filterType": {
+        "blacklistWhitelistFilterTypes": {
             "type": "string",
             "enum": [
-                "file-stem",
                 "language",
-                "regex",
-                "exclude"
+                "regex"
             ]
         },
         "stringList": {
diff --git a/icu4c/source/data/buildtool/locale_dependencies.py b/icu4c/source/data/buildtool/locale_dependencies.py
new file mode 100644 (file)
index 0000000..fea5b87
--- /dev/null
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+# © 2019 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+
+data = {
+    "aliases": {
+        "ars": "ar_SA",
+        "az_AZ": "az_Latn_AZ",
+        "bs_BA": "bs_Latn_BA",
+        "en_NH": "en_VU",
+        "en_RH": "en_ZW",
+        "in": "id",
+        "in_ID": "id_ID",
+        "iw": "he",
+        "iw_IL": "he_IL",
+        "mo": "ro_MD",
+        "no_NO": "nb_NO",
+        "no_NO_NY": "nn_NO",
+        "no": "nb",
+        "pa_IN": "pa_Guru_IN",
+        "pa_PK": "pa_Arab_PK",
+        "sh": "sr_Latn",
+        "sh_BA": "sr_Latn_BA",
+        "sh_CS": "sr_Latn_RS",
+        "sh_YU": "sr_Latn_RS",
+        "shi_MA": "shi_Tfng_MA",
+        "sr_BA": "sr_Cyrl_BA",
+        "sr_CS": "sr_Cyrl_RS",
+        "sr_ME": "sr_Latn_ME",
+        "sr_RS": "sr_Cyrl_RS",
+        "sr_XK": "sr_Cyrl_XK",
+        "sr_YU": "sr_Cyrl_RS",
+        "sr_Cyrl_YU": "sr_Cyrl_RS",
+        "sr_Cyrl_CS": "sr_Cyrl_RS",
+        "sr_Latn_YU": "sr_Latn_RS",
+        "sr_Latn_CS": "sr_Latn_RS",
+        "tl": "fil",
+        "tl_PH": "fil_PH",
+        "uz_AF": "uz_Arab_AF",
+        "uz_UZ": "uz_Latn_UZ",
+        "vai_LR": "vai_Vaii_LR",
+        "yue_CN": "yue_Hans_CN",
+        "yue_HK": "yue_Hant_HK",
+        "zh_CN": "zh_Hans_CN",
+        "zh_HK": "zh_Hant_HK",
+        "zh_MO": "zh_Hant_MO",
+        "zh_SG": "zh_Hans_SG",
+        "zh_TW": "zh_Hant_TW"
+    },
+    "parents": {
+        "ff_Adlm": "root",
+        "en_CM": "en_001",
+        "en_KY": "en_001",
+        "en_TC": "en_001",
+        "yue_Hans": "root",
+        "en_CX": "en_001",
+        "es_EC": "es_419",
+        "es_US": "es_419",
+        "en_CY": "en_001",
+        "en_LC": "en_001",
+        "en_TK": "en_001",
+        "es_UY": "es_419",
+        "en_TO": "en_001",
+        "en_TT": "en_001",
+        "en_DE": "en_150",
+        "es_MX": "es_419",
+        "en_TV": "en_001",
+        "en_DG": "en_001",
+        "pt_ST": "pt_PT",
+        "en_DM": "en_001",
+        "en_LR": "en_001",
+        "en_TZ": "en_001",
+        "en_LS": "en_001",
+        "en_DK": "en_150",
+        "es_VE": "es_419",
+        "es_NI": "es_419",
+        "pt_AO": "pt_PT",
+        "en_UG": "en_001",
+        "en_MG": "en_001",
+        "en_MO": "en_001",
+        "en_MU": "en_001",
+        "en_MS": "en_001",
+        "en_MT": "en_001",
+        "shi_Latn": "root",
+        "es_BR": "es_419",
+        "en_AU": "en_001",
+        "en_ZM": "en_001",
+        "en_AT": "en_150",
+        "es_BZ": "es_419",
+        "uz_Arab": "root",
+        "az_Cyrl": "root",
+        "es_SV": "es_419",
+        "en_ZW": "en_001",
+        "en_JE": "en_001",
+        "en_BB": "en_001",
+        "pa_Arab": "root",
+        "en_RW": "en_001",
+        "es_CO": "es_419",
+        "en_JM": "en_001",
+        "en_BE": "en_150",
+        "es_CL": "es_419",
+        "en_BM": "en_001",
+        "en_SC": "en_001",
+        "es_CR": "es_419",
+        "en_150": "en_001",
+        "en_BS": "en_001",
+        "en_SD": "en_001",
+        "pt_GQ": "pt_PT",
+        "en_SB": "en_001",
+        "es_CU": "es_419",
+        "en_SG": "en_001",
+        "uz_Cyrl": "root",
+        "en_BW": "en_001",
+        "en_SH": "en_001",
+        "en_SE": "en_150",
+        "pt_GW": "pt_PT",
+        "en_BZ": "en_001",
+        "en_SL": "en_001",
+        "en_SI": "en_150",
+        "en_KE": "en_001",
+        "bm_Nkoo": "root",
+        "en_CC": "en_001",
+        "en_SS": "en_001",
+        "iu_Latn": "root",
+        "en_CA": "en_001",
+        "en_KI": "en_001",
+        "es_DO": "es_419",
+        "en_SX": "en_001",
+        "en_CH": "en_150",
+        "en_KN": "en_001",
+        "en_CK": "en_001",
+        "en_SZ": "en_001",
+        "en_GY": "en_001",
+        "en_PH": "en_001",
+        "en_PG": "en_001",
+        "en_PK": "en_001",
+        "en_PN": "en_001",
+        "en_HK": "en_001",
+        "zh_Hant": "root",
+        "en_PW": "en_001",
+        "es_AR": "es_419",
+        "pt_MZ": "pt_PT",
+        "en_Shaw": "root",
+        "en_IE": "en_001",
+        "ms_Arab": "root",
+        "en_IM": "en_001",
+        "en_IN": "en_001",
+        "es_BO": "es_419",
+        "en_IL": "en_001",
+        "en_AI": "en_001",
+        "az_Arab": "root",
+        "en_AG": "en_001",
+        "en_IO": "en_001",
+        "en_ZA": "en_001",
+        "en_MY": "en_001",
+        "en_ER": "en_001",
+        "en_VC": "en_001",
+        "mn_Mong": "root",
+        "vai_Latn": "root",
+        "en_MW": "en_001",
+        "pt_LU": "pt_PT",
+        "bs_Cyrl": "root",
+        "en_VG": "en_001",
+        "en_NA": "en_001",
+        "en_NF": "en_001",
+        "en_NG": "en_001",
+        "ha_Arab": "root",
+        "en_NL": "en_150",
+        "zh_Hant_MO": "zh_Hant_HK",
+        "en_VU": "en_001",
+        "en_FJ": "en_001",
+        "en_NR": "en_001",
+        "en_FK": "en_001",
+        "es_GT": "es_419",
+        "en_FI": "en_150",
+        "pt_MO": "pt_PT",
+        "en_FM": "en_001",
+        "en_NU": "en_001",
+        "en_NZ": "en_001",
+        "pt_CH": "pt_PT",
+        "en_Dsrt": "root",
+        "es_PE": "es_419",
+        "es_PA": "es_419",
+        "pt_CV": "pt_PT",
+        "en_WS": "en_001",
+        "en_GD": "en_001",
+        "en_GB": "en_001",
+        "es_HN": "es_419",
+        "pt_TL": "pt_PT",
+        "en_GG": "en_001",
+        "en_GH": "en_001",
+        "es_PR": "es_419",
+        "en_GI": "en_001",
+        "sr_Latn": "root",
+        "en_GM": "en_001",
+        "es_PY": "es_419"
+    }
+}
diff --git a/icu4c/source/data/buildtool/test/__init__.py b/icu4c/source/data/buildtool/test/__init__.py
new file mode 100644 (file)
index 0000000..dd12bfa
--- /dev/null
@@ -0,0 +1,2 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
diff --git a/icu4c/source/data/buildtool/test/__main__.py b/icu4c/source/data/buildtool/test/__main__.py
new file mode 100644 (file)
index 0000000..6ae2c0f
--- /dev/null
@@ -0,0 +1,14 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import unittest
+
+from . import filtration_test
+
+def load_tests(loader, tests, pattern):
+    suite = unittest.TestSuite()
+    suite.addTest(filtration_test.suite)
+    return suite
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/icu4c/source/data/buildtool/test/filtration_test.py b/icu4c/source/data/buildtool/test/filtration_test.py
new file mode 100644 (file)
index 0000000..5687006
--- /dev/null
@@ -0,0 +1,353 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import unittest
+
+from .. import InFile
+from ..filtration import Filter
+
+EXAMPLE_FILE_STEMS = [
+    "af_NA",
+    "af_ZA",
+    "af",
+    "ar",
+    "ar_SA",
+    "ars",
+    "bs_BA",
+    "bs_Cyrl_BA",
+    "bs_Cyrl",
+    "bs_Latn_BA",
+    "bs_Latn",
+    "bs",
+    "en_001",
+    "en_150",
+    "en_DE",
+    "en_GB",
+    "en_US",
+    "root",
+    "sr_BA",
+    "sr_CS",
+    "sr_Cyrl_BA",
+    "sr_Cyrl_CS",
+    "sr_Cyrl_ME",
+    "sr_Cyrl",
+    "sr_Latn_BA",
+    "sr_Latn_CS",
+    "sr_Latn_ME",
+    "sr_Latn",
+    "sr_ME",
+    "sr",
+    "vai_Latn_LR",
+    "vai_Latn",
+    "vai_LR",
+    "vai_Vaii_LR",
+    "vai_Vaii",
+    "vai",
+    "zh_CN",
+    "zh_Hans_CN",
+    "zh_Hans_HK",
+    "zh_Hans_MO",
+    "zh_Hans_SG",
+    "zh_Hans",
+    "zh_Hant_HK",
+    "zh_Hant_MO",
+    "zh_Hant_TW",
+    "zh_Hant",
+    "zh_HK",
+    "zh_MO",
+    "zh_SG",
+    "zh_TW",
+    "zh"
+]
+
+class FiltrationTest(unittest.TestCase):
+
+    def test_exclude(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "exclude"
+        }), [
+        ])
+
+    def test_default_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "whitelist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }), [
+            "ars",
+            "zh_Hans"
+        ])
+
+    def test_default_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("ars")
+        expected_matches.remove("zh_Hans")
+        self._check_filter(Filter.create_from_json({
+            "blacklist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }), expected_matches)
+
+    def test_language_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "whitelist": [
+                "af",
+                "bs"
+            ]
+        }), [
+            "root",
+            "af_NA",
+            "af_ZA",
+            "af",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs"
+        ])
+
+    def test_language_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("af_NA")
+        expected_matches.remove("af_ZA")
+        expected_matches.remove("af")
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "blacklist": [
+                "af"
+            ]
+        }), expected_matches)
+
+    def test_regex_whitelist(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "whitelist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }), [
+            "ar",
+            "ar_SA",
+            "ars",
+            "zh"
+        ])
+
+    def test_regex_blacklist(self):
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("ar")
+        expected_matches.remove("ar_SA")
+        expected_matches.remove("ars")
+        expected_matches.remove("zh")
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "blacklist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }), expected_matches)
+
+    def test_locale_basic(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                # Default scripts:
+                # sr => Cyrl
+                # vai => Vaii
+                # zh => Hans
+                "bs_BA", # is an alias to bs_Latn_BA
+                "en_DE",
+                "sr", # Language with no script
+                "vai_Latn", # Language with non-default script
+                "zh_Hans" # Language with default script
+            ]
+        }), [
+            "root",
+            # bs: should include the full dependency tree of bs_BA
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl, the default, but not Latn.
+            "sr",
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            # vai: include Latn but NOT Vaii.
+            "vai_Latn",
+            "vai_Latn_LR",
+            # zh: include Hans but NOT Hant.
+            "zh",
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG"
+        ])
+
+    def test_locale_no_children(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }), [
+            "root",
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            "sr",
+            "vai_Latn",
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_locale_include_scripts(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Latn, since no particular script was requested.
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            "sr_Cyrl",
+            "sr_Latn_BA",
+            "sr_Latn_CS",
+            "sr_Latn_ME",
+            "sr_Latn",
+            "sr_ME",
+            "sr",
+            # vai: do NOT include Vaii; the script was explicitly requested.
+            "vai_Latn_LR",
+            "vai_Latn",
+            # zh: do NOT include Hant; the script was explicitly requested.
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG",
+            "zh_Hans",
+            "zh"
+        ])
+
+    def test_locale_no_children_include_scripts(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl and Latn but no other children
+            "sr",
+            "sr_Cyrl",
+            "sr_Latn",
+            # vai: include only the requested script
+            "vai_Latn",
+            # zh: include only the requested script
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_union(self):
+        self._check_filter(Filter.create_from_json({
+            "filterType": "union",
+            "unionOf": [
+                {
+                    "whitelist": [
+                        "ars",
+                        "zh_Hans"
+                    ]
+                },
+                {
+                    "filterType": "regex",
+                    "whitelist": [
+                        r"^bs.*$",
+                        r"^zh$"
+                    ]
+                }
+            ]
+        }), [
+            "ars",
+            "zh_Hans",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "zh"
+        ])
+
+    def _check_filter(self, filter, expected_matches):
+        for file_stem in EXAMPLE_FILE_STEMS:
+            is_match = filter.match(InFile("locales/%s.txt" % file_stem))
+            expected_match = file_stem in expected_matches
+            self.assertEqual(is_match, expected_match, file_stem)
+
+# Export the test for the runner
+suite = unittest.makeSuite(FiltrationTest)
index 37f3a9568b6052f23d3f64307d7eca336afbe4ab..a50a733336b4b7c1e64006d2de2ef3fd209a52c8 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 CURR_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.
index 8dcc2aeaca4964f8c052d50f3408935fee62ea4d..6a8d7763edcb9c80f35d1973527571411d9ec117 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 LANG_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.
index f89c922486b7315210e5f3e2387e9940f0f820da..6eff17bfa639067031f493922aaa6d5fbb86b09d 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 GENRB_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.
index ec91cc440e9e3adead8c3d523a0d0103d28c44cf..557d247deb48da13358f1beb0b7dd8d9c878302a 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 REGION_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.
index 590c830f6c490fcfabd7beb4e5a515fd43297448..e41c780f36318b6195d3115e83ccb51a5c110f78 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 UNIT_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.
index 631df399d02e868e77973ef244d82301916f2859..f4c7e005687d2b16b9f322734eb499d2935cb553 100644 (file)
@@ -1,7 +1,13 @@
 # © 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html#License
 ZONE_CLDR_VERSION = 34
-# A list of txt's to build
+#
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+# NOTE (January 2019): Please use ICU's new data filtering to select locale
+# files.  This makefile is no longer used to filter locale files.
+# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#
+# Old description: A list of txt's to build
 # Note:
 #
 #   If you are thinking of modifying this file, READ THIS.