]
},
// Test mixed feature filter and resource filter
-// Exlude translit data so we can run test for ICU-20673
+// Exclude translit data so we can run test for ICU-20673
+// Also test for "whitelist" versus "includelist"
"featureFilters": {
"misc": {
"whitelist": ["supplementalData"]
},
- "translit": "exclude"
+ "translit": "exclude",
+ "curr_tree": {
+ "filterType": "locale",
+ "includelist": ["my"]
+ },
+ "brkitr_rules": {
+ "excludelist": ["line"]
+ },
+ "brkitr_dictionaries": {
+ "blacklist": ["cjdict"]
+ }
},
"resourceFilters": [
{
"categories": ["misc"],
"files": {
- "whitelist": ["supplementalData"]
+ "includelist": ["supplementalData"]
},
"rules": ["+/*"]
}
{
"localeFilter": {
"filterType": "language",
- "whitelist": [
+ "includelist": [
"en",
"de",
"zh"
The *filterType* "language" only supports slicing by entire languages.
+##### Terminology: Includelist, Excludelist, Whitelist, Blacklist
+
+Prior to ICU 68, use `"whitelist"` and `"blacklist"` instead of `"includelist"`
+and `"excludelist"`, respectively. ICU 68 allows all four terms.
+
#### Filtering by Locale
For more control, use *filterType* "locale". Here is a *filters.hjson* file that
localeFilter: {
filterType: locale
- whitelist: [
+ includelist: [
en
de
zh
]
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
#### Adding Script Variants (includeScripts = true)
You may set the *includeScripts* option to true to include all scripts for a
"localeFilter": {
"filterType": "locale",
"includeScripts": true,
- "whitelist": [
+ "includelist": [
"en",
"de",
"zh"
}
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
If you wish to explicitly list the scripts, you may put the script code in the
locale tag in the includelist, and you do not need the *includeScripts* option
enabled. For example, in Hjson, to include Han Traditional ***but not Han
localeFilter: {
filterType: locale
- whitelist: [
+ includelist: [
en
de
zh_Hant
]
}
-Note: the option *includeScripts* is only supported at the language level;
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
+**Note:** the option *includeScripts* is only supported at the language level;
i.e., in order to include all scripts for a particular language, you must
specify the language alone, without a region tag.
localeFilter: {
filterType: locale
includeChildren: false
- whitelist: [
+ includelist: [
en_US
en_GB
de_DE
]
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
Including dependencies, the above filter would include the following data files:
- root.txt
featureFilters: {
brkitr_dictionaries: {
- whitelist: [
+ includelist: [
burmesedict
]
}
automatically for you. Note that all files in a particular category have the
same directory and extension.
-You can use either a whitelist or a blacklist for the file name filter.
+You can use either `"includelist"` or `"excludelist"` for the file name filter.
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
##### Regex Filter
featureFilters: {
brkitr_rules: {
filterType: regex
- blacklist: [
+ excludelist: [
^.*_cj$
]
}
featureFilters: {
curr_tree: {
filterType: locale
- whitelist: [
+ includelist: [
it
]
}
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
You can exclude an entire `_tree` category without affecting other categories.
For example, to exclude region display names:
{
categories: ["misc"]
files: {
- whitelist: ["supplementalData"]
+ includelist: ["supplementalData"]
}
rules: [
-/calendarData
}
]
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
#### Combining Multiple Resource Filter Specs
You can also list multiple resource filter objects in the *resourceFilters*
categories: ["unit_tree"]
files: {
filterType: locale
- whitelist: ["en_US"]
+ includelist: ["en_US"]
}
rules: [
+/*/length/mile
categories: ["unit_tree"]
files: {
filterType: locale
- whitelist: ["en_CA"]
+ includelist: ["en_CA"]
}
rules: [
+/*/length/kilometer
return False
-class WhitelistBlacklistFilter(Filter):
+class IncludeExcludeFilter(Filter):
def __init__(self, json_data):
if "whitelist" in json_data:
- self.is_whitelist = True
- self.whitelist = json_data["whitelist"]
+ self.is_includelist = True
+ self.includelist = json_data["whitelist"]
+ elif "includelist" in json_data:
+ self.is_includelist = True
+ self.includelist = json_data["includelist"]
+ elif "blacklist" in json_data:
+ self.is_includelist = False
+ self.excludelist = json_data["blacklist"]
+ elif "excludelist" in json_data:
+ self.is_includelist = False
+ self.excludelist = json_data["excludelist"]
else:
- assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
- self.is_whitelist = False
- self.blacklist = json_data["blacklist"]
+ raise AssertionError("Need either includelist or excludelist: %s" % str(json_data))
def match(self, file):
file_stem = self._file_to_file_stem(file)
pass
-class FileStemFilter(WhitelistBlacklistFilter):
+class FileStemFilter(IncludeExcludeFilter):
def _should_include(self, file_stem):
- if self.is_whitelist:
- return file_stem in self.whitelist
+ if self.is_includelist:
+ return file_stem in self.includelist
else:
- return file_stem not in self.blacklist
+ return file_stem not in self.excludelist
-class LanguageFilter(WhitelistBlacklistFilter):
+class LanguageFilter(IncludeExcludeFilter):
def _should_include(self, file_stem):
language = file_stem.split("_")[0]
if language == "root":
# Always include root.txt
return True
- if self.is_whitelist:
- return language in self.whitelist
+ if self.is_includelist:
+ return language in self.includelist
else:
- return language not in self.blacklist
+ return language not in self.excludelist
-class RegexFilter(WhitelistBlacklistFilter):
+class RegexFilter(IncludeExcludeFilter):
def __init__(self, *args):
# TODO(ICU-20301): Change this to: super().__init__(*args)
super(RegexFilter, self).__init__(*args)
- if self.is_whitelist:
- self.whitelist = [re.compile(pat) for pat in self.whitelist]
+ if self.is_includelist:
+ self.includelist = [re.compile(pat) for pat in self.includelist]
else:
- self.blacklist = [re.compile(pat) for pat in self.blacklist]
+ self.excludelist = [re.compile(pat) for pat in self.excludelist]
def _should_include(self, file_stem):
- if self.is_whitelist:
- for pattern in self.whitelist:
+ if self.is_includelist:
+ for pattern in self.includelist:
if pattern.match(file_stem):
return True
return False
else:
- for pattern in self.blacklist:
+ for pattern in self.excludelist:
if pattern.match(file_stem):
return False
return True
class LocaleFilter(Filter):
def __init__(self, json_data, io):
- self.locales_requested = list(json_data["whitelist"])
+ if "whitelist" in json_data:
+ self.locales_requested = list(json_data["whitelist"])
+ elif "includelist" in json_data:
+ self.locales_requested = list(json_data["includelist"])
+ else:
+ raise AssertionError("You must have an includelist in a locale filter")
self.include_children = json_data.get("includeChildren", True)
self.include_scripts = json_data.get("includeScripts", False)
{
"properties": {
"filterType": {
- "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+ "$ref": "#/definitions/includeExcludeFilterTypes"
},
"whitelist": { "$ref": "#/definitions/stringList" }
},
{
"properties": {
"filterType": {
- "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+ "$ref": "#/definitions/includeExcludeFilterTypes"
},
"blacklist": { "$ref": "#/definitions/stringList" }
},
"required": ["blacklist"],
"additionalProperties": false
},
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["includelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "excludelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["excludelist"],
+ "additionalProperties": false
+ },
{
"properties": {
"filterType": {
"required": ["filterType", "whitelist"],
"additionalProperties": false
},
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
+ "enum": ["locale"]
+ },
+ "includeChildren": {
+ "type": "boolean"
+ },
+ "includeScripts": {
+ "type": "boolean"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["filterType", "includelist"],
+ "additionalProperties": false
+ },
{
"properties": {
"filterType": {
}
]
},
- "blacklistWhitelistFilterTypes": {
+ "includeExcludeFilterTypes": {
"type": "string",
"enum": [
"language",