print("Error: Cannot find data directory; please specify --glob_dir", file=sys.stderr)
exit(1)
- # DIRECTORIES
- build_dirs = [
- "{OUT_DIR}",
- "{OUT_DIR}/curr",
- "{OUT_DIR}/lang",
- "{OUT_DIR}/region",
- "{OUT_DIR}/zone",
- "{OUT_DIR}/unit",
- "{OUT_DIR}/brkitr",
- "{OUT_DIR}/coll",
- "{OUT_DIR}/rbnf",
- "{OUT_DIR}/translit",
- "{TMP_DIR}",
- "{TMP_DIR}/curr",
- "{TMP_DIR}/lang",
- "{TMP_DIR}/locales",
- "{TMP_DIR}/region",
- "{TMP_DIR}/zone",
- "{TMP_DIR}/unit",
- "{TMP_DIR}/coll",
- "{TMP_DIR}/rbnf",
- "{TMP_DIR}/translit",
- "{TMP_DIR}/brkitr"
- ]
-
requests += generate_cnvalias(config, glob, common_vars)
requests += generate_confusables(config, glob, common_vars)
requests += generate_conversion_mappings(config, glob, common_vars)
)
]
- return (build_dirs, requests)
+ return requests
def generate_cnvalias(config, glob, common_vars):
import argparse
import glob as pyglob
import json
+import os
import sys
from . import *
+from .comment_stripper import CommentStripper
from .renderers import makefile, windirect
from . import filtration, utils
import BUILDRULES
try:
with open(args.filter_file, "r") as f:
print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
- try:
- import hjson
- self.filters_json_data = hjson.load(f)
- except ImportError:
- self.filters_json_data = json.load(f)
+ self._parse_filter_file(f)
except IOError:
print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
exit(1)
+    def _parse_filter_file(self, f):
+        """Parse the open filter-file stream *f* into self.filters_json_data.
+
+        Prefers the Hjson parser when it is installed (it tolerates comments
+        and relaxed syntax); otherwise falls back to vanilla JSON after
+        stripping "//" comment lines, which strict JSON would reject.
+        """
+        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
+        try:
+            import hjson
+            self.filters_json_data = hjson.load(f)
+        except ImportError:
+            # CommentStripper removes lines starting with "//" so that
+            # commented filter files still parse as strict JSON.
+            self.filters_json_data = json.load(CommentStripper(f))
+
+        # Optionally pre-validate the JSON schema before further processing.
+        # Some schema errors will be caught later, but this step ensures
+        # maximal validity.
+        try:
+            import jsonschema
+            schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
+            with open(schema_path) as schema_f:
+                # The schema file itself contains "//" comments; strip them too.
+                schema = json.load(CommentStripper(schema_f))
+            validator = jsonschema.Draft4Validator(schema)
+            # Report every violation as a warning rather than aborting; the
+            # offending location is rendered like ".resourceFilters[0].rules".
+            for error in validator.iter_errors(self.filters_json_data, schema):
+                print("WARNING: ICU data filter JSON file:", error.message,
+                    "at", "".join(
+                        "[%d]" % part if isinstance(part, int) else ".%s" % part
+                        for part in error.absolute_path
+                    ),
+                    file=sys.stderr)
+        except ImportError:
+            # jsonschema is optional; validation is silently skipped when absent.
+            pass
+
def has_feature(self, feature_name):
    """Return True if *feature_name* is in this configuration's feature set.

    *feature_name* must be one of AVAILABLE_FEATURES (asserted).
    """
    assert feature_name in AVAILABLE_FEATURES
    return feature_name in self._feature_set
# For the purposes of buildtool, force Unix-style directory separators.
return [v.replace("\\", "/")[len(args.glob_dir)+1:] for v in sorted(result_paths)]
- build_dirs, requests = BUILDRULES.generate(config, glob, common)
+ requests = BUILDRULES.generate(config, glob, common)
requests = filtration.apply_filters(requests, config)
requests = utils.flatten_requests(requests, config, common)
+ build_dirs = utils.compute_directories(requests)
+
if args.format == "gnumake":
print(makefile.get_gnumake_rules(
build_dirs,
--- /dev/null
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io
+
+class CommentStripper(object):
+    """Removes lines starting with "//" from a file stream.
+
+    Wraps an open text-mode file object and exposes a read() method, so it
+    can be passed to json.load() in place of the raw file. Only comments
+    that begin at the very start of a line are stripped; the comment line's
+    trailing newline is removed along with it. A small state machine is
+    carried across read() calls via self.state.
+    """
+
+    def __init__(self, f):
+        # f: the underlying file-like object to read from.
+        self.f = f
+        # state 0 = start of line; 1 = saw one leading '/';
+        # 2 = middle of a non-comment line; 3 = inside a comment line.
+        self.state = 0
+
+    def read(self, size=-1):
+        """Read up to *size* characters, with leading-"//" lines removed."""
+        # NOTE: despite the name, these are str characters (text mode),
+        # not bytes.
+        bytes = self.f.read(size)
+        # TODO: Do we need to read more bytes if comments were stripped
+        # in order to obey the size request?
+        return "".join(self._strip_comments(bytes))
+
+    def _strip_comments(self, bytes):
+        # Generator: yields each character that survives comment stripping.
+        for byte in bytes:
+            if self.state == 0:
+                # state 0: start of a line
+                if byte == "/":
+                    # Possible start of a "//" comment; hold the '/' back.
+                    self.state = 1
+                elif byte == "\n":
+                    self.state = 0
+                    yield byte
+                else:
+                    self.state = 2
+                    yield byte
+            elif self.state == 1:
+                # state 1: read a single '/'
+                if byte == "/":
+                    # Confirmed "//": swallow the rest of the line.
+                    self.state = 3
+                elif byte == "\n":
+                    self.state = 0
+                    yield "/" # the one that was skipped
+                    yield "\n"
+                else:
+                    # Not a comment after all; emit the withheld '/'.
+                    self.state = 2
+                    yield "/" # the one that was skipped
+                    yield byte
+            elif self.state == 2:
+                # state 2: middle of a line, no comment
+                if byte == "\n":
+                    self.state = 0
+                    yield byte
+            elif self.state == 3:
+                # state 3: inside a comment
+                # Drop everything, including the newline that ends the line.
+                if byte == "\n":
+                    self.state = 0
pass
+class InclusionFilter(Filter):
+    """Filter that matches (includes) every file."""
+
+    def match(self, file):
+        # Unconditionally include the file.
+        return True
+
+
class ExclusionFilter(Filter):
    """Filter that matches (excludes) every file."""

    def match(self, file):
        # Unconditionally exclude the file.
        return False
return filters
-def _apply_resource_filters(old_requests, config):
+class ResourceFilterInfo(object):
+    """Bookkeeping for resource-bundle filtering within one category.
+
+    Collects the genrb input files for a single category, derives one
+    filter file per input file, accumulates filter rules per file, and
+    finally produces the requests that write those filter files.
+    """
+
+    def __init__(self, category):
+        # The request category (e.g. a data subdirectory) this info covers.
+        self.category = category
+        # Directory (under {TMP_DIR}) holding the generated filter files.
+        self.filter_tmp_dir = "filters/%s" % category
+        # The three fields below stay None until apply_to_requests() runs.
+        self.input_files = None
+        self.filter_files = None
+        self.rules_by_file = None
+
+    def apply_to_requests(self, all_requests):
+        """Attach --filterDir arguments to this category's genrb requests.
+
+        Mutates the matching requests in place (args and dep_files).
+        """
+        # Call this method only once per list of requests.
+        assert self.input_files is None
+        for request in all_requests:
+            if request.category != self.category:
+                continue
+            if not isinstance(request, AbstractExecutionRequest):
+                continue
+            if request.tool != IcuTool("genrb"):
+                continue
+            self._set_files(request.input_files)
+            # Add dependencies directly to dep_files
+            request.dep_files += self.filter_files
+            arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
+            request.args = "%s %s" % (arg_str, request.args)
+
+        # Make sure we found the target request
+        if self.input_files is None:
+            print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
+            self.input_files = []
+            self.filter_files = []
+            self.rules_by_file = []
+
+    def _set_files(self, files):
+        """Record the input files and derive the matching filter-file paths."""
+        # Note: The input files to genrb for a certain category should always
+        # be the same. For example, there are often two genrb calls: one for
+        # --writePoolBundle, and the other for --usePoolBundle. They are both
+        # expected to have the same list of input files.
+        if self.input_files is not None:
+            assert self.input_files == files
+            return
+        self.input_files = list(files)
+        # One filter file per input file, named after the input's basename.
+        self.filter_files = [
+            TmpFile("%s/%s" % (self.filter_tmp_dir, basename))
+            for basename in (
+                file.filename[file.filename.rfind("/")+1:]
+                for file in files
+            )
+        ]
+        # Parallel list: rules accumulated for each input file.
+        self.rules_by_file = [[] for _ in range(len(files))]
+
+    def add_rules(self, file_filter, rules):
+        """Append *rules* to every input file accepted by *file_filter*."""
+        for file, rule_list in zip(self.input_files, self.rules_by_file):
+            if file_filter.match(file):
+                rule_list += rules
+
+    def make_requests(self):
+        """Return the requests that materialize the filter files on disk.
+
+        Filter files sharing an identical rule list are deduplicated: the
+        first one is written via PrintFileRequest and the remainder are
+        produced by CopyRequest from it.
+        """
+        # Map from rule list to filter files with that rule list
+        unique_rules = defaultdict(list)
+        for filter_file, rules in zip(self.filter_files, self.rules_by_file):
+            unique_rules[tuple(rules)].append(filter_file)
+
+        new_requests = []
+        i = 0  # running counter to keep request names unique
+        for rules, filter_files in unique_rules.items():
+            base_filter_file = filter_files[0]
+            new_requests += [
+                PrintFileRequest(
+                    name = "%s_print_%d" % (self.category, i),
+                    output_file = base_filter_file,
+                    content = self._generate_resource_filter_txt(rules)
+                )
+            ]
+            i += 1
+            for filter_file in filter_files[1:]:
+                new_requests += [
+                    CopyRequest(
+                        name = "%s_copy_%d" % (self.category, i),
+                        input_file = base_filter_file,
+                        output_file = filter_file
+                    )
+                ]
+                i += 1
+        return new_requests
+
+    @classmethod
+    def _generate_resource_filter_txt(cls, rules):
+        """Render a rule list as the text content of a filter file."""
+        result = "# Caution: This file is automatically generated\n\n"
+        result += "\n".join(rules)
+        return result
+
+
+def _apply_resource_filters(all_requests, config):
    """Creates filters for looking within resource bundle files."""
-    return old_requests
+    json_data = config.filters_json_data
+    if "resourceFilters" not in json_data:
+        # No resource filters configured; leave the request list untouched.
+        return all_requests
+
+    # One ResourceFilterInfo per category, created lazily on first use.
+    collected = {}
+    for entry in json_data["resourceFilters"]:
+        if "files" in entry:
+            file_filter = Filter.create_from_json(entry["files"])
+        else:
+            # No "files" key: the rules apply to every file in the category.
+            file_filter = InclusionFilter()
+        for category in entry["categories"]:
+            # not defaultdict because we need to pass arguments to the constructor
+            if category not in collected:
+                filter_info = ResourceFilterInfo(category)
+                filter_info.apply_to_requests(all_requests)
+                collected[category] = filter_info
+            else:
+                filter_info = collected[category]
+            filter_info.add_rules(file_filter, entry["rules"])
+
+    # Add the filter generation requests to the beginning so that by default
+    # they are made before genrb gets run (order is required by windirect)
+    new_requests = []
+    for filter_info in collected.values():
+        new_requests += filter_info.make_requests()
+    new_requests += all_requests
+    return new_requests
--- /dev/null
+// Copyright (C) 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+{
+ "$id": "http://unicode.org/icu-filter-schema",
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "description": "JSON Schema for an ICU data filter file",
+ "type": "object",
+ "properties": {
+ "localeFilter": { "$ref": "#/definitions/filter" },
+ "featureFilters": {
+ "type": "object",
+ "additionalProperties": { "$ref": "#/definitions/filter" }
+ },
+ "resourceFilters": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "categories": {
+ "type": "array",
+ "items": { "type": "string" }
+ },
+ "files": { "$ref": "#/definitions/filter" },
+ "rules": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "pattern": "^[+-]/(\\w+(/\\w+)*)?$"
+ }
+ }
+ },
+ "required": ["categories", "rules"],
+ "additionalProperties": false
+ }
+ }
+ },
+ "additionalProperties": false,
+ "definitions": {
+ "filter": {
+ "type": "object",
+ "oneOf": [
+ {
+ "properties": {
+ "filterType": { "$ref": "#/definitions/filterType" },
+ "whitelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["whitelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": { "$ref": "#/definitions/filterType" },
+ "blacklist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["blacklist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": { "$ref": "#/definitions/filterType" }
+ },
+ "additionalProperties": false
+ }
+ ]
+ },
+ "filterType": {
+ "type": "string",
+ "enum": [
+ "file-stem",
+ "language",
+ "regex",
+ "exclude"
+ ]
+ },
+ "stringList": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "minItems": 1,
+ "uniqueItems": true
+ }
+ }
+}
import copy
import sys
-
+from . import *
from . import utils
return [f for _, f in set((type(f), f) for f in files)]
+def compute_directories(requests):
+    """Return a sorted list of all directories needed by the requests' outputs.
+
+    Scans every output file of every request (including temporary files) and
+    collects the directory portion of each resolved path.
+    """
+    dirs = set()
+    for file in get_all_output_files(requests, include_tmp=True):
+        # dir_for(file) yields the file's root prefix (e.g. "{OUT_DIR}" or
+        # "{TMP_DIR}" — presumably; confirm against dir_for's definition).
+        path = "%s/%s" % (dir_for(file), file.filename)
+        # Strip the basename, keeping only the directory component.
+        dirs.add(path[:path.rfind("/")])
+    return list(sorted(dirs))
+
+
class SpaceSeparatedList(list):
"""A list that joins itself with spaces when converted to a string."""
def __str__(self):
def generate(config, glob, common_vars):
- build_dirs = ["{OUT_DIR}", "{TMP_DIR}"]
-
requests = []
requests += generate_rb(config, glob, common_vars)
requests += generate_sprep(config, glob, common_vars)
)
]
- return (build_dirs, requests)
+ return requests
def generate_rb(config, glob, common_vars):