From eac8f4b31ab7395abb3a216aa17bafe7af6314ed Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Wed, 27 Feb 2019 16:09:17 -0800 Subject: [PATCH] ICU-20460 Adding mechanism to build unicore data into dat file. --- icu4c/source/configure | 1 + icu4c/source/configure.ac | 1 + icu4c/source/data/BUILDRULES.py | 34 +++++++- icu4c/source/data/Makefile.in | 110 ++---------------------- icu4c/source/data/buildtool/__main__.py | 10 +++ icu4c/source/data/icu4j-readme.txt | 12 ++- icu4c/source/data/makedata.mak | 2 + 7 files changed, 65 insertions(+), 105 deletions(-) diff --git a/icu4c/source/configure b/icu4c/source/configure index 2ae7d6ff219..8fbdbe667b6 100755 --- a/icu4c/source/configure +++ b/icu4c/source/configure @@ -9133,6 +9133,7 @@ else --seqmode parallel \ --src_dir "$srcdir/data" \ --filter_file "$ICU_DATA_FILTER_FILE" \ + $ICU_DATA_BUILDTOOL_OPTS \ > data/rules.mk if test "$?" != "0"; then as_fn_error $? "Python failed to run; see above error." "$LINENO" 5 diff --git a/icu4c/source/configure.ac b/icu4c/source/configure.ac index ef3e72fec48..5f8a32e65c2 100644 --- a/icu4c/source/configure.ac +++ b/icu4c/source/configure.ac @@ -1397,6 +1397,7 @@ else --seqmode parallel \ --src_dir "$srcdir/data" \ --filter_file "$ICU_DATA_FILTER_FILE" \ + $ICU_DATA_BUILDTOOL_OPTS \ > data/rules.mk if test "$?" != "0"; then AC_MSG_ERROR(Python failed to run; see above error.) 
diff --git a/icu4c/source/data/BUILDRULES.py b/icu4c/source/data/BUILDRULES.py index bdcd781418b..af1da51991e 100644 --- a/icu4c/source/data/BUILDRULES.py +++ b/icu4c/source/data/BUILDRULES.py @@ -29,6 +29,7 @@ def generate(config, glob, common_vars): requests += generate_brkitr_dictionaries(config, glob, common_vars) requests += generate_normalization(config, glob, common_vars) requests += generate_coll_ucadata(config, glob, common_vars) + requests += generate_full_unicore_data(config, glob, common_vars) requests += generate_unames(config, glob, common_vars) requests += generate_ulayout(config, glob, common_vars) requests += generate_misc(config, glob, common_vars) @@ -273,7 +274,8 @@ def generate_brkitr_dictionaries(config, glob, common_vars): def generate_normalization(config, glob, common_vars): # NRM Files input_files = [InFile(filename) for filename in glob("in/*.nrm")] - input_files.remove(InFile("in/nfc.nrm")) # nfc.nrm is pre-compiled into C++ + # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data + input_files.remove(InFile("in/nfc.nrm")) output_files = [OutFile(v.filename[3:]) for v in input_files] return [ RepeatedExecutionRequest( @@ -308,6 +310,36 @@ def generate_coll_ucadata(config, glob, common_vars): ] +def generate_full_unicore_data(config, glob, common_vars): + # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu) + # are hardcoded in the common DLL and therefore not included in the data package any more. + # They are not built by default but need to be built for ICU4J data, + # both in the .jar and in the .dat file (if ICU4J uses the .dat file). + # See ICU-4497. 
+ if not config.include_uni_core_data: + return [] + + basenames = [ + "pnames.icu", + "uprops.icu", + "ucase.icu", + "ubidi.icu", + "nfc.nrm" + ] + input_files = [InFile("in/%s" % bn) for bn in basenames] + output_files = [OutFile(bn) for bn in basenames] + return [ + RepeatedExecutionRequest( + name = "unicore", + category = "unicore", + input_files = input_files, + output_files = output_files, + tool = IcuTool("icupkg"), + args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}" + ) + ] + + def generate_unames(config, glob, common_vars): # Unicode Character Names input_file = InFile("in/unames.icu") diff --git a/icu4c/source/data/Makefile.in b/icu4c/source/data/Makefile.in index 449c2696e68..43a8e01901c 100644 --- a/icu4c/source/data/Makefile.in +++ b/icu4c/source/data/Makefile.in @@ -82,35 +82,8 @@ endif OUTTMPDIR=$(OUTDIR)/tmp MAINBUILDDIR=$(OUTDIR)/build BUILDDIR=$(MAINBUILDDIR)/$(ICUDATA_PLATFORM_NAME) -UNICODEDATADIR=$(SRCDATADIR)/unidata -LOCSRCDIR=$(SRCDATADIR)/locales -CURRSRCDIR=$(SRCDATADIR)/curr -CURRBLDDIR=$(BUILDDIR)/curr -LANGSRCDIR=$(SRCDATADIR)/lang -LANGBLDDIR=$(BUILDDIR)/lang -REGIONSRCDIR=$(SRCDATADIR)/region -REGIONBLDDIR=$(BUILDDIR)/region -ZONESRCDIR=$(SRCDATADIR)/zone -ZONEBLDDIR=$(BUILDDIR)/zone -UNITSRCDIR=$(SRCDATADIR)/unit -UNITBLDDIR=$(BUILDDIR)/unit -COLSRCDIR=$(SRCDATADIR)/coll -COLBLDDIR=$(BUILDDIR)/coll -RBNFSRCDIR=$(SRCDATADIR)/rbnf -RBNFBLDDIR=$(BUILDDIR)/rbnf -TRANSLITSRCDIR=$(SRCDATADIR)/translit -TRANSLITBLDDIR=$(BUILDDIR)/translit MISCSRCDIR=$(SRCDATADIR)/misc -BRKSRCDIR=$(SRCDATADIR)/brkitr -BRKBLDDIR=$(BUILDDIR)/brkitr -DICTSRCDIR=$(BRKSRCDIR)/dictionaries -BRKRULESRCDIR=$(BRKSRCDIR)/rules -MISCSRCDIR=$(SRCDATADIR)/misc -UCMSRCDIR=$(SRCDATADIR)/mappings -SPREPSRCDIR=$(SRCDATADIR)/sprep -COMINCDIR=$(top_srcdir)/common/unicode SRCLISTDEPS=Makefile $(srcdir)/Makefile.in -BUILD_DIRS=$(OUTDIR) $(MAINBUILDDIR) $(BUILDDIR) $(CURRBLDDIR) $(LANGBLDDIR) $(REGIONBLDDIR) $(ZONEBLDDIR) $(UNITBLDDIR) $(BRKBLDDIR) 
$(COLBLDDIR) $(RBNFBLDDIR) $(TRANSLITBLDDIR) $(OUTTMPDIR) $(OUTTMPDIR_390STUB) $(OUTTMPDIR)/$(CURR_TREE) $(OUTTMPDIR)/$(LANG_TREE) $(OUTTMPDIR)/$(REGION_TREE) $(OUTTMPDIR)/$(ZONE_TREE) $(OUTTMPDIR)/$(UNIT_TREE) $(OUTTMPDIR)/$(COLLATION_TREE) $(OUTTMPDIR)/$(RBNF_TREE) $(OUTTMPDIR)/$(TRANSLIT_TREE) $(OUTTMPDIR)/$(BREAK_TREE) # Variable names for rules.mk OUT_DIR=$(BUILDDIR) @@ -145,7 +118,7 @@ check-exhaustive: check distclean-local: clean $(RMV) Makefile -all-local: build-dir icupkg.inc build-local packagedata $(POST_DATA_BUILD) $(OS390PKG) +all-local: icupkg.inc build-local packagedata $(POST_DATA_BUILD) $(OS390PKG) dist-local: @@ -153,7 +126,7 @@ clean-map: -test -z *.map || $(RMV) *.map clean-local: cleanpackage cleanfiles clean-map - $(RMV) build-dir* build-local packagedata uni-core-data + $(RMV) $(OUTDIR) build-local packagedata uni-core-data cleanfiles: test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) @@ -252,7 +225,7 @@ include $(top_builddir)/$(subdir)/rules.mk ifeq ($(ENABLE_SO_VERSION_DATA),1) ifeq ($(PKGDATA_MODE),dll) SO_VERSION_DATA = $(OUTTMPDIR)/icudata.res -$(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc | build-dir +$(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc ifeq ($(MSYS_RC_MODE),1) rc.exe -i$(srcdir)/../common -i$(top_builddir)/common -fo$@ $(CPPFLAGS) $< else @@ -264,36 +237,6 @@ endif PKGDATA_LIST = $(TMP_DIR)/icudata.lst -##### Define all the data files. the build rule that depends on them is below. -# X_FILES_SHORT = just the base names (for lists) -# X_FILES = full paths (for dependency) - -## DAT files - Misc. data files. -# 2005-may-05 Removed Unicode properties files (unorm.icu, uprops.icu, ucase.icu, ubidi.icu) -# from data build. See Jitterbug 4497. (makedata.mak revision 1.117) -# 2010-dec Removed pnames.icu. -# These are now hardcoded in ICU4C and only loaded in ICU4J. 
-# -DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu nfkc.nrm nfkc_cf.nrm uts46.nrm -DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%) - -## All generated files -ALL_FILES = $(DAT_FILES) $(CNV_FILES) $(CNV_FILES_SPECIAL) $(BRK_FILES) $(DICT_FILES) $(RES_FILES) $(INDEX_RES_FILE) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(UNIT_FILES) $(COLLATION_FILES) $(BRK_RES_FILES) $(RBNF_FILES) $(TRANSLIT_FILES) $(SPREP_FILES) $(CFU_FILES) -ALL_INDEX_SRC_FILES = $(PKGDATA_LIST) $(INDEX_FILE) $(CURR_INDEX_FILE) $(LANG_INDEX_FILE) $(REGION_INDEX_FILE) $(ZONE_INDEX_FILE) $(UNIT_INDEX_FILE) $(COLLATION_INDEX_FILE) $(BRK_RES_INDEX_FILE) $(RBNF_INDEX_FILE) -# a list to use in the .lst files (package-relative) -COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT) -BRK_FILES_LIST=$(BRK_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) $(DICT_FILES_SHORT) -LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT) $(UNIT_FILES_SHORT) -MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT) -UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu nfc.nrm -UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%) - -ifneq ($(INCLUDE_UNI_CORE_DATA),) -MISC_FILES_LIST+=$(UNI_CORE_DATA) -build-local: uni-core-data - echo timestamp > $@ -endif - ##################################################### # General data build rules @@ -301,10 +244,10 @@ endif CLEANFILES = *~ icupkg.inc *.x ifeq ($(ICUDATA_SOURCE_ARCHIVE),) -build-local: build-dir $(SO_VERSION_DATA) $(ICUDATA_ALL_OUTPUT_FILES) $(PKGDATA_LIST) $(OS390LIST) +build-local: $(SO_VERSION_DATA) $(ICUDATA_ALL_OUTPUT_FILES) $(PKGDATA_LIST) $(OS390LIST) echo timestamp > $@ else -build-local: build-dir $(SO_VERSION_DATA) $(PKGDATA_LIST) $(OS390LIST) +build-local: $(SO_VERSION_DATA) $(PKGDATA_LIST) $(OS390LIST) 
echo timestamp > $@ $(PKGDATA_LIST): $(SRCLISTDEPS) $(ICUDATA_SOURCE_ARCHIVE) ifneq ($(ICUDATA_SOURCE_IS_NATIVE_TARGET),YES) @@ -317,32 +260,12 @@ endif endif -$(BUILD_DIRS): build-dir - -build-dir: - @-$(RMV) $@ - echo timestamp > $@.tmp - @list='$(BUILD_DIRS)'; \ - for dir in $$list; do \ - if ! test -d $$dir; then \ - echo $(MKINSTALLDIRS) $(BUILD_DIRS); \ - $(MKINSTALLDIRS) $(BUILD_DIRS); \ - fi; \ - done - mv $@.tmp $@ - -# The | is an order-only prerequisite. This helps when the -j option is used, -# and we don't want the files to be built before the directories are built. -ifneq ($(filter order-only,$(.FEATURES)),) -$(ALL_FILES) $(ALL_INDEX_SRC_FILES): | build-dir -endif - # if the tzcode directory contains a new tzdata*.tar.gz file, use it for zoneinfo ifeq ($(TZDATA),) TZDATA = $(firstword $(wildcard $(top_builddir)/tools/tzcode/tzdata*.tar.gz) $(wildcard $(top_srcdir)/tools/tzcode/tzdata*.tar.gz)) endif -# TODO: Make the TZDATA override part of Python buildtool +# TODO(ICU-20466): Make the TZDATA override part of Python buildtool ifneq ($(TZDATA),) TZCODE_DIR=$(top_builddir)/tools/tzcode @@ -362,14 +285,6 @@ $(ZONEINFO): $(TZDATA) # end of zoneinfo-generation endif -# The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu) -# are hardcoded in the common DLL and therefore not included in the data package any more. -# They are not built by default but need to be built for ICU4J data and for getting the .c source files -# when updating the Unicode data. -uni-core-data: build-dir $(UNI_CORE_TARGET_DATA) - @echo Unicode .icu files built to $(BUILDDIR) - echo timestamp > $@ - # Build the ICU4J icudata.jar. # Command line: # (Run this from the output data folder which may not be .../source/data in an out-of-source build.) 
@@ -385,19 +300,11 @@ ICU4J_TZDATA_FILES=zoneinfo64 metaZones timezoneTypes windowsZones ICU4J_DATA_DIRNAME=com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b ICU4J_TZDATA_PATHS=$(ICU4J_TZDATA_FILES:%="$(ICU4J_DATA_DIRNAME)/%.res") -# Targets for prebuilt Unicode data -$(BUILDDIR)/%.icu: $(SRCDATADIR)/in/%.icu | $(DIRS) - $(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@ - -$(BUILDDIR)/nfc.nrm: $(SRCDATADIR)/in/nfc.nrm | $(DIRS) - $(INVOKE) $(TOOLBINDIR)/icupkg -t$(ICUDATA_CHAR) $< $@ - # generate icu4j-related data to $(OUTDIR)/icu4j/com/ibm/icu/impl/data/... -generate-data: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data +generate-data: packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat mkdir -p $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME) mkdir -p $(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME) - echo $(UNI_CORE_DATA) > $(OUTDIR)/icu4j/add.txt - $(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME) + $(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/$(ICU4J_DATA_DIRNAME) mv $(ICU4J_TZDATA_PATHS:%=$(OUTDIR)/icu4j/%) "$(OUTDIR)/icu4j/tzdata/$(ICU4J_DATA_DIRNAME)" $(OUTDIR)/icu4j/icutzdata.jar: generate-data @@ -408,6 +315,7 @@ $(OUTDIR)/icu4j/icutzdata.jar: generate-data # - swap the ICU data # - extract all data items # - package them into the .jar file +# TODO(ICU-20466): Move this to Python $(OUTDIR)/icu4j/icudata.jar: generate-data $(JAR) cf $(OUTDIR)/icu4j/icudata.jar -C $(OUTDIR)/icu4j $(ICU4J_DATA_DIRNAME)/ diff --git a/icu4c/source/data/buildtool/__main__.py b/icu4c/source/data/buildtool/__main__.py index 6bc2ef9ac7d..41435330355 100644 --- a/icu4c/source/data/buildtool/__main__.py +++ b/icu4c/source/data/buildtool/__main__.py @@ -84,6 +84,12 @@ 
flag_parser.add_argument( choices = ["unihan", "implicithan"], default = "unihan" ) +flag_parser.add_argument( + "--include_uni_core_data", + help = "Include the full Unicode core data in the dat file.", + default = False, + action = "store_true" +) flag_parser.add_argument( "--seqmode", help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.", @@ -119,9 +125,13 @@ class Config(object): def __init__(self, args): # Process arguments self.max_parallel = (args.seqmode == "parallel") + # Either "unihan" or "implicithan" self.coll_han_type = args.collation_ucadata + # Boolean: Whether to include core Unicode data files in the .dat file + self.include_uni_core_data = args.include_uni_core_data + # Default fields before processing filter file self.filters_json_data = {} diff --git a/icu4c/source/data/icu4j-readme.txt b/icu4c/source/data/icu4j-readme.txt index 943d722dedf..00033adcfb3 100644 --- a/icu4c/source/data/icu4j-readme.txt +++ b/icu4c/source/data/icu4j-readme.txt @@ -17,9 +17,15 @@ In the following, $icu4j_root is the ICU4J root directory $jdk_bin is the JDK bin directory (for the jar tool) -1. Download and build ICU4C. For more instructions on downloading and building - ICU4C, see the ICU4C readme at: - http://source.icu-project.org/repos/icu/trunk/icu4c/readme.html#HowToBuild +1. Download, configure, and build ICU4C. 
When you configure ICU4C, you must + set the environment variable ICU_DATA_BUILDTOOL_OPTS to + "--include_uni_core_data" to build additional required ICU4J data: + + ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data ./runConfigureICU Linux + + For more instructions on downloading and building ICU4C, + see the ICU4C readme at: + https://htmlpreview.github.io/?https://github.com/unicode-org/icu/blob/master/icu4c/readme.html#HowToBuild (Windows: build as 'x86, Release' otherwise you will have to set 'CFG' differently below.) *NOTE* You should do a full rebuild after any data changes. diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak index f9e85094432..9991d062f69 100644 --- a/icu4c/source/data/makedata.mak +++ b/icu4c/source/data/makedata.mak @@ -240,6 +240,8 @@ $(COREDATA_TS): --tool_cfg "$(CFG)" \ --out_dir "$(ICUBLD_PKG)" \ --tmp_dir "$(ICUTMP)" + --filter_file "$(ICU_DATA_FILTER_FILE)" \ + $(ICU_DATA_BUILDTOOL_OPTS) \ @echo "timestamp" > $(COREDATA_TS) -- 2.40.0