From: Shane F. Carr Date: Wed, 1 Sep 2021 18:30:14 +0000 (+0000) Subject: ICU-21545 Add icuwriteuprops tool X-Git-Tag: cldr/2021-09-15~15 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=92db25165fcbd8a7c616995f90f5d39a6babf10c;p=icu ICU-21545 Add icuwriteuprops tool See #1741 --- diff --git a/.gitignore b/.gitignore index 7322263265f..9d0654d4183 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Wildcards +*.1 *.8 *.a *.ao @@ -90,12 +91,8 @@ icu4c/source/samples/uresb/*.res icu4c/source/test-*.xml icu4c/source/test/hdrtst/ht_* icu4c/source/test/perf/howExpensiveIs/*.xml -icu4c/source/tools/gendict/*.1 -icu4c/source/tools/genrb/*.1 icu4c/source/tools/genrb/derb_* icu4c/source/tools/genren/urename.* -icu4c/source/tools/makeconv/*.1 -icu4c/source/tools/pkgdata/*.1 icu4j/*.jar icu4j/lib/*.jar tools/multi/packages/*.jar @@ -133,7 +130,6 @@ icu4c/source/config.log icu4c/source/config.status icu4c/source/config/Makefile.inc icu4c/source/config/icu-config -icu4c/source/config/icu-config.1 icu4c/source/config/icu.pc icu4c/source/config/icucross.inc icu4c/source/config/icucross.mk @@ -154,7 +150,6 @@ icu4c/source/extra/scrptrun/srtest.o icu4c/source/extra/scrptrun/srtest.exe icu4c/source/extra/uconv/pkgdatain.txt icu4c/source/extra/uconv/uconv -icu4c/source/extra/uconv/uconv.1 icu4c/source/extra/uconv/uconv.plg icu4c/source/extra/uconv/uconvmsg icu4c/source/i18n/i18n.res diff --git a/icu4c/source/allinone/allinone.sln b/icu4c/source/allinone/allinone.sln index fbbec359e48..858ad6ba9b4 100644 --- a/icu4c/source/allinone/allinone.sln +++ b/icu4c/source/allinone/allinone.sln @@ -106,6 +106,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pkgdata", "..\tools\pkgdata {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icuwriteuprops", "..\tools\icuwriteuprops\icuwriteuprops.vcxproj", "{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}" + 
ProjectSection(ProjectDependencies) = postProject + {6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0} + {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} + EndProjectSection +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stubdata", "..\stubdata\stubdata.vcxproj", "{203EC78A-0531-43F0-A636-285439BDE025}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toolutil", "..\tools\toolutil\toolutil.vcxproj", "{6B231032-3CB5-4EED-9210-810D666A23A0}" @@ -470,6 +476,22 @@ Global {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.Build.0 = Release|Win32 {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.ActiveCfg = Release|x64 {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.Build.0 = Release|x64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM.ActiveCfg = Debug|ARM + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM.Build.0 = Debug|ARM + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM64.Build.0 = Debug|ARM64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|Win32.ActiveCfg = Debug|Win32 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|Win32.Build.0 = Debug|Win32 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|x64.ActiveCfg = Debug|x64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|x64.Build.0 = Debug|x64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM.ActiveCfg = Release|ARM + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM.Build.0 = Release|ARM + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM64.ActiveCfg = Release|ARM64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM64.Build.0 = Release|ARM64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|Win32.ActiveCfg = Release|Win32 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|Win32.Build.0 = Release|Win32 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|x64.ActiveCfg = Release|x64 + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|x64.Build.0 = 
Release|x64 {203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM.ActiveCfg = Debug|ARM {203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM.Build.0 = Debug|ARM {203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM64.ActiveCfg = Debug|ARM64 diff --git a/icu4c/source/configure b/icu4c/source/configure index 86cd5b9ef92..9f8d19ec5b4 100755 --- a/icu4c/source/configure +++ b/icu4c/source/configure @@ -757,6 +757,7 @@ infodir docdir oldincludedir includedir +runstatedir localstatedir sharedstatedir sysconfdir @@ -862,6 +863,7 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' @@ -1114,6 +1116,15 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1251,7 +1262,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir + libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1404,6 +1415,7 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -8049,7 +8061,7 @@ echo "CXXFLAGS=$CXXFLAGS" # output the Makefiles -ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile 
test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile test/fuzzer/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile" +ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/icuwriteuprops/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/localecanperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile test/fuzzer/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -8792,6 +8804,7 @@ do "tools/icupkg/Makefile") 
CONFIG_FILES="$CONFIG_FILES tools/icupkg/Makefile" ;; "tools/icuswap/Makefile") CONFIG_FILES="$CONFIG_FILES tools/icuswap/Makefile" ;; "tools/pkgdata/Makefile") CONFIG_FILES="$CONFIG_FILES tools/pkgdata/Makefile" ;; + "tools/icuwriteuprops/Makefile") CONFIG_FILES="$CONFIG_FILES tools/icuwriteuprops/Makefile" ;; "tools/tzcode/Makefile") CONFIG_FILES="$CONFIG_FILES tools/tzcode/Makefile" ;; "tools/gencfu/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gencfu/Makefile" ;; "tools/escapesrc/Makefile") CONFIG_FILES="$CONFIG_FILES tools/escapesrc/Makefile" ;; diff --git a/icu4c/source/configure.ac b/icu4c/source/configure.ac index 525dd126e9f..decc0684ca2 100644 --- a/icu4c/source/configure.ac +++ b/icu4c/source/configure.ac @@ -1389,6 +1389,7 @@ AC_CONFIG_FILES([icudefs.mk \ tools/icupkg/Makefile \ tools/icuswap/Makefile \ tools/pkgdata/Makefile \ + tools/icuwriteuprops/Makefile \ tools/tzcode/Makefile \ tools/gencfu/Makefile \ tools/escapesrc/Makefile \ diff --git a/icu4c/source/test/cintltst/ucptrietest.c b/icu4c/source/test/cintltst/ucptrietest.c index ee07e25da67..af578f7a1a5 100644 --- a/icu4c/source/test/cintltst/ucptrietest.c +++ b/icu4c/source/test/cintltst/ucptrietest.c @@ -726,6 +726,7 @@ trieTestGolden(const char *testName, usrc_writeCopyrightHeader(stream, "#", 2021); usrc_writeFileNameGeneratedBy(stream, "#", testName, "ucptrietest.c"); fputs("[code_point_trie.struct]\n", stream); + fprintf(stream, "name = \"%s\"\n", testName); usrc_writeUCPTrie(stream, testName, trie, UPRV_TARGET_SYNTAX_TOML); fputs("\n[code_point_trie.testdata]\n", stream); fputs("# Array of (limit, value) pairs\n", stream); diff --git a/icu4c/source/tools/Makefile.in b/icu4c/source/tools/Makefile.in index c3f81d6a2c1..74f60d4acef 100644 --- a/icu4c/source/tools/Makefile.in +++ b/icu4c/source/tools/Makefile.in @@ -17,7 +17,7 @@ subdir = tools SUBDIRS = toolutil ctestfw makeconv genrb genbrk \ gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \ -gentest gennorm2 gencfu gendict +gentest 
gennorm2 gencfu gendict icuwriteuprops ifneq (@platform_make_fragment_name@,mh-cygwin-msvc) SUBDIRS += escapesrc diff --git a/icu4c/source/tools/icuwriteuprops/Makefile.in b/icu4c/source/tools/icuwriteuprops/Makefile.in new file mode 100644 index 00000000000..d4f5fa270de --- /dev/null +++ b/icu4c/source/tools/icuwriteuprops/Makefile.in @@ -0,0 +1,94 @@ +## Makefile.in for ICU - tools/icuwriteuprops +## Copyright (C) 2021 and later: Unicode, Inc. and others. +## License & terms of use: http://www.unicode.org/copyright.html + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = ../.. + +include $(top_builddir)/icudefs.mk + +## Build directory information +subdir = tools/icuwriteuprops + +TARGET_STUB_NAME = icuwriteuprops + +SECTION = 1 + +MAN_FILES = $(TARGET_STUB_NAME).$(SECTION) + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) $(MAN_FILES) + +## Target information +TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT) + +CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(srcdir)/../toolutil +LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M) + +SOURCES = $(shell cat $(srcdir)/sources.txt) +OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.c,%.o, $(SOURCES))) + +DEPS = $(OBJECTS:.o=.d) + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean distclean-local dist dist-local check check-local install-man + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: all check-local + +all-local: $(TARGET) $(MAN_FILES) + +install-local: all-local install-man + $(MKINSTALLDIRS) $(DESTDIR)$(bindir) + $(INSTALL) $(TARGET) $(DESTDIR)$(bindir) + +install-man: $(MAN_FILES) + $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION) + $(INSTALL_DATA) $? 
$(DESTDIR)$(mandir)/man$(SECTION) + + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + $(RMV) $(TARGET) $(OBJECTS) + +distclean-local: clean-local + $(RMV) Makefile + +check-local: all-local + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET) : $(OBJECTS) + $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) + $(POST_BUILD_STEP) + + +%.$(SECTION): $(srcdir)/%.$(SECTION).in + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +ifeq (,$(MAKECMDGOALS)) +-include $(DEPS) +else +ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),) +-include $(DEPS) +endif +endif + diff --git a/icu4c/source/tools/icuwriteuprops/icuwriteuprops.1.in b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.1.in new file mode 100644 index 00000000000..5f0e405ee88 --- /dev/null +++ b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.1.in @@ -0,0 +1,13 @@ +.\" Hey, Emacs! This is -*-nroff-*- you know... +.\" +.\" icuwriteuprops.1: manual page for the icuwriteuprops utility +.\" +.\" Copyright (C) 2016 and later: Unicode, Inc. and others. +.\" License & terms of use: http://www.unicode.org/copyright.html +.\" +.\" Manual page by Shane Carr . +.\" +.TH ICUWRITEUPROPS 1 "12 June 2021" "ICU MANPAGE" "ICU @VERSION@ Manual" +.SH NAME +.B icuwriteuprops +\- Writes text files with Unicode properties data from ICU. diff --git a/icu4c/source/tools/icuwriteuprops/icuwriteuprops.cpp b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.cpp new file mode 100644 index 00000000000..9a74efaf0f5 --- /dev/null +++ b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.cpp @@ -0,0 +1,208 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include "toolutil.h" +#include "uoptions.h" +#include "cmemory.h" +#include "charstr.h" +#include "cstring.h" +#include "unicode/uchar.h" +#include "unicode/errorcode.h" +#include "unicode/uniset.h" +#include "unicode/putil.h" +#include "unicode/umutablecptrie.h" +#include "writesrc.h" + +U_NAMESPACE_USE + +/* + * Global - verbosity + */ +UBool VERBOSE = FALSE; +UBool QUIET = FALSE; + +UBool haveCopyright = TRUE; +UCPTrieType trieType = UCPTRIE_TYPE_SMALL; + +void handleError(ErrorCode& status, const char* context) { + if (status.isFailure()) { + std::cerr << "Error: " << context << ": " << status.errorName() << std::endl; + exit(status.reset()); + } +} + +void dumpBinaryProperty(UProperty uproperty, FILE* f) { + IcuToolErrorCode status("icuwriteuprops: dumpBinaryProperty"); + const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME); + const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME); + const USet* uset = u_getBinaryPropertySet(uproperty, status); + handleError(status, fullPropName); + + fputs("[[binary_property]]\n", f); + fprintf(f, "long_name = \"%s\"\n", fullPropName); + fprintf(f, "short_name = \"%s\"\n", shortPropName); + usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML); +} + +void dumpEnumeratedProperty(UProperty uproperty, FILE* f) { + IcuToolErrorCode status("icuwriteuprops: dumpEnumeratedProperty"); + const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME); + const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME); + const UCPMap* umap = u_getIntPropertyMap(uproperty, status); + handleError(status, fullPropName); + + fputs("[[enum_property]]\n", f); + fprintf(f, "long_name = \"%s\"\n", fullPropName); + fprintf(f, "short_name = \"%s\"\n", shortPropName); + usrc_writeUCPMap(f, umap, uproperty, UPRV_TARGET_SYNTAX_TOML); + fputs("\n", f); + + 
U_ASSERT(u_getIntPropertyMinValue(uproperty) >= 0); + int32_t maxValue = u_getIntPropertyMaxValue(uproperty); + U_ASSERT(maxValue >= 0); + UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32; + if (maxValue <= 0xff) { + width = UCPTRIE_VALUE_BITS_8; + } else if (maxValue <= 0xffff) { + width = UCPTRIE_VALUE_BITS_16; + } + LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(umap, status)); + LocalUCPTriePointer utrie(umutablecptrie_buildImmutable( + builder.getAlias(), + trieType, + width, + status)); + handleError(status, fullPropName); + + fputs("[enum_property.code_point_trie]\n", f); + usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML); +} + +enum { + OPT_HELP_H, + OPT_HELP_QUESTION_MARK, + OPT_COPYRIGHT, + OPT_TRIE_TYPE, + OPT_VERSION, + OPT_DESTDIR, + OPT_VERBOSE, + OPT_QUIET, + + OPT_COUNT +}; + +#define UOPTION_TRIE_TYPE UOPTION_DEF("trie-type", 't', UOPT_REQUIRES_ARG) + +static UOption options[]={ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, + UOPTION_COPYRIGHT, + UOPTION_TRIE_TYPE, + UOPTION_VERSION, + UOPTION_DESTDIR, + UOPTION_VERBOSE, + UOPTION_QUIET, +}; + +int main(int argc, char* argv[]) { + + U_MAIN_INIT_ARGS(argc, argv); + + /* preset then read command line options */ + options[OPT_DESTDIR].value=u_getDataDirectory(); + argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); + + if(options[OPT_VERSION].doesOccur) { + printf("icuwriteuprops version %s, ICU tool to write Unicode property .toml files\n", + U_ICU_DATA_VERSION); + printf("%s\n", U_COPYRIGHT_STRING); + exit(0); + } + + /* error handling, printing usage message */ + if(argc<0) { + fprintf(stderr, + "error in command line argument \"%s\"\n", + argv[-argc]); + } else if(argc<2) { + argc=-1; + } + + if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { + FILE *stdfile=argc<0 ? 
stderr : stdout; + fprintf(stdfile, + "usage: %s [-options] properties...\n" + "\tdump Unicode property data to .toml files\n" + "options:\n" + "\t-h or -? or --help this usage text\n" + "\t-V or --version show a version message\n" + "\t-c or --copyright include a copyright notice\n" + "\t-t or --trie-type set the trie type (small or fast, default small)\n" + "\t-d or --destdir destination directory, followed by the path\n" + "\t-v or --verbose Turn on verbose output\n" + "\t-q or --quiet do not display warnings and progress\n", + argv[0]); + return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; + } + + /* get the options values */ + haveCopyright = options[OPT_COPYRIGHT].doesOccur; + const char *destdir = options[OPT_DESTDIR].value; + VERBOSE = options[OPT_VERBOSE].doesOccur; + QUIET = options[OPT_QUIET].doesOccur; + + if (options[OPT_TRIE_TYPE].doesOccur) { + if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) { + trieType = UCPTRIE_TYPE_FAST; + } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) { + trieType = UCPTRIE_TYPE_SMALL; + } else { + fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n"); + return U_ILLEGAL_ARGUMENT_ERROR; + } + } + + for (int i=1; i + + + {C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F} + + + Application + false + MultiByte + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + .\$(Platform)\$(Configuration)\ + .\$(Platform)\$(Configuration)\ + + .\x86\$(Configuration)\ + .\x86\$(Configuration)\ + + true + false + + + + + $(OutDir)/icuwriteuprops.tlb + + + Level3 + Default + false + ..\..\..\include;..\..\common;..\toolutil;%(AdditionalIncludeDirectories) + $(OutDir)/icuwriteuprops.pch + $(OutDir)/ + $(OutDir)/ + $(OutDir)/icuwriteuprops.pdb + + + Console + $(OutDir)/icuwriteuprops.exe + ..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories) + + + copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir) + ..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs) + + + + + + true + 
MultiThreadedDebugDLL + + + icuucd.lib;icutud.lib;%(AdditionalDependencies) + + + + + + MultiThreadedDLL + true + + + icuuc.lib;icutu.lib;%(AdditionalDependencies) + + + + + + + + + \ No newline at end of file diff --git a/icu4c/source/tools/icuwriteuprops/icuwriteuprops.vcxproj.filters b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.vcxproj.filters new file mode 100644 index 00000000000..966815c2540 --- /dev/null +++ b/icu4c/source/tools/icuwriteuprops/icuwriteuprops.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {7641b9da-f313-4ee0-8c60-2c8050c87e45} + cpp;c;cxx;rc;def;r;odl;idl;hpj;bat + + + {0333a61f-f79b-490c-9761-a4e5966f3ff0} + h;hpp;hxx;hm;inl + + + {05869d75-29f4-43d9-bebc-9973e550d958} + ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe + + + + + Source Files + + + \ No newline at end of file diff --git a/icu4c/source/tools/icuwriteuprops/sources.txt b/icu4c/source/tools/icuwriteuprops/sources.txt new file mode 100644 index 00000000000..bce81176acf --- /dev/null +++ b/icu4c/source/tools/icuwriteuprops/sources.txt @@ -0,0 +1 @@ +icuwriteuprops.cpp diff --git a/icu4c/source/tools/makeconv/makeconv.cpp b/icu4c/source/tools/makeconv/makeconv.cpp index 37dc46203f1..1e9209d2bca 100644 --- a/icu4c/source/tools/makeconv/makeconv.cpp +++ b/icu4c/source/tools/makeconv/makeconv.cpp @@ -213,6 +213,13 @@ int main(int argc, char* argv[]) options[OPT_DESTDIR].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); + if(options[OPT_VERSION].doesOccur) { + printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", + dataInfo.formatVersion[0], dataInfo.formatVersion[1]); + printf("%s\n", U_COPYRIGHT_STRING); + exit(0); + } + /* error handling, printing usage message */ if(argc<0) { fprintf(stderr, @@ -244,13 +251,6 @@ int main(int argc, char* argv[]) return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } - if(options[OPT_VERSION].doesOccur) { - printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", - dataInfo.formatVersion[0], dataInfo.formatVersion[1]); - printf("%s\n", U_COPYRIGHT_STRING); - exit(0); - } - /* get the options values */ haveCopyright = options[OPT_COPYRIGHT].doesOccur; const char *destdir = options[OPT_DESTDIR].value; diff --git a/icu4c/source/tools/toolutil/writesrc.cpp b/icu4c/source/tools/toolutil/writesrc.cpp index 8f515e5794b..85b5fdb69d9 100644 --- a/icu4c/source/tools/toolutil/writesrc.cpp +++ b/icu4c/source/tools/toolutil/writesrc.cpp @@ -23,9 +23,16 @@ #include "unicode/utypes.h" #include "unicode/putil.h" #include "unicode/ucptrie.h" +#include "unicode/errorcode.h" +#include "unicode/uniset.h" +#include "unicode/usetiter.h" +#include "unicode/utf16.h" #include "utrie2.h" #include "cstring.h" #include "writesrc.h" +#include "util.h" + +U_NAMESPACE_USE static FILE * usrc_createWithoutHeader(const char *path, const char *filename) { @@ -328,7 +335,6 @@ usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax sprintf(line3, "\n};\n\n"); break; case UPRV_TARGET_SYNTAX_TOML: - fprintf(f, "name = \"%s\"\n", name); sprintf(line, "index = [\n "); sprintf(line2, "data_%d = [\n ", (int)width); sprintf(line3, "\n]\n"); @@ -357,6 +363,67 @@ usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax); } +U_CAPI void U_EXPORT2 +usrc_writeUnicodeSet( + FILE *f, + const USet *pSet, + UTargetSyntax syntax) { + // ccode is not yet supported + U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); + + // Write out a list of ranges + const UnicodeSet* set = UnicodeSet::fromUSet(pSet); + UnicodeSetIterator it(*set); + fprintf(f, "# Inclusive ranges of the code points in the set.\n"); + fprintf(f, "ranges = [\n"); + bool seenFirstString = false; + while (it.nextRange()) 
{ + if (it.isString()) { + if (!seenFirstString) { + seenFirstString = true; + fprintf(f, "]\nstrings = [\n"); + } + const UnicodeString& str = it.getString(); + fprintf(f, " "); + usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax); + fprintf(f, ",\n"); + } else { + U_ASSERT(!seenFirstString); + UChar32 start = it.getCodepoint(); + UChar32 end = it.getCodepointEnd(); + fprintf(f, " [0x%x, 0x%x],\n", start, end); + } + } + fprintf(f, "]\n"); +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPMap( + FILE *f, + const UCPMap *pMap, + UProperty uproperty, + UTargetSyntax syntax) { + // ccode is not yet supported + U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML); + (void) syntax; // silence unused variable errors + + // Print out list of ranges + UChar32 start = 0, end; + uint32_t value; + fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n"); + fprintf(f, "ranges = [\n"); + while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) { + if (uproperty != UCHAR_INVALID_CODE) { + const char* short_name = u_getPropertyValueName(uproperty, value, U_SHORT_PROPERTY_NAME); + fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, short_name); + } else { + fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value); + } + start = end + 1; + } + fprintf(f, "]\n"); +} + U_CAPI void U_EXPORT2 usrc_writeArrayOfMostlyInvChars(FILE *f, const char *prefix, @@ -395,3 +462,30 @@ usrc_writeArrayOfMostlyInvChars(FILE *f, fputs(postfix, f); } } + +U_CAPI void U_EXPORT2 +usrc_writeStringAsASCII(FILE *f, + const UChar* ptr, int32_t length, + UTargetSyntax) { + // For now, assume all UTargetSyntax values are valid here. 
+ fprintf(f, "\""); + int32_t i = 0; + UChar32 cp; + while (i < length) { + U16_NEXT(ptr, i, length, cp); + if (cp == u'"') { + fprintf(f, "\\\""); + } else if (ICU_Utility::isUnprintable(cp)) { + UnicodeString u16result; + ICU_Utility::escapeUnprintable(u16result, cp); + std::string u8result; + u16result.toUTF8String(u8result); + fprintf(f, "%s", u8result.data()); + } else { + U_ASSERT(cp < 0x80); + char s[2] = {static_cast(cp), 0}; + fprintf(f, "%s", s); + } + } + fprintf(f, "\""); +} diff --git a/icu4c/source/tools/toolutil/writesrc.h b/icu4c/source/tools/toolutil/writesrc.h index 5d38ad00586..25377af25c0 100644 --- a/icu4c/source/tools/toolutil/writesrc.h +++ b/icu4c/source/tools/toolutil/writesrc.h @@ -23,7 +23,10 @@ #include #include "unicode/utypes.h" +#include "unicode/ucpmap.h" #include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uset.h" #include "utrie2.h" /** @@ -131,6 +134,28 @@ usrc_writeUCPTrieStruct(FILE *f, U_CAPI void U_EXPORT2 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax); +/** + * Writes the UnicodeSet range and string lists. + */ +U_CAPI void U_EXPORT2 +usrc_writeUnicodeSet( + FILE *f, + const USet *pSet, + UTargetSyntax syntax); + +/** + * Writes the UCPMap ranges list. + * + * The "uproperty" argument is optional; ignored if UCHAR_INVALID_CODE. If present, it will be used + * to look up the property value name strings. + */ +U_CAPI void U_EXPORT2 +usrc_writeUCPMap( + FILE *f, + const UCPMap *pMap, + UProperty uproperty, + UTargetSyntax syntax); + /** * Writes the contents of an array of mostly invariant characters. * Characters 0..0x1f are printed as numbers, @@ -147,4 +172,13 @@ usrc_writeArrayOfMostlyInvChars(FILE *f, const char *p, int32_t length, const char *postfix); +/** + * Writes a syntactically valid Unicode string in all ASCII, escaping quotes + * and non-ASCII characters. 
+ */ +U_CAPI void U_EXPORT2 +usrc_writeStringAsASCII(FILE *f, + const UChar* ptr, int32_t length, + UTargetSyntax syntax); + #endif