From d3fce873c64124fee5927ba80ea0585da163f1ce Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 10 Jan 2012 23:00:05 +0000 Subject: [PATCH] ICU-8972 generate norm2/nfkc_cf.txt from preparseucd.py X-SVN-Rev: 31198 --- icu4c/source/data/unidata/changes.txt | 3 ++ icu4c/source/data/unidata/norm2/nfkc_cf.txt | 48 ++++++--------------- 2 files changed, 15 insertions(+), 36 deletions(-) diff --git a/icu4c/source/data/unidata/changes.txt b/icu4c/source/data/unidata/changes.txt index 8fbee006a8c..63f5bb9a603 100644 --- a/icu4c/source/data/unidata/changes.txt +++ b/icu4c/source/data/unidata/changes.txt @@ -29,6 +29,9 @@ Tools simplified since the Unicode 6.1 update. See * generate core properties data files (makeprops.sh was deleted) - ~/svn.icu/tools/trunk/dbg/unicode$ c/genprops/genprops ~/svn.icu/trunk/src +* no more manual updates of source/data/unidata/norm2/nfkc_cf.txt +- it is now generated by preparseucd.py + * generate normalization data files - ~/svn.icu/trunk/dbg$ export LD_LIBRARY_PATH=~/svn.icu/trunk/dbg/lib - ~/svn.icu/trunk/dbg$ SRC_DATA_IN=~/svn.icu/trunk/src/source/data/in diff --git a/icu4c/source/data/unidata/norm2/nfkc_cf.txt b/icu4c/source/data/unidata/norm2/nfkc_cf.txt index c481fc3f3d8..7b7981ac14e 100644 --- a/icu4c/source/data/unidata/norm2/nfkc_cf.txt +++ b/icu4c/source/data/unidata/norm2/nfkc_cf.txt @@ -1,32 +1,18 @@ -# Extracted from: -# DerivedNormalizationProps-6.0.0.txt -# Date: 2010-05-20, 15:14:12 GMT [MD] -# # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ - -# ================================================ -# This file has been reformatted into syntax for the -# gennorm2 Normalizer2 data generator tool. -# Only the NFKC_CF mappings are retained and reformatted. -# Reformatting via regular expression: s/ *; NFKC_CF; */>/ +# +# file name: nfkc_cf.txt +# +# machine-generated by ICU preparseucd.py +# +# This file contains the Unicode NFKC_CF mappings, +# extracted from the UCD file DerivedNormalizationProps.txt, +# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool. # Use this file as the second gennorm2 input file after nfkc.txt. -# ================================================ - -# Derived Property: NFKC_Casefold (NFKC_CF) -# This property removes certain variations from characters: case, compatibility, and default-ignorables. -# It is used for loose matching and certain types of identifiers. -# It is constructed by applying NFKC, CaseFolding, and removal of Default_Ignorable_Code_Points. -# The process of applying these transformations is repeated until a stable result is produced. -# WARNING: Application to STRINGS must apply NFC after mapping each character, because characters may interact. -# For more information, see [http://www.unicode.org/reports/tr44/] -# Omitted code points are unchanged by this mapping. -# @missing: 0000..10FFFF; NFKC_CF; -# All code points not explicitly listed for NFKC_Casefold -# have the value . +* Unicode 6.1.0 0041>0061 0042>0062 @@ -1043,9 +1029,7 @@ 2049>0021 003F 2057>2032 2032 2032 2032 205F>0020 -2060..2064> -2065..2069> -206A..206F> +2060..206F> 2070>0030 2071>0069 2074>0034 @@ -5569,12 +5553,4 @@ FFF0..FFF8> 2FA1B>9F16 2FA1C>9F3B 2FA1D>2A600 -E0000> -E0001> -E0002..E001F> -E0020..E007F> -E0080..E00FF> -E0100..E01EF> -E01F0..E0FFF> - -# Total code points: 9944 +E0000..E0FFF> -- 2.40.0