ICU-8972 gennorm2 parse * Unicode version lines from .txt input files

author Markus Scherer <markus.icu@gmail.com>

Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)
author Markus Scherer <markus.icu@gmail.com>
Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)
diff --git a/icu4c/source/data/unidata/norm2/nfc.txt b/icu4c/source/data/unidata/norm2/nfc.txt

index 49ff6dd34f7b2adc4e64d6fab8944f0fddf0ec4f..162b97511c31b04e41cd166967be7b3111e89ab8 100644 (file)
--- a/icu4c/source/data/unidata/norm2/nfc.txt
+++ b/icu4c/source/data/unidata/norm2/nfc.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2011, International Business Machines
+# Copyright (C) 1999-2012, International Business Machines
  # Corporation and others.  All Rights Reserved.
  #
  # file name: nfc.txt
@@ -6,7 +6,8 @@
  # machine-generated by ICU preparseucd.py
  #
  # Complete data for Unicode NFC normalization.
-# Unicode 6.1.0
+
+* Unicode 6.1.0
  
  # Canonical_Combining_Class (ccc) values
  0300..0314:230
diff --git a/icu4c/source/data/unidata/norm2/nfkc.txt b/icu4c/source/data/unidata/norm2/nfkc.txt

index 0883cdc33822486ed0de2071511205184ea5a727..0a9b8c857318ee06e60392263ae6703509809381 100644 (file)
--- a/icu4c/source/data/unidata/norm2/nfkc.txt
+++ b/icu4c/source/data/unidata/norm2/nfkc.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2011, International Business Machines
+# Copyright (C) 1999-2012, International Business Machines
  # Corporation and others.  All Rights Reserved.
  #
  # file name: nfkc.txt
@@ -6,7 +6,8 @@
  # machine-generated by ICU preparseucd.py
  #
  # Complete data for Unicode NFKC normalization.
-# Unicode 6.1.0
+
+* Unicode 6.1.0
  
  # Canonical_Combining_Class (ccc) values
  0300..0314:230
diff --git a/icu4c/source/tools/gennorm2/gennorm2.cpp b/icu4c/source/tools/gennorm2/gennorm2.cpp

index f0d981ec53b46b77dfac45f9a65e61a3b25a7a81..597300da24e1c2e770f729d15da6dbcc84928e0c 100644 (file)
--- a/icu4c/source/tools/gennorm2/gennorm2.cpp
+++ b/icu4c/source/tools/gennorm2/gennorm2.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2009-2010, International Business Machines
+*   Copyright (C) 2009-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -80,7 +80,6 @@ main(int argc, char* argv[]) {
  
      /* preset then read command line options */
      options[SOURCEDIR].value="";
-    options[UNICODE_VERSION].value=U_UNICODE_VERSION;
      argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), options);
  
      /* error handling, printing usage message */
@@ -145,7 +144,9 @@ main(int argc, char* argv[]) {
      LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(errorCode));
      errorCode.assertSuccess();
  
-    builder->setUnicodeVersion(options[UNICODE_VERSION].value);
+    if(options[UNICODE_VERSION].doesOccur) {
+        builder->setUnicodeVersion(options[UNICODE_VERSION].value);
+    }
  
      if(options[OPT_FAST].doesOccur) {
          builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
@@ -198,6 +199,11 @@ void parseFile(FILE *f, Normalizer2DataBuilder &builder) {
              continue;  // skip empty and comment-only lines
          }
          if(line[0]=='*') {
+            const char *s=u_skipWhitespace(line+1);
+            if(0==strncmp(s, "Unicode", 7)) {
+                s=u_skipWhitespace(s+7);
+                builder.setUnicodeVersion(s);
+            }
              continue;  // reserved syntax
          }
          const char *delimiter;
diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp

index 5a1b8bddf087904fbb7452e3d46b6f1a50b5d3e8..843b1f703572f6075a8966e1c861090df77d8a33 100644 (file)
--- a/icu4c/source/tools/gennorm2/n2builder.cpp
+++ b/icu4c/source/tools/gennorm2/n2builder.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2009-2011, International Business Machines
+*   Copyright (C) 2009-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -193,7 +193,19 @@ Normalizer2DataBuilder::~Normalizer2DataBuilder() {
  
  void
  Normalizer2DataBuilder::setUnicodeVersion(const char *v) {
-    u_versionFromString(unicodeVersion, v);
+    UVersionInfo nullVersion={ 0, 0, 0, 0 };
+    UVersionInfo version;
+    u_versionFromString(version, v);
+    if( 0!=memcmp(version, unicodeVersion, U_MAX_VERSION_LENGTH) &&
+        0!=memcmp(nullVersion, unicodeVersion, U_MAX_VERSION_LENGTH)
+    ) {
+        char buffer[U_MAX_VERSION_STRING_LENGTH];
+        u_versionToString(unicodeVersion, buffer);
+        fprintf(stderr, "gennorm2 error: multiple inconsistent Unicode version numbers %s vs. %s\n",
+                buffer, v);
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    memcpy(unicodeVersion, version, U_MAX_VERSION_LENGTH);
  }
  
  Norm *Normalizer2DataBuilder::allocNorm() {
@@ -1177,6 +1189,10 @@ void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
          printf("minMaybeYes:                       0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]);
      }
  
+    UVersionInfo nullVersion={ 0, 0, 0, 0 };
+    if(0==memcmp(nullVersion, unicodeVersion, 4)) {
+        u_versionFromString(unicodeVersion, U_UNICODE_VERSION);
+    }
      memcpy(dataInfo.dataVersion, unicodeVersion, 4);
      UNewDataMemory *pData=
          udata_create(NULL, NULL, filename, &dataInfo,
author	Markus Scherer <markus.icu@gmail.com>
	Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Tue, 10 Jan 2012 21:04:58 +0000 (21:04 +0000)
icu4c/source/data/unidata/norm2/nfc.txt		patch | blob | history
icu4c/source/data/unidata/norm2/nfkc.txt		patch | blob | history
icu4c/source/tools/gennorm2/gennorm2.cpp		patch | blob | history
icu4c/source/tools/gennorm2/n2builder.cpp		patch | blob | history