ICU-13197 fix indexesLength check while loading data, more readable duplicate elimination ...

author Markus Scherer <markus.icu@gmail.com>

Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
author Markus Scherer <markus.icu@gmail.com>
Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
diff --git a/icu4c/source/common/loadednormalizer2impl.cpp b/icu4c/source/common/loadednormalizer2impl.cpp

index bd37b95eadc41cd98e172b80c88aa72e32deaadf..0a4bef4adf784cc5e89a20ac0b9281da765d78ce 100644 (file)
--- a/icu4c/source/common/loadednormalizer2impl.cpp
+++ b/icu4c/source/common/loadednormalizer2impl.cpp
@@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
      const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
      const int32_t *inIndexes=(const int32_t *)inBytes;
      int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
-    if(indexesLength<=IX_MIN_MAYBE_YES) {
+    if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
          errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
          return;
      }
diff --git a/icu4c/source/common/norm2allmodes.h b/icu4c/source/common/norm2allmodes.h

index 91c8634a1f6d8d4a2607c2ea37c71a2299fddfd3..31aaf45d667905b4ace1da4cd7576b0ed160ec38 100644 (file)
--- a/icu4c/source/common/norm2allmodes.h
+++ b/icu4c/source/common/norm2allmodes.h
@@ -5,7 +5,7 @@
  * Copyright (C) 2014, International Business Machines
  * Corporation and others.  All Rights Reserved.
  *******************************************************************************
-* loadednormalizer2impl.h
+* norm2allmodes.h
  *
  * created on: 2014sep07
  * created by: Markus W. Scherer
diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp

index 6a6124f4b756cebfabb810badba9491c0f72c296..757a0c3f6bd10fac9f032802495d4f886535cb39 100644 (file)
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@@ -2645,12 +2645,13 @@ unorm2_swap(const UDataSwapper *ds,
  
      /* check data format and format version */
      pInfo=(const UDataInfo *)((const char *)inData+4);
+    uint8_t formatVersion0=pInfo->formatVersion[0];
      if(!(
          pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
          pInfo->dataFormat[1]==0x72 &&
          pInfo->dataFormat[2]==0x6d &&
          pInfo->dataFormat[3]==0x32 &&
-        (pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2)
+        (formatVersion0==1 || formatVersion0==2)
      )) {
          udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
                           pInfo->dataFormat[0], pInfo->dataFormat[1],
@@ -2664,10 +2665,16 @@ unorm2_swap(const UDataSwapper *ds,
      outBytes=(uint8_t *)outData+headerSize;
  
      inIndexes=(const int32_t *)inBytes;
+    int32_t minIndexesLength;
+    if(formatVersion0==1) {
+        minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
+    } else {
+        minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
+    }
  
      if(length>=0) {
          length-=headerSize;
-        if(length<(int32_t)sizeof(indexes)) {
+        if(length<minIndexesLength*4) {
              udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
                               length);
              *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
diff --git a/icu4c/source/tools/gennorm2/extradata.cpp b/icu4c/source/tools/gennorm2/extradata.cpp

index 2c49c7d041710c8d52722d8c9f85424bc6966873..40a88c44327117179724f32bc82b1cd2b1eb8735 100644 (file)
--- a/icu4c/source/tools/gennorm2/extradata.cpp
+++ b/icu4c/source/tools/gennorm2/extradata.cpp
@@ -90,16 +90,17 @@ int32_t ExtraData::writeMapping(UChar32 c, const Norm &norm, UnicodeString &data
  int32_t ExtraData::writeNoNoMapping(UChar32 c, const Norm &norm,
                                      UnicodeString &dataString,
                                      Hashtable &previousMappings) {
-    int32_t oldLength=dataString.length();
-    int32_t offset=oldLength+writeMapping(c, norm, dataString);
-    UnicodeString newMapping=dataString.tempSubString(oldLength);
+    UnicodeString newMapping;
+    int32_t offset=writeMapping(c, norm, newMapping);
      int32_t previousOffset=previousMappings.geti(newMapping);
      if(previousOffset!=0) {
-        // Duplicate, remove the new units and point to the old ones.
-        dataString.truncate(oldLength);
+        // Duplicate, point to the identical mapping that has already been stored.
          offset=previousOffset-1;
      } else {
-        // Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
+        // Append this new mapping and
+        // enter it into the hashtable, avoiding value 0 which is "not found".
+        offset=dataString.length()+offset;
+        dataString.append(newMapping);
          IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
          previousMappings.puti(newMapping, offset+1, errorCode);
      }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java

index 86a02479737cee70e71eb72566ae70df04c69bdd..e0670b96124bbfec2d27fc05494c4936438beecb 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
@@ -428,7 +428,7 @@ public final class Normalizer2Impl {
          try {
              dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
              int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
-            if(indexesLength<=IX_MIN_MAYBE_YES) {
+            if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
                  throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
              }
              int[] inIndexes=new int[indexesLength];
author	Markus Scherer <markus.icu@gmail.com>
	Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
icu4c/source/common/loadednormalizer2impl.cpp		patch | blob | history
icu4c/source/common/norm2allmodes.h		patch | blob | history
icu4c/source/common/normalizer2impl.cpp		patch | blob | history
icu4c/source/tools/gennorm2/extradata.cpp		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java		patch | blob | history