granicus.if.org Git - icu/commitdiff
ICU-13197 fix indexesLength check while loading data, more readable duplicate elimination...
author: Markus Scherer <markus.icu@gmail.com>
Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
committer: Markus Scherer <markus.icu@gmail.com>
Wed, 7 Jun 2017 18:22:44 +0000 (18:22 +0000)
X-SVN-Rev: 40157

icu4c/source/common/loadednormalizer2impl.cpp
icu4c/source/common/norm2allmodes.h
icu4c/source/common/normalizer2impl.cpp
icu4c/source/tools/gennorm2/extradata.cpp
icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java

index bd37b95eadc41cd98e172b80c88aa72e32deaadf..0a4bef4adf784cc5e89a20ac0b9281da765d78ce 100644 (file)
@@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
     const int32_t *inIndexes=(const int32_t *)inBytes;
     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
-    if(indexesLength<=IX_MIN_MAYBE_YES) {
+    if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
         return;
     }
index 91c8634a1f6d8d4a2607c2ea37c71a2299fddfd3..31aaf45d667905b4ace1da4cd7576b0ed160ec38 100644 (file)
@@ -5,7 +5,7 @@
 * Copyright (C) 2014, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
-* loadednormalizer2impl.h
+* norm2allmodes.h
 *
 * created on: 2014sep07
 * created by: Markus W. Scherer
index 6a6124f4b756cebfabb810badba9491c0f72c296..757a0c3f6bd10fac9f032802495d4f886535cb39 100644 (file)
@@ -2645,12 +2645,13 @@ unorm2_swap(const UDataSwapper *ds,
 
     /* check data format and format version */
     pInfo=(const UDataInfo *)((const char *)inData+4);
+    uint8_t formatVersion0=pInfo->formatVersion[0];
     if(!(
         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
         pInfo->dataFormat[1]==0x72 &&
         pInfo->dataFormat[2]==0x6d &&
         pInfo->dataFormat[3]==0x32 &&
-        (pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2)
+        (formatVersion0==1 || formatVersion0==2)
     )) {
         udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
                          pInfo->dataFormat[0], pInfo->dataFormat[1],
@@ -2664,10 +2665,16 @@ unorm2_swap(const UDataSwapper *ds,
     outBytes=(uint8_t *)outData+headerSize;
 
     inIndexes=(const int32_t *)inBytes;
+    int32_t minIndexesLength;
+    if(formatVersion0==1) {
+        minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
+    } else {
+        minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
+    }
 
     if(length>=0) {
         length-=headerSize;
-        if(length<(int32_t)sizeof(indexes)) {
+        if(length<minIndexesLength*4) {
             udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
                              length);
             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
index 2c49c7d041710c8d52722d8c9f85424bc6966873..40a88c44327117179724f32bc82b1cd2b1eb8735 100644 (file)
@@ -90,16 +90,17 @@ int32_t ExtraData::writeMapping(UChar32 c, const Norm &norm, UnicodeString &data
 int32_t ExtraData::writeNoNoMapping(UChar32 c, const Norm &norm,
                                     UnicodeString &dataString,
                                     Hashtable &previousMappings) {
-    int32_t oldLength=dataString.length();
-    int32_t offset=oldLength+writeMapping(c, norm, dataString);
-    UnicodeString newMapping=dataString.tempSubString(oldLength);
+    UnicodeString newMapping;
+    int32_t offset=writeMapping(c, norm, newMapping);
     int32_t previousOffset=previousMappings.geti(newMapping);
     if(previousOffset!=0) {
-        // Duplicate, remove the new units and point to the old ones.
-        dataString.truncate(oldLength);
+        // Duplicate, point to the identical mapping that has already been stored.
         offset=previousOffset-1;
     } else {
-        // Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
+        // Append this new mapping and
+        // enter it into the hashtable, avoiding value 0 which is "not found".
+        offset=dataString.length()+offset;
+        dataString.append(newMapping);
         IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
         previousMappings.puti(newMapping, offset+1, errorCode);
     }
index 86a02479737cee70e71eb72566ae70df04c69bdd..e0670b96124bbfec2d27fc05494c4936438beecb 100644 (file)
@@ -428,7 +428,7 @@ public final class Normalizer2Impl {
         try {
             dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
             int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
-            if(indexesLength<=IX_MIN_MAYBE_YES) {
+            if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
                 throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
             }
             int[] inIndexes=new int[indexesLength];