ICU-13489 Merging #13510(r40714,r40715) UTF-8 to UTF-8 conversion overflow to maint...

author Yoshito Umaoka <y.umaoka@gmail.com>

Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)

committer Yoshito Umaoka <y.umaoka@gmail.com>

Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)
author Yoshito Umaoka <y.umaoka@gmail.com>
Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)
committer Yoshito Umaoka <y.umaoka@gmail.com>
Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)
diff --git a/icu4c/source/common/ucnv_u8.cpp b/icu4c/source/common/ucnv_u8.cpp

index 951988ed9ca3d1665146aee054b43fd493cfb283..5d72f8ef377b083b04afaaeabc08278782d983a5 100644 (file)
--- a/icu4c/source/common/ucnv_u8.cpp
+++ b/icu4c/source/common/ucnv_u8.cpp
@@ -28,6 +28,7 @@
  #include "unicode/utf.h"
  #include "unicode/utf8.h"
  #include "unicode/utf16.h"
+#include "uassert.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  #include "cmemory.h"
@@ -694,7 +695,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
          // Use a single counter for source and target, counting the minimum of
          // the source length and the target capacity.
          // Let the standard converter handle edge cases.
+        const uint8_t *limit=sourceLimit;
          if(count>targetCapacity) {
+            limit-=(count-targetCapacity);
              count=targetCapacity;
          }
  
@@ -707,11 +710,11 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
              // sequence from the previous buffer.
              int32_t length=count-toULimit;
              if(length>0) {
-                uint8_t b1=*(sourceLimit-1);
+                uint8_t b1=*(limit-1);
                  if(U8_IS_SINGLE(b1)) {
                      // common ASCII character
                  } else if(U8_IS_TRAIL(b1) && length>=2) {
-                    uint8_t b2=*(sourceLimit-2);
+                    uint8_t b2=*(limit-2);
                      if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
                          // truncated 3-byte sequence
                          count-=2;
@@ -811,7 +814,7 @@ moreBytes:
              }
  
              /* copy the legal byte sequence to the target */
-            {
+            if(count>=toULength) {
                  int8_t i;
  
                  for(i=0; i<oldToULength; ++i) {
@@ -822,9 +825,18 @@ moreBytes:
                      *target++=*source++;
                  }
                  count-=toULength;
+            } else {
+                // A supplementary character that does not fit into the target.
+                // Let the standard converter handle this.
+                source-=(toULength-oldToULength);
+                pToUArgs->source=(char *)source;
+                pFromUArgs->target=(char *)target;
+                *pErrorCode=U_USING_DEFAULT_WARNING;
+                return;
              }
          }
      }
+    U_ASSERT(count>=0);
  
      if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
          if(target==(const uint8_t *)pFromUArgs->targetLimit) {
diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp

index 6286ff54f32de62a4a3200bacaa40a828039a0d5..db0aa86912bc800ed4ab9ca8507c8a9a8470614e 100644 (file)
--- a/icu4c/source/test/intltest/convtest.cpp
+++ b/icu4c/source/test/intltest/convtest.cpp
@@ -68,21 +68,16 @@ ConversionTest::~ConversionTest() {
  void
  ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
      if (exec) logln("TestSuite ConversionTest: ");
-    switch (index) {
+    TESTCASE_AUTO_BEGIN;
  #if !UCONFIG_NO_FILE_IO
-        case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
-        case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
-        case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
-        case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
-#else
-        case 0:
-        case 1:
-        case 2:
-        case 3: name="skip"; break;
+    TESTCASE_AUTO(TestToUnicode);
+    TESTCASE_AUTO(TestFromUnicode);
+    TESTCASE_AUTO(TestGetUnicodeSet);
  #endif
-        case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
-        default: name=""; break; //needed to end loop
-    }
+    TESTCASE_AUTO(TestGetUnicodeSet2);
+    TESTCASE_AUTO(TestDefaultIgnorableCallback);
+    TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
+    TESTCASE_AUTO_END;
  }
  
  // test data interface ----------------------------------------------------- ***
@@ -723,6 +718,80 @@ ConversionTest::TestDefaultIgnorableCallback() {
      delete set_ignorable;
  }
  
+void
+ConversionTest::TestUTF8ToUTF8Overflow() {
+    IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
+    LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
+    LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
+    static const char *text = "aä";  // ä: 2 bytes
+    const char *source = text;
+    const char *sourceLimit = text + strlen(text);
+    char result[20];
+    char *target = result;
+    const char *targetLimit = result + sizeof(result);
+    UChar buffer16[20];
+    UChar *pivotSource = buffer16;
+    UChar *pivotTarget = buffer16;
+    const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+
+    // Convert with insufficient target capacity.
+    result[2] = 5;
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, result + 2, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, FALSE, errorCode);
+    assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    int32_t length = (int32_t)(target - result);
+    assertEquals("number of bytes written", 2, length);
+    assertEquals("next byte not clobbered", 5, result[2]);
+
+    // Convert the rest and flush.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertSuccess("UTF-8->UTF-8", errorCode);
+    length = (int32_t)(target - result);
+    assertEquals("3 bytes", 3, length);
+    if (length == 3) {
+        assertTrue("result same as input", memcmp(text, result, length) == 0);
+    }
+
+    ucnv_reset(cnv1.getAlias());
+    ucnv_reset(cnv2.getAlias());
+    memset(result, 0, sizeof(result));
+    static const char *text2 = "a🚲";  // U+1F6B2 bicycle: 4 bytes
+    source = text2;
+    sourceLimit = text2 + strlen(text2);
+    target = result;
+    pivotSource = pivotTarget = buffer16;
+
+    // Convert with insufficient target capacity.
+    result[3] = 5;
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, result + 3, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, FALSE, errorCode);
+    assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    length = (int32_t)(target - result);
+    assertEquals("text2 number of bytes written", 3, length);
+    assertEquals("text2 next byte not clobbered", 5, result[3]);
+
+    // Convert the rest and flush.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertSuccess("text2 UTF-8->UTF-8", errorCode);
+    length = (int32_t)(target - result);
+    assertEquals("text2 5 bytes", 5, length);
+    if (length == 5) {
+        assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
+    }
+}
+
  // open testdata or ICU data converter ------------------------------------- ***
  
  UConverter *
diff --git a/icu4c/source/test/intltest/convtest.h b/icu4c/source/test/intltest/convtest.h

index c2d37e48974a4b53a6543300f0b8d2df8775d75e..84a3a89a503393b6f06c13a8e4c40421a966587c 100644 (file)
--- a/icu4c/source/test/intltest/convtest.h
+++ b/icu4c/source/test/intltest/convtest.h
@@ -76,6 +76,7 @@ public:
      void TestGetUnicodeSet();
      void TestGetUnicodeSet2();
      void TestDefaultIgnorableCallback();
+    void TestUTF8ToUTF8Overflow();
  
  private:
      UBool
author	Yoshito Umaoka <y.umaoka@gmail.com>
	Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)
committer	Yoshito Umaoka <y.umaoka@gmail.com>
	Fri, 8 Dec 2017 23:19:10 +0000 (23:19 +0000)
icu4c/source/common/ucnv_u8.cpp		patch | blob | history
icu4c/source/test/intltest/convtest.cpp		patch | blob | history
icu4c/source/test/intltest/convtest.h		patch | blob | history