ICU-13510 test & fix UTF-8->UTF-8 conversion overflow for supplementary characters

author Markus Scherer <markus.icu@gmail.com>

Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)
author Markus Scherer <markus.icu@gmail.com>
Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)
diff --git a/icu4c/source/common/ucnv_u8.cpp b/icu4c/source/common/ucnv_u8.cpp

index e47b86c5b93f9cffc07daf4f1d99f18a10971233..5d72f8ef377b083b04afaaeabc08278782d983a5 100644 (file)
--- a/icu4c/source/common/ucnv_u8.cpp
+++ b/icu4c/source/common/ucnv_u8.cpp
@@ -814,7 +814,7 @@ moreBytes:
              }
  
              /* copy the legal byte sequence to the target */
-            {
+            if(count>=toULength) {
                  int8_t i;
  
                  for(i=0; i<oldToULength; ++i) {
@@ -825,6 +825,14 @@ moreBytes:
                      *target++=*source++;
                  }
                  count-=toULength;
+            } else {
+                // A supplementary character that does not fit into the target.
+                // Let the standard converter handle this.
+                source-=(toULength-oldToULength);
+                pToUArgs->source=(char *)source;
+                pFromUArgs->target=(char *)target;
+                *pErrorCode=U_USING_DEFAULT_WARNING;
+                return;
              }
          }
      }
diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp

index fc03ff6adc693aa06c159427a2eebd695634cce5..db0aa86912bc800ed4ab9ca8507c8a9a8470614e 100644 (file)
--- a/icu4c/source/test/intltest/convtest.cpp
+++ b/icu4c/source/test/intltest/convtest.cpp
@@ -723,7 +723,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
      IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
      LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
      LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
-    static const char *text = "aä";
+    static const char *text = "aä";  // ä: 2 bytes
      const char *source = text;
      const char *sourceLimit = text + strlen(text);
      char result[20];
@@ -757,6 +757,39 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
      if (length == 3) {
          assertTrue("result same as input", memcmp(text, result, length) == 0);
      }
+
+    ucnv_reset(cnv1.getAlias());
+    ucnv_reset(cnv2.getAlias());
+    memset(result, 0, sizeof(result));
+    static const char *text2 = "a🚲";  // U+1F6B2 bicycle: 4 bytes
+    source = text2;
+    sourceLimit = text2 + strlen(text2);
+    target = result;
+    pivotSource = pivotTarget = buffer16;
+
+    // Convert with insufficient target capacity.
+    result[3] = 5;
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, result + 3, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, FALSE, errorCode);
+    assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    length = (int32_t)(target - result);
+    assertEquals("text2 number of bytes written", 3, length);
+    assertEquals("text2 next byte not clobbered", 5, result[3]);
+
+    // Convert the rest and flush.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertSuccess("text2 UTF-8->UTF-8", errorCode);
+    length = (int32_t)(target - result);
+    assertEquals("text2 5 bytes", 5, length);
+    if (length == 5) {
+        assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
+    }
  }
  
  // open testdata or ICU data converter ------------------------------------- ***
author	Markus Scherer <markus.icu@gmail.com>
	Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Fri, 8 Dec 2017 21:47:21 +0000 (21:47 +0000)
icu4c/source/common/ucnv_u8.cpp		patch | blob | history
icu4c/source/test/intltest/convtest.cpp		patch | blob | history