From 5a42118a6f31ae7e3afad71c37cb977c6f749f93 Mon Sep 17 00:00:00 2001
From: Roman Savchenko
Date: Fri, 11 Sep 2020 15:50:41 +0300
Subject: [PATCH] ICU-21280 Correct source bytes counting in UTF8->UTF8 conversion

---
Notes (text between the "---" line and the diffstat is ignored when applying):
  * ucnv_UTF8FromUTF8(): the length handed to U8_TRUNCATE_IF_INCOMPLETE now
    excludes toULength instead of toULimit, and count is rebuilt the same way.
  * The new intltest case splits the input inside a two-byte character and
    checks the number of bytes written by each ucnv_convertEx() call.
  * Relative to the original patch, the test gains a header comment, the
    second assertion message says "+ 1" to match the asserted chunk2 + 1, and
    the size_t result of sizeof() is cast to int32_t before the final
    assertEquals(). Hunk and diffstat counts are adjusted for the added
    comment lines.
  * Line breaks and indentation were restored by hand from a
    whitespace-collapsed copy; run `git apply --check` first and use
    --ignore-whitespace if the context indentation does not match.

 icu4c/source/common/ucnv_u8.cpp         |  4 +-
 icu4c/source/test/intltest/convtest.cpp | 64 +++++++++++++++++++++++++
 icu4c/source/test/intltest/convtest.h   |  1 +
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/icu4c/source/common/ucnv_u8.cpp b/icu4c/source/common/ucnv_u8.cpp
index 9b518e08df6..1ef7fa2f02f 100644
--- a/icu4c/source/common/ucnv_u8.cpp
+++ b/icu4c/source/common/ucnv_u8.cpp
@@ -707,9 +707,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 
         // Do not go back into the bytes that will be read for finishing a partial
         // sequence from the previous buffer.
-        int32_t length=count-toULimit;
+        int32_t length=count-toULength;
         U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
-        count=toULimit+length;
+        count=toULength+length;
     }
 
     if(c!=0) {
diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp
index 949bd3b8486..a25a04be901 100644
--- a/icu4c/source/test/intltest/convtest.cpp
+++ b/icu4c/source/test/intltest/convtest.cpp
@@ -77,6 +77,7 @@ ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
     TESTCASE_AUTO(TestGetUnicodeSet2);
     TESTCASE_AUTO(TestDefaultIgnorableCallback);
     TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
+    TESTCASE_AUTO(TestUTF8ToUTF8Streaming);
     TESTCASE_AUTO_END;
 }
 
@@ -830,6 +831,69 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
     }
 }
 
+// ICU-21280: Converts UTF-8 to UTF-8 with ucnv_convertEx() in two chunks whose
+// boundary falls between the lead byte and the trail byte of a two-byte
+// character. The first call is expected to write chunk1 - 1 bytes, the second
+// chunk2 + 1 bytes, and the total written must equal the source length.
+void
+ConversionTest::TestUTF8ToUTF8Streaming() {
+    IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming");
+    LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
+    LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
+
+    // UTF-8 encoded Cyrillic part of 'Lorem ipsum'
+    static const char* text =
+        "\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0"
+        "\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf"
+        "\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1"
+        "\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0"
+        "\xb5\x20\xd1\x85\xd0\xb0\xd1\x81";
+
+    int32_t chunk1 = 25; // partial lead at the end: 0xd0
+    int32_t chunk2 = 47; // partial tail at the beginning: 0xb0
+
+    char result[128];
+
+    int32_t sourceLen = (int32_t)strlen(text);
+    const char* source = text;
+    const char* sourceLimit = text + chunk1;
+
+    int32_t targetLen = sizeof(result);
+    char* target = result;
+    const char* targetLimit = result + targetLen;
+
+    UChar buffer16[20];
+    UChar* pivotSource = buffer16;
+    UChar* pivotTarget = buffer16;
+    const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+
+    int32_t length;
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+        &target, result + targetLen, &source, sourceLimit,
+        buffer16, &pivotSource, &pivotTarget, pivotLimit,
+        FALSE, FALSE, errorCode);
+
+    length = (int32_t)(target - result);
+    targetLen -= length;
+    assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length);
+
+    source = text + chunk1;
+    sourceLimit = source + chunk2;
+
+    // Convert the rest and flush.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+        &target, targetLimit, &source, sourceLimit,
+        buffer16, &pivotSource, &pivotTarget, pivotLimit,
+        FALSE, TRUE, errorCode);
+
+    length = (int32_t)(target - result - length);
+    targetLen -= length;
+    assertEquals("Second chunk + 1 doesn't match converted length", chunk2 + 1, length);
+
+    assertEquals("Full text length match", sourceLen, (int32_t)(sizeof(result) - targetLen));
+    assertSuccess("UTF-8->UTF-8", errorCode);
+}
+
 // open testdata or ICU data converter ------------------------------------- ***
 
 UConverter *
diff --git a/icu4c/source/test/intltest/convtest.h b/icu4c/source/test/intltest/convtest.h
index 84a3a89a503..dda10cb5d27 100644
--- a/icu4c/source/test/intltest/convtest.h
+++ b/icu4c/source/test/intltest/convtest.h
@@ -77,6 +77,7 @@ public:
     void TestGetUnicodeSet2();
     void TestDefaultIgnorableCallback();
     void TestUTF8ToUTF8Overflow();
+    void TestUTF8ToUTF8Streaming();
 
 private:
     UBool
-- 
2.40.0