]> granicus.if.org Git - icu/commitdiff
ICU-13560 make some toUnicode converter check toULength not toUnicodeStatus for conti...
authorMarkus Scherer <markus.icu@gmail.com>
Tue, 23 Jan 2018 21:32:36 +0000 (21:32 +0000)
committerMarkus Scherer <markus.icu@gmail.com>
Tue, 23 Jan 2018 21:32:36 +0000 (21:32 +0000)
X-SVN-Rev: 40793

icu4c/source/common/ucnv_u32.cpp
icu4c/source/common/ucnv_u8.cpp
icu4c/source/common/ucnvlat1.cpp
icu4c/source/common/ucnvmbcs.cpp
icu4c/source/test/intltest/convtest.cpp

index e1b755ab7f2ac432949f6e022b68c202bd96b4ff..5777117a924d8adee5a1a44937cb9ed46e15bb5f 100644 (file)
@@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
     uint32_t ch, i;
 
     /* Restore state of current sequence */
-    if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
+    if (args->converter->toULength > 0 && myTarget < targetLimit) {
         i = args->converter->toULength;       /* restore # of bytes consumed */
         args->converter->toULength = 0;
 
@@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
     int32_t offsetNum = 0;
 
     /* Restore state of current sequence */
-    if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
+    if (args->converter->toULength > 0 && myTarget < targetLimit) {
         i = args->converter->toULength;       /* restore # of bytes consumed */
         args->converter->toULength = 0;
 
@@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
     uint32_t ch, i;
 
     /* Restore state of current sequence */
-    if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+    if (args->converter->toULength > 0 && myTarget < targetLimit)
     {
         i = args->converter->toULength;       /* restore # of bytes consumed */
         args->converter->toULength = 0;
@@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
     int32_t offsetNum = 0;
 
     /* Restore state of current sequence */
-    if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+    if (args->converter->toULength > 0 && myTarget < targetLimit)
     {
         i = args->converter->toULength;       /* restore # of bytes consumed */
         args->converter->toULength = 0;
index 5d72f8ef377b083b04afaaeabc08278782d983a5..094e2dfb6f43279885dde600400da5cc2af10dfe 100644 (file)
@@ -76,7 +76,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
     int32_t i, inBytes;
 
     /* Restore size of current sequence */
-    if (cnv->toUnicodeStatus && myTarget < targetLimit)
+    if (cnv->toULength > 0 && myTarget < targetLimit)
     {
         inBytes = cnv->mode;            /* restore # of bytes to consume */
         i = cnv->toULength;             /* restore # of bytes consumed */
@@ -194,7 +194,7 @@ static void  U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
     int32_t i, inBytes;
 
     /* Restore size of current sequence */
-    if (cnv->toUnicodeStatus && myTarget < targetLimit)
+    if (cnv->toULength > 0 && myTarget < targetLimit)
     {
         inBytes = cnv->mode;            /* restore # of bytes to consume */
         i = cnv->toULength;             /* restore # of bytes consumed */
@@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 
     /* get the converter state from the UTF-8 UConverter */
-    c=(UChar32)utf8->toUnicodeStatus;
-    if(c!=0) {
+    if(utf8->toULength > 0) {
         toULength=oldToULength=utf8->toULength;
         toULimit=(int8_t)utf8->mode;
+        c=(UChar32)utf8->toUnicodeStatus;
     } else {
         toULength=oldToULength=toULimit=0;
+        c = 0;
     }
 
     count=(int32_t)(sourceLimit-source)+oldToULength;
index 23e918afe7a9d063d3f3e17c556d32ebfe43bdf8..358bc0caa25a19e76936a57861d9488f98b6ee8c 100644 (file)
@@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 
     /* get the converter state from the UTF-8 UConverter */
-    c=(UChar32)utf8->toUnicodeStatus;
+    if (utf8->toULength > 0) {
+        c=(UChar32)utf8->toUnicodeStatus;
+    } else {
+        c = 0;
+    }
     if(c!=0 && source<sourceLimit) {
         if(targetCapacity==0) {
             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 
     uint8_t c;
 
-    if(pToUArgs->converter->toUnicodeStatus!=0) {
+    if(pToUArgs->converter->toULength > 0) {
         /* no handling of partial UTF-8 characters here, fall back to pivoting */
         *pErrorCode=U_USING_DEFAULT_WARNING;
         return;
index 4b36cc605b16c778f2c63406b1062e4f23a7a112..2d0c857758d9bc93898eca2338b3a57e4e0b20d5 100644 (file)
@@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
 
     /* get the converter state from the UTF-8 UConverter */
-    c=(UChar32)utf8->toUnicodeStatus;
-    if(c!=0) {
+    if(utf8->toULength > 0) {
         toULength=oldToULength=utf8->toULength;
         toULimit=(int8_t)utf8->mode;
+        c=(UChar32)utf8->toUnicodeStatus;
     } else {
         toULength=oldToULength=toULimit=0;
+        c = 0;
     }
 
     // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
@@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
 
     /* get the converter state from the UTF-8 UConverter */
-    c=(UChar32)utf8->toUnicodeStatus;
-    if(c!=0) {
+    if(utf8->toULength > 0) {
         toULength=oldToULength=utf8->toULength;
         toULimit=(int8_t)utf8->mode;
+        c=(UChar32)utf8->toUnicodeStatus;
     } else {
         toULength=oldToULength=toULimit=0;
+        c = 0;
     }
 
     // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
index db0aa86912bc800ed4ab9ca8507c8a9a8470614e..3a0e5414d6f0de1d1a5258ca46971cb742911baa 100644 (file)
@@ -733,6 +733,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
     UChar *pivotSource = buffer16;
     UChar *pivotTarget = buffer16;
     const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+    int32_t length;
 
     // Convert with insufficient target capacity.
     result[2] = 5;
@@ -741,7 +742,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
                    buffer16, &pivotSource, &pivotTarget, pivotLimit,
                    FALSE, FALSE, errorCode);
     assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
-    int32_t length = (int32_t)(target - result);
+    length = (int32_t)(target - result);
     assertEquals("number of bytes written", 2, length);
     assertEquals("next byte not clobbered", 5, result[2]);
 
@@ -790,6 +791,52 @@ ConversionTest::TestUTF8ToUTF8Overflow() {
     if (length == 5) {
         assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
     }
+
+    ucnv_reset(cnv1.getAlias());
+    ucnv_reset(cnv2.getAlias());
+    memset(result, 0, sizeof(result));
+    static const char *illFormed = "\xf1\x91\x93\x96\x91\x94";  // U+514D6 + two more trail bytes
+    source = illFormed;
+    sourceLimit = illFormed + strlen(illFormed);
+    target = result;
+    pivotSource = pivotTarget = buffer16;
+
+    ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);
+
+    // Convert only two bytes and flush (but expect failure).
+    char errorBytes[10];
+    int8_t errorLength;
+    result[0] = 5;
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, source + 2,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+    assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
+    length = (int32_t)(target - result);
+    assertEquals("illFormed number of bytes written", 0, length);
+    errorLength = UPRV_LENGTHOF(errorBytes);
+    ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
+    assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
+    if (errorLength == 2) {
+        assertEquals("illFormed truncated errorBytes", 0xf191, 
+                     ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
+    }
+
+    // Continue conversion starting with a trail byte.
+    ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+                   &target, targetLimit, &source, sourceLimit,
+                   buffer16, &pivotSource, &pivotTarget, pivotLimit,
+                   FALSE, TRUE, errorCode);
+
+    assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
+    length = (int32_t)(target - result);
+    assertEquals("illFormed trail byte number of bytes written", 0, length);
+    errorLength = UPRV_LENGTHOF(errorBytes);
+    ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
+    assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
+    if (errorLength == 1) {
+        assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
+    }
 }
 
 // open testdata or ICU data converter ------------------------------------- ***