ucase_toFullLower(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache)
-{
+ const char *locale, int32_t *locCache) {
+ // The sign of the result has meaning; the input must be non-negative so that it can be returned as is.
+ U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
const UChar **pString,
const char *locale, int32_t *locCache,
UBool upperNotTitle) {
+ // The sign of the result has meaning; the input must be non-negative so that it can be returned as is.
+ U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
const UChar **pString,
- uint32_t options)
-{
+ uint32_t options) {
+ // The sign of the result has meaning; the input must be non-negative so that it can be returned as is.
+ U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&csp->trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
return limit;
}
+/*
+ * Appends the bytes s[0..length[ to dest starting at destIndex.
+ *
+ * The bytes are copied only when the whole run fits within destCapacity;
+ * the returned index advances by length either way, so the caller still
+ * learns how long the output would be.
+ * A length of zero or less appends nothing and returns destIndex unchanged.
+ *
+ * Returns the new destination index, or -1 if destIndex+length would
+ * overflow int32_t.
+ */
+static inline int32_t
+appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
+             const uint8_t *s, int32_t length) {
+    if(length<=0) {
+        return destIndex;  // nothing to append
+    }
+    if(length>(INT32_MAX-destIndex)) {
+        return -1;  // integer overflow
+    }
+    const int32_t newIndex=destIndex+length;
+    if(newIndex<=destCapacity) {
+        uprv_memcpy(dest+destIndex, s, length);
+    }
+    return newIndex;
+}
+
static UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
U8_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
if(c<0) {
- int32_t i=csc->cpStart;
- while(destIndex<destCapacity && i<srcIndex) {
- dest[destIndex++]=src[i++];
+ // Malformed UTF-8.
+ destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
+ if(destIndex<0) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
continue;
}
UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
- int32_t prev, titleStart, titleLimit, idx, destIndex, length;
+ int32_t prev, titleStart, titleLimit, idx, destIndex;
UBool isFirstIndex;
if(U_FAILURE(*pErrorCode)) {
break; /* cased letter at [titleStart..titleLimit[ */
}
}
- length=titleStart-prev;
- if(length>0) {
- if((destIndex+length)<=destCapacity) {
- uprv_memcpy(dest+destIndex, src+prev, length);
- }
- destIndex+=length;
+ destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
+ if(destIndex<0) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
}
if(titleStart<titleLimit) {
/* titlecase c which is from [titleStart..titleLimit[ */
- csc.cpStart=titleStart;
- csc.cpLimit=titleLimit;
- c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
- destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ if(c>=0) {
+ csc.cpStart=titleStart;
+ csc.cpLimit=titleLimit;
+ c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
+ destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ } else {
+ // Malformed UTF-8.
+ destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
+ }
if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} else {
/* Optionally just copy the rest of the word unchanged. */
- length=idx-titleLimit;
- if(length>(INT32_MAX-destIndex)) {
+ destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
+ if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
- if((destIndex+length)<=destCapacity) {
- uprv_memcpy(dest+destIndex, src+titleLimit, length);
- }
- destIndex+=length;
}
}
}
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
- } else {
+ } else if(c>=0) {
const UChar *s;
UChar32 c2 = 0;
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
return 0;
}
}
+ } else {
+ // Malformed UTF-8.
+ destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
+ if(destIndex<0) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
}
i = nextIndex;
state = nextState;
start=srcIndex;
U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) {
- while(destIndex<destCapacity && start<srcIndex) {
- dest[destIndex++]=src[start++];
+ // Malformed UTF-8.
+ destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
+ if(destIndex<0) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
continue;
}
return destIndex+1;
}
+/*
+ * Appends the UChar run s[0..length[ to dest starting at destIndex.
+ *
+ * Nothing happens for length<=0. Otherwise the run is copied with u_memcpy
+ * only if it fits completely below destCapacity, and the index is advanced
+ * by length whether or not the copy took place.
+ *
+ * Returns the advanced index, or -1 when adding length to destIndex
+ * would exceed INT32_MAX.
+ */
+static inline int32_t
+appendString(UChar *dest, int32_t destIndex, int32_t destCapacity,
+             const UChar *s, int32_t length) {
+    if(length<=0) {
+        return destIndex;
+    }
+    if(length>(INT32_MAX-destIndex)) {
+        return -1;  // integer overflow
+    }
+    const UBool fits=(destIndex+length)<=destCapacity;
+    if(fits) {
+        u_memcpy(dest+destIndex, s, length);
+    }
+    return destIndex+length;
+}
+
static UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
- int32_t prev, titleStart, titleLimit, idx, destIndex, length;
+ int32_t prev, titleStart, titleLimit, idx, destIndex;
UBool isFirstIndex;
if(U_FAILURE(*pErrorCode)) {
break; /* cased letter at [titleStart..titleLimit[ */
}
}
- length=titleStart-prev;
- if(length>0) {
- if((destIndex+length)<=destCapacity) {
- u_memcpy(dest+destIndex, src+prev, length);
- }
- destIndex+=length;
+ destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
+ if(destIndex<0) {
+ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
}
}
} else {
/* Optionally just copy the rest of the word unchanged. */
- length=idx-titleLimit;
- if(length>(INT32_MAX-destIndex)) {
+ destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
+ if(destIndex<0) {
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
- if((destIndex+length)<=destCapacity) {
- u_memcpy(dest+destIndex, src+titleLimit, length);
- }
- destIndex+=length;
}
}
}
TESTCASE_AUTO(TestFullCaseFoldingIterator);
TESTCASE_AUTO(TestGreekUpper);
TESTCASE_AUTO(TestLongUpper);
+ TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO_END;
}
errorCode.errorName(), (long)destLength);
}
}
+
+/*
+ * Regression test for ticket #12639: each UTF-8 case-mapping entry point
+ * (title, lower, upper, fold) is given a single malformed byte (0x85) and
+ * must succeed, report an output length of 1, and copy the byte through
+ * unchanged.
+ */
+void StringCaseTest::TestMalformedUTF8() {
+    // ticket #12639
+    // The label must match the test name so failures are attributed correctly.
+    IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
+    LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
+    if (errorCode.isFailure()) {
+        errln("ucasemap_open(English) failed - %s", errorCode.errorName());
+        return;
+    }
+    char src[1] = { (char)0x85 };  // malformed UTF-8
+    char dest[3] = { 0, 0, 0 };
+    int32_t destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
+    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
+        // Mask to one byte: a plain char may be signed, and a negative value
+        // would otherwise be sign-extended and print as ffffff85.
+        errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
+              errorCode.errorName(), (int)destLength, (unsigned)(dest[0] & 0xff));
+    }
+
+    errorCode.reset();
+    dest[0] = 0;
+    destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
+    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
+        errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
+              errorCode.errorName(), (int)destLength, (unsigned)(dest[0] & 0xff));
+    }
+
+    errorCode.reset();
+    dest[0] = 0;
+    destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
+    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
+        errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
+              errorCode.errorName(), (int)destLength, (unsigned)(dest[0] & 0xff));
+    }
+
+    errorCode.reset();
+    dest[0] = 0;
+    destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
+    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
+        errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
+              errorCode.errorName(), (int)destLength, (unsigned)(dest[0] & 0xff));
+    }
+}
void TestFullCaseFoldingIterator();
void TestGreekUpper();
void TestLongUpper();
+ void TestMalformedUTF8();
private:
void assertGreekUpper(const char *s, const char *expected);