From e9503bdade45587ee4ad9ae9dd9fc07c60244dd2 Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Mon, 14 Nov 2011 19:32:51 +0000 Subject: [PATCH] ICU-8909 Fixed various warnings reported by a source code analysis tool. X-SVN-Rev: 30958 --- icu4c/source/common/bmpset.cpp | 4 + icu4c/source/common/bytestriebuilder.cpp | 1 + icu4c/source/common/messagepattern.cpp | 2 +- icu4c/source/common/normalizer2.cpp | 7 +- icu4c/source/common/normalizer2impl.cpp | 2 +- icu4c/source/common/propsvec.c | 2 + icu4c/source/common/punycode.cpp | 5 +- icu4c/source/common/putil.cpp | 17 +++- icu4c/source/common/rbbi.cpp | 16 +++- icu4c/source/common/rbbidata.h | 4 +- icu4c/source/common/rbbirb.cpp | 19 ++-- icu4c/source/common/rbbiscan.cpp | 5 +- icu4c/source/common/serv.cpp | 47 +++++----- icu4c/source/common/stringtriebuilder.cpp | 9 +- icu4c/source/common/triedict.cpp | 6 +- icu4c/source/common/ubidiwrt.c | 24 +++-- icu4c/source/common/ucharstriebuilder.cpp | 1 + icu4c/source/common/ucnv2022.cpp | 12 ++- icu4c/source/common/ucnv_ct.c | 6 +- icu4c/source/common/ucnv_ext.cpp | 6 +- icu4c/source/common/ucnv_lmb.c | 8 +- icu4c/source/common/ucnv_u7.c | 2 + icu4c/source/common/ucnv_u8.c | 8 +- icu4c/source/common/ucnvbocu.cpp | 47 +++++----- icu4c/source/common/ucnvisci.c | 2 + icu4c/source/common/ucnvmbcs.c | 24 ++--- icu4c/source/common/ucnvscsu.c | 38 ++++---- icu4c/source/common/ucnvsel.cpp | 43 +++++---- icu4c/source/common/uloc.cpp | 18 ++-- icu4c/source/common/uloc_tag.c | 2 +- icu4c/source/common/unistr.cpp | 14 ++- icu4c/source/common/unistr_case.cpp | 4 +- icu4c/source/common/unorm.cpp | 88 ++++++++++--------- icu4c/source/common/unormcmp.cpp | 102 +++++++++++----------- icu4c/source/common/uresbund.cpp | 2 +- icu4c/source/common/uresdata.c | 78 ++++++++++------- icu4c/source/common/ushape.cpp | 11 ++- icu4c/source/common/ustrcase.cpp | 12 +-- icu4c/source/common/ustrtrns.cpp | 31 ++++--- icu4c/source/common/utext.cpp | 5 +- icu4c/source/common/utf_impl.c | 2 + icu4c/source/common/utrie2.cpp | 3 + icu4c/source/i18n/alphaindex.cpp | 11 ++- icu4c/source/i18n/calendar.cpp | 2 + icu4c/source/i18n/coleitr.cpp | 7 +- icu4c/source/i18n/colldata.cpp | 2 + icu4c/source/i18n/decNumber.c | 10 ++- icu4c/source/i18n/plurrule.cpp | 8 ++ icu4c/source/i18n/regexcmp.cpp | 1 + icu4c/source/i18n/tmutfmt.cpp | 8 +- icu4c/source/i18n/tzgnames.cpp | 1 + icu4c/source/i18n/tznames_impl.cpp | 3 + icu4c/source/i18n/ucol.cpp | 3 +- icu4c/source/i18n/ucol_elm.cpp | 2 + icu4c/source/i18n/umsg.cpp | 2 + icu4c/source/i18n/unum.cpp | 1 + icu4c/source/i18n/uregex.cpp | 13 ++- icu4c/source/i18n/uspoof_wsconf.cpp | 10 ++- icu4c/source/i18n/zonemeta.cpp | 3 + icu4c/source/io/ufile.c | 2 +- icu4c/source/io/ustdio.c | 2 +- 61 files changed, 501 insertions(+), 329 deletions(-) diff --git a/icu4c/source/common/bmpset.cpp b/icu4c/source/common/bmpset.cpp index e3f98cbeb73..af1876d4e61 100644 --- a/icu4c/source/common/bmpset.cpp +++ b/icu4c/source/common/bmpset.cpp @@ -20,6 +20,7 @@ #include "unicode/utf16.h" #include "cmemory.h" #include "bmpset.h" +#include "uassert.h" U_NAMESPACE_BEGIN @@ -63,6 +64,9 @@ BMPSet::~BMPSet() { * start>6; int32_t trail=start&0x3f; diff --git a/icu4c/source/common/bytestriebuilder.cpp b/icu4c/source/common/bytestriebuilder.cpp index 4f4adfb2313..c105dfec1ba 100644 --- a/icu4c/source/common/bytestriebuilder.cpp +++ b/icu4c/source/common/bytestriebuilder.cpp @@ -162,6 +162,7 @@ BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; if(newElements==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; + return *this; // error instead of dereferencing null } if(elementsLength>0) { uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); diff --git a/icu4c/source/common/messagepattern.cpp b/icu4c/source/common/messagepattern.cpp index f77be8c2a38..b9c15f9c14b 100644 --- a/icu4c/source/common/messagepattern.cpp +++ b/icu4c/source/common/messagepattern.cpp @@ -923,7 +923,7 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, } U_ASSERT(start=MIN_NORMAL_MAYBE_YES) { norm16&=0xff; norm16|=norm16<<8; diff --git a/icu4c/source/common/propsvec.c b/icu4c/source/common/propsvec.c index de897ff56d7..f91a155cf8d 100644 --- a/icu4c/source/common/propsvec.c +++ b/icu4c/source/common/propsvec.c @@ -23,6 +23,7 @@ #include "utrie2.h" #include "uarrsort.h" #include "propsvec.h" +#include "uassert.h" struct UPropsVectors { uint32_t *v; @@ -352,6 +353,7 @@ upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UE rows=pv->rows; columns=pv->columns; + U_ASSERT(columns>=3); /* upvec_open asserts this */ valueColumns=columns-2; /* not counting start & limit */ /* sort the properties vectors to find unique vector values */ diff --git a/icu4c/source/common/punycode.cpp b/icu4c/source/common/punycode.cpp index 825990324b1..e2b3a58915f 100644 --- a/icu4c/source/common/punycode.cpp +++ b/icu4c/source/common/punycode.cpp @@ -53,6 +53,7 @@ Disclaimer and license #include "cstring.h" #include "cmemory.h" #include "punycode.h" +#include "uassert.h" /* Punycode ----------------------------------------------------------------- */ @@ -407,6 +408,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength, } } destLength=basicLength=destCPCount=j; + U_ASSERT(destLength>=0); while(j>0) { b=src[--j]; @@ -520,7 +522,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength, /* Insert n at position i of the output: */ cpLength=U16_LENGTH(n); - if((destLength+cpLength)<=destCapacity) { + if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { int32_t codeUnitIndex; /* @@ -573,6 +575,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength, } } destLength+=cpLength; + U_ASSERT(destLength>=0); ++i; } diff --git a/icu4c/source/common/putil.cpp b/icu4c/source/common/putil.cpp index ae39d532527..ce585b42e4c 100644 --- a/icu4c/source/common/putil.cpp +++ b/icu4c/source/common/putil.cpp @@ -735,12 +735,14 @@ static void skipZoneIDPrefix(const char** id) { #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) typedef struct OffsetZoneMapping { int32_t offsetSeconds; - int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/ + int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ const char *stdID; const char *dstID; const char *olsonID; } OffsetZoneMapping; +enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; + /* This list tries to disambiguate a set of abbreviated timezone IDs and offsets and maps it to an Olson ID. @@ -749,7 +751,7 @@ icu/source/tools/tzcode/tz.alias Sometimes no daylight savings (0) is important to define due to aliases. This list can be tested with icu/source/test/compat/tzone.pl More values could be added to daylightType to increase precision. -*/ +g*/ static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, @@ -1006,7 +1008,7 @@ uprv_tzname(int n) /* else U_TZNAME will give a better result. */ #endif -#if defined(CHECK_LOCALTIME_LINK) +#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) /* Caller must handle threading issues */ if (gTimeZoneBufferPtr == NULL) { /* @@ -1080,7 +1082,14 @@ uprv_tzname(int n) /* This probing will tell us when daylight savings occurs. */ localtime_r(&juneSolstice, &juneSol); localtime_r(&decemberSolstice, &decemberSol); - daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0); + if(decemberSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_DECEMBER; + } else if(juneSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_JUNE; + } else { + daylightType = U_DAYLIGHT_NONE; + } + printf("daylightType=%d\n", daylightType); tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); if (tzid != NULL) { return tzid; diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index 7eb258a99bd..7bc18f1e33a 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1083,7 +1083,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) { } } - #ifdef RBBI_DEBUG + #ifdef RBBI_DEBUG if (fTrace) { RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); if (0x20<=c && c<0x7f) { @@ -1097,7 +1097,12 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) { // State Transition - move machine to its next state // - state = row->fNextState[category]; + + // Note: fNextState is defined as uint16_t[2], but we are casting + // a generated RBBI table to RBBIStateTableRow and some tables + // actually have more than 2 categories. + U_ASSERT(categoryfHeader->fCatCount); + state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) // (statetable->fTableData + (statetable->fRowLen * state)); (tableData + tableRowLen * state); @@ -1312,7 +1317,12 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable) // State Transition - move machine to its next state // - state = row->fNextState[category]; + + // Note: fNextState is defined as uint16_t[2], but we are casting + // a generated RBBI table to RBBIStateTableRow and some tables + // actually have more than 2 categories. + U_ASSERT(categoryfHeader->fCatCount); + state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) (statetable->fTableData + (statetable->fRowLen * state)); diff --git a/icu4c/source/common/rbbidata.h b/icu4c/source/common/rbbidata.h index e8ef322e4d7..7073b8d931d 100644 --- a/icu4c/source/common/rbbidata.h +++ b/icu4c/source/common/rbbidata.h @@ -113,8 +113,8 @@ struct RBBIStateTableRow { /* tags (rule status values) */ int16_t fReserved; uint16_t fNextState[2]; /* Next State, indexed by char category. */ - /* Array Size is fNumCols from the */ - /* state table header. */ + /* This array does not have two elements */ + /* Array Size is actually fData->fHeader->fCatCount */ /* CAUTION: see RBBITableBuilder::getTableSize() */ /* before changing anything here. */ }; diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index bbc11cd9751..12de395bca2 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -1,7 +1,7 @@ // // file: rbbirb.cpp // -// Copyright (C) 2002-2008, International Business Machines Corporation and others. +// Copyright (C) 2002-2011, International Business Machines Corporation and others. // All Rights Reserved. // // This file contains the RBBIRuleBuilder class implementation. This is the main class for @@ -260,19 +260,14 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree); builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree); builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree); - if (U_SUCCESS(status) - && (builder.fForwardTables == NULL || builder.fReverseTables == NULL || - builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)) + if (builder.fForwardTables == NULL || builder.fReverseTables == NULL || + builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL) { status = U_MEMORY_ALLOCATION_ERROR; - } - - // Before building the tables, check to make sure the status is ok. - if (U_FAILURE(status)) { - delete builder.fForwardTables; builder.fForwardTables = NULL; - delete builder.fReverseTables; builder.fReverseTables = NULL; - delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL; - delete builder.fSafeRevTables; builder.fSafeRevTables = NULL; + delete builder.fForwardTables; builder.fForwardTables = NULL; + delete builder.fReverseTables; builder.fReverseTables = NULL; + delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL; + delete builder.fSafeRevTables; builder.fSafeRevTables = NULL; return NULL; } diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index 5e74d7e58c5..71ce31338fc 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -992,6 +992,7 @@ void RBBIRuleScanner::parse() { if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class && fC.fEscaped == FALSE && // char is not escaped && fC.fChar != (UChar32)-1) { // char is not EOF + U_ASSERT((tableEl->fCharClass-128) < sizeof(fRuleSets)/sizeof(fRuleSets[0])); if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { // Table row specified a character class, or set of characters, // and the current char matches it. @@ -1149,8 +1150,9 @@ void RBBIRuleScanner::scanSet() { uset = new UnicodeSet(); if (uset == NULL) { localStatus = U_MEMORY_ALLOCATION_ERROR; + } else { + uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); } - uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); if (U_FAILURE(localStatus)) { // TODO: Get more accurate position of the error from UnicodeSet's return info. // UnicodeSet appears to not be reporting correctly at this time. @@ -1164,6 +1166,7 @@ void RBBIRuleScanner::scanSet() { // Verify that the set contains at least one code point. // + U_ASSERT(uset!=NULL); if (uset->isEmpty()) { // This set is empty. // Make it an error, because it almost certainly is not what the user wanted. diff --git a/icu4c/source/common/serv.cpp b/icu4c/source/common/serv.cpp index 6564312b99c..2cddcfe65dc 100644 --- a/icu4c/source/common/serv.cpp +++ b/icu4c/source/common/serv.cpp @@ -752,32 +752,33 @@ ICUService::getDisplayNames(UVector& result, if (dnCache == NULL) { const Hashtable* m = getVisibleIDMap(status); - if (m != NULL) { - ncthis->dnCache = new DNCache(locale); - if (dnCache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return result; - } + if (U_FAILURE(status)) { + return result; + } + ncthis->dnCache = new DNCache(locale); + if (dnCache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return result; + } - int32_t pos = -1; - const UHashElement* entry = NULL; - while ((entry = m->nextElement(pos)) != NULL) { - const UnicodeString* id = (const UnicodeString*)entry->key.pointer; - ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer; - UnicodeString dname; - f->getDisplayName(*id, locale, dname); - if (dname.isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap - if (U_SUCCESS(status)) { - continue; - } + int32_t pos = -1; + const UHashElement* entry = NULL; + while ((entry = m->nextElement(pos)) != NULL) { + const UnicodeString* id = (const UnicodeString*)entry->key.pointer; + ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer; + UnicodeString dname; + f->getDisplayName(*id, locale, dname); + if (dname.isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap + if (U_SUCCESS(status)) { + continue; } - delete dnCache; - ncthis->dnCache = NULL; - return result; } + delete dnCache; + ncthis->dnCache = NULL; + return result; } } } diff --git a/icu4c/source/common/stringtriebuilder.cpp b/icu4c/source/common/stringtriebuilder.cpp index 327661825d3..e3e58eb6c4f 100644 --- a/icu4c/source/common/stringtriebuilder.cpp +++ b/icu4c/source/common/stringtriebuilder.cpp @@ -47,11 +47,12 @@ StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode } nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, sizeGuess, &errorCode); - if(U_SUCCESS(errorCode) && nodes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } if(U_SUCCESS(errorCode)) { - uhash_setKeyDeleter(nodes, uprv_deleteUObject); + if(nodes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else { + uhash_setKeyDeleter(nodes, uprv_deleteUObject); + } } } diff --git a/icu4c/source/common/triedict.cpp b/icu4c/source/common/triedict.cpp index 4f8c9965ae3..e905a40966e 100644 --- a/icu4c/source/common/triedict.cpp +++ b/icu4c/source/common/triedict.cpp @@ -275,7 +275,7 @@ public: where = (StackBranch) fBranchStack.push(kLessThan, status); break; } - case kEqual: + case kEqual: /*fall through*/ emit = (node->flags & kEndsWord) != 0; equal = (node->equal != NULL); // If this node should be part of the next emitted string, append @@ -296,7 +296,7 @@ public: if (equal) { break; } - case kGreaterThan: + case kGreaterThan: /*fall through*/ // If this node's character is in the string, remove it. if (node->equal != NULL || (node->flags & kEndsWord)) { unistr.truncate(unistr.length()-1); @@ -307,7 +307,7 @@ public: where = (StackBranch) fBranchStack.push(kLessThan, status); break; } - case kDone: + case kDone: /*fall through*/ fNodeStack.pop(); fBranchStack.popi(); node = (TernaryNode *) fNodeStack.peek(); diff --git a/icu4c/source/common/ubidiwrt.c b/icu4c/source/common/ubidiwrt.c index 3f1053e39f9..f554f35c4a0 100644 --- a/icu4c/source/common/ubidiwrt.c +++ b/icu4c/source/common/ubidiwrt.c @@ -447,7 +447,9 @@ ubidi_writeReordered(UBiDi *pBiDi, dest, destSize, options, pErrorCode); } - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; } } else { @@ -489,7 +491,9 @@ ubidi_writeReordered(UBiDi *pBiDi, runLength=doWriteForward(src, runLength, dest, destSize, (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; if((pBiDi->isInverse) && @@ -531,7 +535,9 @@ ubidi_writeReordered(UBiDi *pBiDi, runLength=doWriteReverse(src, runLength, dest, destSize, options, pErrorCode); - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; if((pBiDi->isInverse) && @@ -568,7 +574,9 @@ ubidi_writeReordered(UBiDi *pBiDi, dest, destSize, options, pErrorCode); } - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; } } else { @@ -593,7 +601,9 @@ ubidi_writeReordered(UBiDi *pBiDi, runLength=doWriteReverse(src, runLength, dest, destSize, (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; if(/*run>0 &&*/ dirProps[logicalStart]!=L) { @@ -613,7 +623,9 @@ ubidi_writeReordered(UBiDi *pBiDi, runLength=doWriteForward(src, runLength, dest, destSize, options, pErrorCode); - dest+=runLength; + if(dest!=NULL) { + dest+=runLength; + } destSize-=runLength; if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { diff --git a/icu4c/source/common/ucharstriebuilder.cpp b/icu4c/source/common/ucharstriebuilder.cpp index 24b46f50837..b304a3b42c3 100644 --- a/icu4c/source/common/ucharstriebuilder.cpp +++ b/icu4c/source/common/ucharstriebuilder.cpp @@ -112,6 +112,7 @@ UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorC UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity]; if(newElements==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; + return *this; } if(elementsLength>0) { uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharsTrieElement)); diff --git a/icu4c/source/common/ucnv2022.cpp b/icu4c/source/common/ucnv2022.cpp index 404303658d3..90b4532f100 100644 --- a/icu4c/source/common/ucnv2022.cpp +++ b/icu4c/source/common/ucnv2022.cpp @@ -41,6 +41,7 @@ #include "ucnvmbcs.h" #include "cstring.h" #include "cmemory.h" +#include "uassert.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -2836,7 +2837,7 @@ getTrailByte: * */ -/* The following are defined this way to make the strings truely readonly */ +/* The following are defined this way to make the strings truly readonly */ static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; @@ -2849,9 +2850,9 @@ static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /********************** ISO2022-CN Data **************************/ static const char* const escSeqCharsCN[10] ={ - SHIFT_IN_STR, /* ASCII */ - GB_2312_80_STR, - ISO_IR_165_STR, + SHIFT_IN_STR, /* 0 ASCII */ + GB_2312_80_STR, /* 1 GB2312_1 */ + ISO_IR_165_STR, /* 2 ISO_IR_165 */ CNS_11643_1992_Plane_1_STR, CNS_11643_1992_Plane_2_STR, CNS_11643_1992_Plane_3_STR, @@ -3056,6 +3057,7 @@ getTrail: } } else { /* GB2312_1 or ISO-IR-165 */ + U_ASSERT(cs0myConverterArray[cs0], sourceChar, @@ -3081,6 +3083,7 @@ getTrail: if(cs < CNS_11643) { uprv_memcpy(buffer, escSeqCharsCN[cs], 4); } else { + U_ASSERT(cs >= CNS_11643_1); uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4); } len = 4; @@ -3339,6 +3342,7 @@ getTrailByte: tempBufLen = 3; }else{ + U_ASSERT(tempStatemyConverterArray[tempState]; tempBuf[0] = (char) (mySourceChar); tempBuf[1] = (char) trailByte; diff --git a/icu4c/source/common/ucnv_ct.c b/icu4c/source/common/ucnv_ct.c index 16f43d27754..393bd3f515f 100644 --- a/icu4c/source/common/ucnv_ct.c +++ b/icu4c/source/common/ucnv_ct.c @@ -341,7 +341,7 @@ UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UEr COMPOUND_TEXT_CONVERTERS currentState, tmpState; uint32_t pValue; int32_t pValueLength = 0; - int32_t i, n; + int32_t i, n, j; UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; @@ -410,8 +410,8 @@ getTrail: tmpState = (COMPOUND_TEXT_CONVERTERS)i; if (currentState != tmpState) { currentState = tmpState; - for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { - tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; + for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; } } for (n = (pValueLength - 1); n >= 0; n--) { diff --git a/icu4c/source/common/ucnv_ext.cpp b/icu4c/source/common/ucnv_ext.cpp index 898a235713e..51d1ba03375 100644 --- a/icu4c/source/common/ucnv_ext.cpp +++ b/icu4c/source/common/ucnv_ext.cpp @@ -25,6 +25,7 @@ #include "ucnv_cnv.h" #include "ucnv_ext.h" #include "cmemory.h" +#include "uassert.h" /* to Unicode --------------------------------------------------------------- */ @@ -690,9 +691,9 @@ ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, switch(length) { case 3: *p++=(uint8_t)(value>>16); - case 2: + case 2: /*fall through*/ *p++=(uint8_t)(value>>8); - case 1: + case 1: /*fall through*/ *p++=(uint8_t)value; default: break; /* will never occur */ @@ -902,6 +903,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, s=pArgs->source; match=-match-2; /* remove 2 for the initial code point */ for(j=cnv->preFromULength; j=0); cnv->preFromU[j]=*s++; } pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ diff --git a/icu4c/source/common/ucnv_lmb.c b/icu4c/source/common/ucnv_lmb.c index 275e7d52679..3df255692d4 100644 --- a/icu4c/source/common/ucnv_lmb.c +++ b/icu4c/source/common/ucnv_lmb.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2010, International Business Machines +* Copyright (C) 2000-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_lmb.cpp @@ -798,11 +798,11 @@ LMBCSConversionWorker ( { case 4: *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); - case 3: + case 3: /*fall through*/ *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); - case 2: + case 2: /*fall through*/ *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); - case 1: + case 1: /*fall through*/ *pLMBCS++ = (ulmbcs_byte_t)value; default: /* will never occur */ diff --git a/icu4c/source/common/ucnv_u7.c b/icu4c/source/common/ucnv_u7.c index 84613283889..42943f4129a 100644 --- a/icu4c/source/common/ucnv_u7.c +++ b/icu4c/source/common/ucnv_u7.c @@ -21,6 +21,7 @@ #include "unicode/ucnv.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" +#include "uassert.h" /* UTF-7 -------------------------------------------------------------------- */ @@ -486,6 +487,7 @@ _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, inDirectMode=(UBool)((status>>24)&1); base64Counter=(int8_t)(status>>16); bits=(uint8_t)status; + U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0])); } /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ diff --git a/icu4c/source/common/ucnv_u8.c b/icu4c/source/common/ucnv_u8.c index 6ff1f5fa224..8b265cb0fc4 100644 --- a/icu4c/source/common/ucnv_u8.c +++ b/icu4c/source/common/ucnv_u8.c @@ -671,7 +671,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, break; } ++source; - case 5: + case 5: /*fall through*/ ch += (myByte = *source); ch <<= 6; if (!U8_IS_TRAIL(myByte)) @@ -680,7 +680,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, break; } ++source; - case 4: + case 4: /*fall through*/ ch += (myByte = *source); ch <<= 6; if (!U8_IS_TRAIL(myByte)) @@ -689,7 +689,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, break; } ++source; - case 3: + case 3: /*fall through*/ ch += (myByte = *source); ch <<= 6; if (!U8_IS_TRAIL(myByte)) @@ -698,7 +698,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, break; } ++source; - case 2: + case 2: /*fall through*/ ch += (myByte = *source); if (!U8_IS_TRAIL(myByte)) { diff --git a/icu4c/source/common/ucnvbocu.cpp b/icu4c/source/common/ucnvbocu.cpp index 9898f50c399..b97d6662c7d 100644 --- a/icu4c/source/common/ucnvbocu.cpp +++ b/icu4c/source/common/ucnvbocu.cpp @@ -27,6 +27,7 @@ #include "putilimp.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" +#include "uassert.h" /* BOCU-1 constants and macros ---------------------------------------------- */ @@ -208,6 +209,17 @@ bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ } \ } +/* Faster versions of packDiff() for single-byte-encoded diff values. */ + +/** Is a diff value encodable in a single byte? */ +#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1) + +/** Encode a diff value in a single byte. */ +#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff)) + +/** Is a diff value encodable in two bytes? */ +#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2) + /* BOCU-1 implementation functions ------------------------------------------ */ #define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) @@ -256,7 +268,7 @@ bocu1Prev(int32_t c) { * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes * and return a packed integer with them. * - * The encoding favors small absolut differences with short encodings + * The encoding favors small absolute differences with short encodings * to compress runs of same-script characters. * * Optimized version with unrolled loops and fewer floating-point operations @@ -273,6 +285,7 @@ static int32_t packDiff(int32_t diff) { int32_t result, m; + U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */ if(diff>=BOCU1_REACH_NEG_1) { /* mostly positive differences, and single-byte negative ones */ #if 0 /* single-byte case handled in macros, see below */ @@ -372,16 +385,6 @@ packDiff(int32_t diff) { return result; } -/* Faster versions of packDiff() for single-byte-encoded diff values. */ - -/** Is a diff value encodable in a single byte? */ -#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1) - -/** Encode a diff value in a single byte. */ -#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff)) - -/** Is a diff value encodable in two bytes? */ -#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2) static void _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, @@ -548,10 +551,10 @@ getTrail: case 4: *target++=(uint8_t)(diff>>24); *offsets++=sourceIndex; - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(diff>>8); *offsets++=sourceIndex; /* case 1: handled above */ @@ -579,9 +582,9 @@ getTrail: /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(diff>>16); - case 2: + case 2: /*fall through*/ *charErrorBuffer++=(uint8_t)(diff>>8); - case 1: + case 1: /*fall through*/ *charErrorBuffer=(uint8_t)diff; default: /* will never occur */ @@ -596,10 +599,10 @@ getTrail: case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(diff>>8); *offsets++=sourceIndex; - case 1: + case 1: /*fall through*/ *target++=(uint8_t)diff; *offsets++=sourceIndex; default: @@ -776,7 +779,7 @@ getTrail: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(diff>>24); - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(diff>>16); /* case 2: handled above */ *target++=(uint8_t)(diff>>8); @@ -803,9 +806,9 @@ getTrail: /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(diff>>16); - case 2: + case 2: /*fall through*/ *charErrorBuffer++=(uint8_t)(diff>>8); - case 1: + case 1: /*fall through*/ *charErrorBuffer=(uint8_t)diff; default: /* will never occur */ @@ -819,9 +822,9 @@ getTrail: /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(diff>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(diff>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)diff; default: /* will never occur */ diff --git a/icu4c/source/common/ucnvisci.c b/icu4c/source/common/ucnvisci.c index 696fde4c682..055a0a497c9 100644 --- a/icu4c/source/common/ucnvisci.c +++ b/icu4c/source/common/ucnvisci.c @@ -26,6 +26,7 @@ #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "cstring.h" +#include "uassert.h" #define UCNV_OPTIONS_VERSION_MASK 0xf #define NUKTA 0x093c @@ -1333,6 +1334,7 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar i=1; found=FALSE; for (; i>24); - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(value>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; default: /* will never occur */ @@ -4513,13 +4513,13 @@ unassigned: case 4: *target++=(uint8_t)(value>>24); *offsets++=sourceIndex; - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(value>>16); *offsets++=sourceIndex; - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); *offsets++=sourceIndex; - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; *offsets++=sourceIndex; default: @@ -4544,9 +4544,9 @@ unassigned: /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(value>>16); - case 2: + case 2: /*fall through*/ *charErrorBuffer++=(uint8_t)(value>>8); - case 1: + case 1: /*fall through*/ *charErrorBuffer=(uint8_t)value; default: /* will never occur */ @@ -4563,12 +4563,12 @@ unassigned: if(offsets!=NULL) { *offsets++=sourceIndex; } - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); if(offsets!=NULL) { *offsets++=sourceIndex; } - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; if(offsets!=NULL) { *offsets++=sourceIndex; diff --git a/icu4c/source/common/ucnvscsu.c b/icu4c/source/common/ucnvscsu.c index cf2a0a5c519..c6e96e1f074 100644 --- a/icu4c/source/common/ucnvscsu.c +++ b/icu4c/source/common/ucnvscsu.c @@ -1395,11 +1395,11 @@ outputBytes: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(c>>24); - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(c>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(c>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)c; default: /* will never occur */ @@ -1411,13 +1411,13 @@ outputBytes: case 4: *target++=(uint8_t)(c>>24); *offsets++=sourceIndex; - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(c>>16); *offsets++=sourceIndex; - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(c>>8); *offsets++=sourceIndex; - case 1: + case 1: /*fall through*/ *target++=(uint8_t)c; *offsets++=sourceIndex; default: @@ -1448,11 +1448,11 @@ outputBytes: /* each branch falls through to the next one */ case 4: *p++=(uint8_t)(c>>24); - case 3: + case 3: /*fall through*/ *p++=(uint8_t)(c>>16); - case 2: + case 2: /*fall through*/ *p++=(uint8_t)(c>>8); - case 1: + case 1: /*fall through*/ *p=(uint8_t)c; default: /* will never occur */ @@ -1469,12 +1469,12 @@ outputBytes: if(offsets!=NULL) { *offsets++=sourceIndex; } - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(c>>8); if(offsets!=NULL) { *offsets++=sourceIndex; } - case 1: + case 1: /*fall through*/ *target++=(uint8_t)c; if(offsets!=NULL) { *offsets++=sourceIndex; @@ -1853,11 +1853,11 @@ outputBytes: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(c>>24); - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(c>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(c>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)c; default: /* will never occur */ @@ -1885,11 +1885,11 @@ outputBytes: /* each branch falls through to the next one */ case 4: *p++=(uint8_t)(c>>24); - case 3: + case 3: /*fall through*/ *p++=(uint8_t)(c>>16); - case 2: + case 2: /*fall through*/ *p++=(uint8_t)(c>>8); - case 1: + case 1: /*fall through*/ *p=(uint8_t)c; default: /* will never occur */ @@ -1903,9 +1903,9 @@ outputBytes: /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(c>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(c>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)c; default: break; diff --git a/icu4c/source/common/ucnvsel.cpp b/icu4c/source/common/ucnvsel.cpp index 03744df94d4..e0e86999988 100644 --- a/icu4c/source/common/ucnvsel.cpp +++ b/icu4c/source/common/ucnvsel.cpp @@ -757,19 +757,21 @@ ucnvsel_selectForString(const UConverterSelector* sel, } uprv_memset(mask, ~0, columns *4); - const UChar *limit; - if (length >= 0) { - limit = s + length; - } else { - limit = NULL; - } - - while (limit == NULL ? *s != 0 : s != limit) { - UChar32 c; - uint16_t pvIndex; - UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); - if (intersectMasks(mask, sel->pv+pvIndex, columns)) { - break; + if(s!=NULL) { + const UChar *limit; + if (length >= 0) { + limit = s + length; + } else { + limit = NULL; + } + + while (limit == NULL ? *s != 0 : s != limit) { + UChar32 c; + uint16_t pvIndex; + UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); + if (intersectMasks(mask, sel->pv+pvIndex, columns)) { + break; + } } } return selectForMask(sel, mask, status); @@ -800,13 +802,16 @@ ucnvsel_selectForUTF8(const UConverterSelector* sel, if (length < 0) { length = (int32_t)uprv_strlen(s); } - const char *limit = s + length; - while (s != limit) { - uint16_t pvIndex; - UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); - if (intersectMasks(mask, sel->pv+pvIndex, columns)) { - break; + if(s!=NULL) { + const char *limit = s + length; + + while (s != limit) { + uint16_t pvIndex; + UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); + if (intersectMasks(mask, sel->pv+pvIndex, columns)) { + break; + } } } return selectForMask(sel, mask, status); diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 009ebb8d46e..7d2b1f235c1 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -867,6 +867,7 @@ uloc_getKeywordValue(const char* localeID, /* trim trailing spaces */ while(startSearchHere[i-1] == ' ') { i--; + U_ASSERT(i>=0); } localeKeywordNameBuffer[i] = 0; @@ -1008,6 +1009,7 @@ uloc_setKeywordValue(const char* keywordName, while(keywordStart[i-1] == ' ') { i--; } + U_ASSERT(i>=0); localeKeywordNameBuffer[i] = 0; nextSeparator = uprv_strchr(nextEqualsign, ';'); @@ -1225,6 +1227,7 @@ ulocimp_getLanguage(const char *localeID, language[i]=(char)uprv_tolower(*localeID); } if(i<3) { + U_ASSERT(i>=0); lang[i]=(char)uprv_tolower(*localeID); } i++; @@ -1670,7 +1673,8 @@ _canonicalize(const char* localeID, } ++len; - scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID); + scriptSize=ulocimp_getScript(tmpLocaleID+1, + (len 0) { /* Found optional script */ tmpLocaleID = scriptID; @@ -1687,7 +1691,8 @@ _canonicalize(const char* localeID, if (_isIDSeparator(*tmpLocaleID)) { const char *cntryID; - int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID); + int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, + (len 0) { /* Found optional country */ tmpLocaleID = cntryID; @@ -1703,9 +1708,10 @@ _canonicalize(const char* localeID, ++len; } - variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len); + variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, + (len 0) { - variant = name+len; + variant = len> 15 | 1); @@ -1320,6 +1321,9 @@ UnicodeString::doReplace(int32_t start, // optimize append() onto a large-enough, owned string if(start >= oldLength) { + if(srcLength == 0) { + return *this; + } newLength = oldLength + srcLength; if(newLength <= getCapacity() && isBufferWritable()) { UChar *oldArray = getArrayStart(); @@ -1614,7 +1618,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, newCapacity > getCapacity() ) { // check growCapacity for default value and use of the stack buffer - if(growCapacity == -1) { + if(growCapacity < 0) { growCapacity = newCapacity; } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { growCapacity = US_STACKBUF_SIZE; @@ -1626,6 +1630,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, uint8_t flags = fFlags; if(flags&kUsingStackBuffer) { + U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { // copy the stack buffer contents because it will be overwritten with // fUnion.fFields values @@ -1636,6 +1641,7 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, } } else { oldArray = fUnion.fFields.fArray; + U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ } // allocate a new array diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index bab50d4ffbc..8b1023a5e10 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -57,7 +57,9 @@ UnicodeString::doCaseCompare(int32_t start, const UChar *chars = getArrayStart(); chars += start; - srcChars += srcStart; + if(srcStart!=0) { + srcChars += srcStart; + } if(chars != srcChars) { UErrorCode errorCode=U_ZERO_ERROR; diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index d31682183ee..9ca56bfae47 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -114,31 +114,15 @@ unorm_normalize(const UChar *src, int32_t srcLength, /* iteration functions ------------------------------------------------------ */ static int32_t -unorm_iterate(UCharIterator *src, UBool forward, +_iterate(UCharIterator *src, UBool forward, UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, + const Normalizer2 *n2, UBool doNormalize, UBool *pNeededToNormalize, UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - const UnicodeSet *uni32; - if(options&UNORM_UNICODE_3_2) { - uni32=uniset_getUnicode32Instance(*pErrorCode); - } else { - uni32=NULL; // unused - } - if(U_FAILURE(*pErrorCode)) { return 0; } - - FilteredNormalizer2 fn2(*n2, *uni32); - if(options&UNORM_UNICODE_3_2) { - n2=&fn2; - } - - if( destCapacity<0 || (dest==NULL && destCapacity>0) || - src==NULL - ) { + if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -189,6 +173,26 @@ unorm_iterate(UCharIterator *src, UBool forward, } } +static int32_t +unorm_iterate(UCharIterator *src, UBool forward, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + FilteredNormalizer2 fn2(*n2, *uni32); + return _iterate(src, forward, dest, destCapacity, + &fn2, doNormalize, pNeededToNormalize, pErrorCode); + } + return _iterate(src, forward, dest, destCapacity, + n2, doNormalize, pNeededToNormalize, pErrorCode); +} + U_CAPI int32_t U_EXPORT2 unorm_previous(UCharIterator *src, UChar *dest, int32_t destCapacity, @@ -217,33 +221,17 @@ unorm_next(UCharIterator *src, /* Concatenation of normalized strings -------------------------------------- */ -U_CAPI int32_t U_EXPORT2 -unorm_concatenate(const UChar *left, int32_t leftLength, +static int32_t +_concatenate(const UChar *left, int32_t leftLength, const UChar *right, int32_t rightLength, UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, + const Normalizer2 *n2, UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - const UnicodeSet *uni32; - if(options&UNORM_UNICODE_3_2) { - uni32=uniset_getUnicode32Instance(*pErrorCode); - } else { - uni32=NULL; // unused - } - if(U_FAILURE(*pErrorCode)) { return 0; } - - FilteredNormalizer2 fn2(*n2, *uni32); - if(options&UNORM_UNICODE_3_2) { - n2=&fn2; - } - - if( destCapacity<0 || (dest==NULL && destCapacity>0) || - left==NULL || leftLength<-1 || - right==NULL || rightLength<-1 - ) { + if(destCapacity<0 || (dest==NULL && destCapacity>0) || + left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -269,4 +257,24 @@ unorm_concatenate(const UChar *left, int32_t leftLength, extract(dest, destCapacity, *pErrorCode); } +U_CAPI int32_t U_EXPORT2 +unorm_concatenate(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + FilteredNormalizer2 fn2(*n2, *uni32); + return _concatenate(left, leftLength, right, rightLength, + dest, destCapacity, &fn2, pErrorCode); + } + return _concatenate(left, leftLength, right, rightLength, + dest, destCapacity, n2, pErrorCode); +} + #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/common/unormcmp.cpp b/icu4c/source/common/unormcmp.cpp index f53f8660af6..521e355883a 100644 --- a/icu4c/source/common/unormcmp.cpp +++ b/icu4c/source/common/unormcmp.cpp @@ -233,10 +233,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level1; - start1=stack1[level1].start; + start1=stack1[level1].start; /*Not uninitialized*/ } while(start1==NULL); - s1=stack1[level1].s; - limit1=stack1[level1].limit; + s1=stack1[level1].s; /*Not uninitialized*/ + limit1=stack1[level1].limit; /*Not uninitialized*/ } } @@ -256,10 +256,10 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level2; - start2=stack2[level2].start; + start2=stack2[level2].start; /*Not uninitialized*/ } while(start2==NULL); - s2=stack2[level2].s; - limit2=stack2[level2].limit; + s2=stack2[level2].s; /*Not uninitialized*/ + limit2=stack2[level2].limit; /*Not uninitialized*/ } } @@ -534,6 +534,35 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } } +static +UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length, + UnicodeString &normalized, UErrorCode *pErrorCode) { + UnicodeString str(length<0, s, length); + + // check if s fulfill the conditions + int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return FALSE; + } + /* + * ICU 2.4 had a further optimization: + * If both strings were not in FCD, then they were both NFD'ed, + * and the _COMPARE_EQUIV option was turned off. + * It is not entirely clear that this is valid with the current + * definition of the canonical caseless match. + * Therefore, ICU 2.6 removes that optimization. + */ + if(spanQCYesnormalizeSecondAndAppend(normalized, unnormalized, *pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + return TRUE; + } + } + return FALSE; +} + U_CAPI int32_t U_EXPORT2 unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, @@ -584,51 +613,26 @@ unorm_compare(const UChar *s1, int32_t length1, return 0; } - // check if s1 and/or s2 fulfill the FCD conditions - const UnicodeSet *uni32; if(normOptions&UNORM_UNICODE_3_2) { - uni32=uniset_getUnicode32Instance(*pErrorCode); + const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); + FilteredNormalizer2 fn2(*n2, *uni32); + if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } } else { - uni32=NULL; // unused - } - if(U_FAILURE(*pErrorCode)) { - return 0; - } - FilteredNormalizer2 fn2(*n2, *uni32); - if(normOptions&UNORM_UNICODE_3_2) { - n2=&fn2; - } - - UnicodeString str1(length1<0, s1, length1); - UnicodeString str2(length2<0, s2, length2); - int32_t spanQCYes1=n2->spanQuickCheckYes(str1, *pErrorCode); - int32_t spanQCYes2=n2->spanQuickCheckYes(str2, *pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* - * ICU 2.4 had a further optimization: - * If both strings were not in FCD, then they were both NFD'ed, - * and the _COMPARE_EQUIV option was turned off. - * It is not entirely clear that this is valid with the current - * definition of the canonical caseless match. - * Therefore, ICU 2.6 removes that optimization. - */ - - if(spanQCYes1normalizeSecondAndAppend(fcd1, unnormalized, *pErrorCode); - s1=fcd1.getBuffer(); - length1=fcd1.length(); - } - if(spanQCYes2normalizeSecondAndAppend(fcd2, unnormalized, *pErrorCode); - s2=fcd2.getBuffer(); - length2=fcd2.length(); + if(_normalize(n2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(n2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } } } diff --git a/icu4c/source/common/uresbund.cpp b/icu4c/source/common/uresbund.cpp index 5a0a510daf8..450f43fdd2f 100644 --- a/icu4c/source/common/uresbund.cpp +++ b/icu4c/source/common/uresbund.cpp @@ -1410,7 +1410,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* case URES_BINARY: case URES_INT_VECTOR: *status = U_RESOURCE_TYPE_MISMATCH; - default: + default: /*fall through*/ return NULL; } } diff --git a/icu4c/source/common/uresdata.c b/icu4c/source/common/uresdata.c index 6ea0b97fcde..f5218ea8f82 100644 --- a/icu4c/source/common/uresdata.c +++ b/icu4c/source/common/uresdata.c @@ -31,6 +31,7 @@ #include "uinvchar.h" #include "uresdata.h" #include "uresimp.h" +#include "uassert.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -53,7 +54,12 @@ /* empty resources, returned when the resource offset is 0 */ static const uint16_t gEmpty16=0; -static const int32_t gEmpty32=0; + +static const struct { + int32_t length; + int32_t res; +} gEmpty32={ 0, 0 }; + static const struct { int32_t length; UChar nul; @@ -352,7 +358,7 @@ res_getBinary(const ResourceData *pResData, Resource res, int32_t *pLength) { uint32_t offset=RES_GET_OFFSET(res); int32_t length; if(RES_GET_TYPE(res)==URES_BINARY) { - const int32_t *p32= offset==0 ? &gEmpty32 : pResData->pRoot+offset; + const int32_t *p32= offset==0 ? (const int32_t*)&gEmpty32 : pResData->pRoot+offset; length=*p32++; p=(const uint8_t *)p32; } else { @@ -372,7 +378,7 @@ res_getIntVector(const ResourceData *pResData, Resource res, int32_t *pLength) { uint32_t offset=RES_GET_OFFSET(res); int32_t length; if(RES_GET_TYPE(res)==URES_INT_VECTOR) { - p= offset==0 ? &gEmpty32 : pResData->pRoot+offset; + p= offset==0 ? (const int32_t *)&gEmpty32 : pResData->pRoot+offset; length=*p++; } else { p=NULL; @@ -419,12 +425,14 @@ res_getTableItemByKey(const ResourceData *pResData, Resource table, } switch(RES_GET_TYPE(table)) { case URES_TABLE: { - const uint16_t *p= offset==0 ? &gEmpty16 : (const uint16_t *)(pResData->pRoot+offset); - length=*p++; - *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); - if(idx>=0) { - const Resource *p32=(const Resource *)(p+length+(~length&1)); - return p32[idx]; + if (offset!=0) { /* empty if offset==0 */ + const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset); + length=*p++; + *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); + if(idx>=0) { + const Resource *p32=(const Resource *)(p+length+(~length&1)); + return p32[idx]; + } } break; } @@ -438,11 +446,13 @@ res_getTableItemByKey(const ResourceData *pResData, Resource table, break; } case URES_TABLE32: { - const int32_t *p= offset==0 ? &gEmpty32 : pResData->pRoot+offset; - length=*p++; - *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key); - if(idx>=0) { - return (Resource)p[length+idx]; + if (offset!=0) { /* empty if offset==0 */ + const int32_t *p= pResData->pRoot+offset; + length=*p++; + *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key); + if(idx>=0) { + return (Resource)p[length+idx]; + } } break; } @@ -457,16 +467,19 @@ res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexR, const char **key) { uint32_t offset=RES_GET_OFFSET(table); int32_t length; + U_ASSERT(indexR>=0); /* to ensure the index is not negative */ switch(RES_GET_TYPE(table)) { case URES_TABLE: { - const uint16_t *p= offset==0 ? &gEmpty16 : (const uint16_t *)(pResData->pRoot+offset); - length=*p++; - if(indexRpRoot+offset); + length=*p++; + if(indexRpRoot+offset; - length=*p++; - if(indexRpRoot+offset; + length=*p++; + if(indexR=0); /* to ensure the index is not negative */ switch(RES_GET_TYPE(array)) { case URES_ARRAY: { - const int32_t *p= offset==0 ? &gEmpty32 : pResData->pRoot+offset; - if(indexR<*p) { - return (Resource)p[1+indexR]; + if (offset!=0) { /* empty if offset==0 */ + const int32_t *p= pResData->pRoot+offset; + if(indexR<*p) { + return (Resource)p[1+indexR]; + } } break; } diff --git a/icu4c/source/common/ushape.cpp b/icu4c/source/common/ushape.cpp index c90ee73a8a9..14aaffc6bbb 100644 --- a/icu4c/source/common/ushape.cpp +++ b/icu4c/source/common/ushape.cpp @@ -24,6 +24,7 @@ #include "putilimp.h" #include "ustr_imp.h" #include "ubidi_props.h" +#include "uassert.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -909,6 +910,9 @@ expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UE while(i >= 0 && j >= 0) { if( countl>0 && isLamAlefChar(dest[i])) { tempbuffer[j] = LAM_CHAR; + /* to ensure the array index is within the range */ + U_ASSERT(dest[i]-0xFEF5 >= 0 + && dest[i]-0xFEF5 < sizeof(convertLamAlef)/sizeof(convertLamAlef[0])); tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; j--; countl--; @@ -1298,8 +1302,11 @@ shapeUnicode(UChar *dest, int32_t sourceLength, if (tashkeelFlag == 2){ dest[i] = TASHKEEL_SPACE_SUB; tashkeelFound = 1; - }else { - dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; + } else { + /* to ensure the array index is within the range */ + U_ASSERT(dest[i]-0x064B >= 0 + && dest[i]-0x064B < sizeof(IrrelevantPos)/sizeof(IrrelevantPos[0])); + dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; } }else if ((currLink & APRESENT) > 0) { dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index b0b4ef7c04e..fce05c8e69d 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -544,10 +544,10 @@ u_strcmpFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level1; - start1=stack1[level1].start; + start1=stack1[level1].start; /*Not uninitialized*/ } while(start1==NULL); - s1=stack1[level1].s; - limit1=stack1[level1].limit; + s1=stack1[level1].s; /*Not uninitialized*/ + limit1=stack1[level1].limit; /*Not uninitialized*/ } } @@ -567,10 +567,10 @@ u_strcmpFold(const UChar *s1, int32_t length1, /* reached end of level buffer, pop one level */ do { --level2; - start2=stack2[level2].start; + start2=stack2[level2].start; /*Not uninitialized*/ } while(start2==NULL); - s2=stack2[level2].s; - limit2=stack2[level2].limit; + s2=stack2[level2].s; /*Not uninitialized*/ + limit2=stack2[level2].limit; /*Not uninitialized*/ } } diff --git a/icu4c/source/common/ustrtrns.cpp b/icu4c/source/common/ustrtrns.cpp index beecd91506c..d702aa8fd6b 100644 --- a/icu4c/source/common/ustrtrns.cpp +++ b/icu4c/source/common/ustrtrns.cpp @@ -32,6 +32,7 @@ #include "cstring.h" #include "cmemory.h" #include "ustr_imp.h" +#include "uassert.h" U_CAPI UChar* U_EXPORT2 u_strFromUTF32WithSub(UChar *dest, @@ -65,7 +66,7 @@ u_strFromUTF32WithSub(UChar *dest, } pDest = dest; - destLimit = dest + destCapacity; + destLimit = (dest!=NULL)?(dest + destCapacity):NULL; reqLength = 0; numSubstitutions = 0; @@ -86,7 +87,7 @@ u_strFromUTF32WithSub(UChar *dest, while(*++srcLimit != 0) {} } } else { - srcLimit = src + srcLength; + srcLimit = (src!=NULL)?(src + srcLength):NULL; } /* convert with length */ @@ -102,7 +103,7 @@ u_strFromUTF32WithSub(UChar *dest, } break; } else if(0x10000 <= ch && ch <= 0x10ffff) { - if((pDest + 2) <= destLimit) { + if(pDest!=NULL && ((pDest + 2) <= destLimit)) { *pDest++ = U16_LEAD(ch); *pDest++ = U16_TRAIL(ch); } else { @@ -180,7 +181,7 @@ u_strToUTF32WithSub(UChar32 *dest, } pDest = dest; - destLimit = dest + destCapacity; + destLimit = (dest!=NULL)?(dest + destCapacity):NULL; reqLength = 0; numSubstitutions = 0; @@ -200,7 +201,7 @@ u_strToUTF32WithSub(UChar32 *dest, while(*++srcLimit != 0) {} } } else { - srcLimit = src + srcLength; + srcLimit = (src!=NULL)?(src + srcLength):NULL; } /* convert with length */ @@ -272,6 +273,7 @@ utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) { const uint8_t *s=*ps; uint8_t trail, illegal=0; uint8_t count=U8_COUNT_TRAIL_BYTES(c); + U_ASSERT(count<6); U8_MASK_LEAD_BYTE((c), count); /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ switch(count) { @@ -289,7 +291,7 @@ utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) { illegal=1; break; } - case 2: + case 2: /*fall through*/ trail=(uint8_t)(*s++ - 0x80); if(trail>0x3f) { /* not a trail byte */ @@ -297,7 +299,7 @@ utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) { break; } c=(c<<6)|trail; - case 1: + case 1: /*fall through*/ trail=(uint8_t)(*s++ - 0x80); if(trail>0x3f) { /* not a trail byte */ @@ -360,11 +362,11 @@ utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c illegal=1; break; } - case 2: + case 2: /*fall through*/ trail=*s++; c=(c<<6)|(trail&0x3f); illegal|=(trail&0xc0)^0x80; - case 1: + case 1: /*fall through*/ trail=*s++; c=(c<<6)|(trail&0x3f); illegal|=(trail&0xc0)^0x80; @@ -379,6 +381,7 @@ utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c /* correct sequence - all trail bytes have (b7..b6)==(10)? */ /* illegal is also set if count>=4 */ + U_ASSERT(count= 0 */ { - const uint8_t *pSrcLimit = pSrc + srcLength; + const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL; /* * This function requires that if srcLength is given, then it must be @@ -981,7 +984,7 @@ u_strToUTF8WithSub(char *dest, int32_t reqLength=0; uint32_t ch=0,ch2=0; uint8_t *pDest = (uint8_t *)dest; - uint8_t *pDestLimit = pDest + destCapacity; + uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL; int32_t numSubstitutions; /* args check */ @@ -1075,7 +1078,7 @@ u_strToUTF8WithSub(char *dest, } } } else { - const UChar *pSrcLimit = pSrc+srcLength; + const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL; int32_t count; /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ @@ -1547,7 +1550,7 @@ u_strToJavaModifiedUTF8( } /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ - pSrcLimit = src+srcLength; + pSrcLimit = (src!=NULL)?(src+srcLength):NULL; for(;;) { count = (int32_t)(pDestLimit - pDest); srcLength = (int32_t)(pSrcLimit - src); diff --git a/icu4c/source/common/utext.cpp b/icu4c/source/common/utext.cpp index 30bfdeaa2ca..6b98b825be9 100644 --- a/icu4c/source/common/utext.cpp +++ b/icu4c/source/common/utext.cpp @@ -1752,7 +1752,7 @@ utext_strFromUTF8(UChar *dest, { UChar *pDest = dest; - UChar *pDestLimit = dest+destCapacity; + UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; UChar32 ch=0; int32_t index = 0; int32_t reqLength = 0; @@ -2864,7 +2864,6 @@ ucstrTextExtract(UText *ut, } else { limit32 = pinIndex(limit, INT32_MAX); } - di = 0; for (si=start32; si=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */ if (dinext32PostInc(); int32_t len = U16_LENGTH(c); + U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */ if (desti+len <= destCapacity) { U16_APPEND_UNSAFE(dest, desti, c); copyLimit = srci+len; diff --git a/icu4c/source/common/utf_impl.c b/icu4c/source/common/utf_impl.c index 982ce065593..7ff990b18a5 100644 --- a/icu4c/source/common/utf_impl.c +++ b/icu4c/source/common/utf_impl.c @@ -26,6 +26,7 @@ #include "unicode/utf.h" #include "unicode/utf8.h" #include "unicode/utf_old.h" +#include "uassert.h" /* * This table could be replaced on many machines by @@ -111,6 +112,7 @@ U_CAPI UChar32 U_EXPORT2 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { int32_t i=*pi; uint8_t count=U8_COUNT_TRAIL_BYTES(c); + U_ASSERT(count >= 0 && count <= 5); /* U8_COUNT_TRAIL_BYTES returns value 0...5 */ if((i)+count<=(length)) { uint8_t trail, illegal=0; diff --git a/icu4c/source/common/utrie2.cpp b/icu4c/source/common/utrie2.cpp index 40650fe6b4d..ac6a9256a96 100644 --- a/icu4c/source/common/utrie2.cpp +++ b/icu4c/source/common/utrie2.cpp @@ -33,6 +33,7 @@ #include "cmemory.h" #include "utrie2.h" #include "utrie2_impl.h" +#include "uassert.h" /* Public UTrie2 API implementation ----------------------------------------- */ @@ -532,6 +533,7 @@ enumEitherTrie(const UTrie2 *trie, if(trie->newTrie==NULL) { /* frozen trie */ idx=trie->index; + U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ data32=trie->data32; index2NullOffset=trie->index2NullOffset; @@ -540,6 +542,7 @@ enumEitherTrie(const UTrie2 *trie, /* unfrozen, mutable trie */ idx=NULL; data32=trie->newTrie->data; + U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ index2NullOffset=trie->newTrie->index2NullOffset; nullBlock=trie->newTrie->dataNullOffset; diff --git a/icu4c/source/i18n/alphaindex.cpp b/icu4c/source/i18n/alphaindex.cpp index 47157f0bcb8..2a9a16993ec 100644 --- a/icu4c/source/i18n/alphaindex.cpp +++ b/icu4c/source/i18n/alphaindex.cpp @@ -869,8 +869,10 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { return NULL; } UVector *dest = new UVector(status); - if (dest == NULL && U_SUCCESS(status)) { - status = U_MEMORY_ALLOCATION_ERROR; + if (dest == NULL) { + if (U_SUCCESS(status)) { + status = U_MEMORY_ALLOCATION_ERROR; + } return NULL; } dest->setDeleter(uprv_deleteUObject); @@ -883,9 +885,10 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { UnicodeString *str = new UnicodeString(src, -1); if (str == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + } else { + dest->addElement(str, status); + src += str->length() + 1; } - dest->addElement(str, status); - src += str->length() + 1; } while (src < limit); dest->sortWithUComparator(sortCollateComparator, collator_, status); return dest; diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index c66f0501164..df2ee1d9c1b 100644 --- a/icu4c/source/i18n/calendar.cpp +++ b/icu4c/source/i18n/calendar.cpp @@ -50,6 +50,7 @@ #include "locbased.h" #include "uresimp.h" #include "ustrenum.h" +#include "uassert.h" #if !UCONFIG_NO_SERVICE static icu::ICULocaleService* gService = NULL; @@ -2485,6 +2486,7 @@ UCalendarDateFields Calendar::resolveFields(const UFieldResolutionTable* precede int32_t lineStamp = kUnset; // Skip over first entry if it is negative for (int32_t i=((precedenceTable[g][l][0]>=kResolveRemap)?1:0); precedenceTable[g][l][i]!=-1; ++i) { + U_ASSERT(precedenceTable[g][l][i] < UCAL_FIELD_COUNT); int32_t s = fStamp[precedenceTable[g][l][i]]; // If any field is unset then don't use this line if (s == kUnset) { diff --git a/icu4c/source/i18n/coleitr.cpp b/icu4c/source/i18n/coleitr.cpp index 173166af622..7db3e5f7527 100644 --- a/icu4c/source/i18n/coleitr.cpp +++ b/icu4c/source/i18n/coleitr.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1996-2010, International Business Machines Corporation and * +* Copyright (C) 1996-2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -31,6 +31,7 @@ #include "unicode/coleitr.h" #include "unicode/ustring.h" #include "ucol_imp.h" +#include "uassert.h" #include "cmemory.h" @@ -418,7 +419,7 @@ const CollationElementIterator& CollationElementIterator::operator=( } /* start and end of string */ - coliter->endp = coliter->string + length; + coliter->endp = coliter->string == NULL ? NULL : coliter->string + length; /* handle writable buffer here */ @@ -431,6 +432,7 @@ const CollationElementIterator& CollationElementIterator::operator=( if (othercoliter->pos >= othercoliter->string && othercoliter->pos <= othercoliter->endp) { + U_ASSERT(coliter->string != NULL); coliter->pos = coliter->string + (othercoliter->pos - othercoliter->string); } @@ -462,6 +464,7 @@ const CollationElementIterator& CollationElementIterator::operator=( } if (othercoliter->fcdPosition != NULL) { + U_ASSERT(coliter->string != NULL); coliter->fcdPosition = coliter->string + (othercoliter->fcdPosition - othercoliter->string); diff --git a/icu4c/source/i18n/colldata.cpp b/icu4c/source/i18n/colldata.cpp index 6d73aeacc40..358b3ac4227 100644 --- a/icu4c/source/i18n/colldata.cpp +++ b/icu4c/source/i18n/colldata.cpp @@ -29,6 +29,7 @@ #include "ucln_in.h" #include "ucol_imp.h" #include "umutex.h" +#include "uassert.h" #include "unicode/colldata.h" @@ -893,6 +894,7 @@ int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t #endif if (ceList->matchesAt(offset, ceList2)) { + U_ASSERT(ceList2 != NULL); int32_t clength = ceList2->size(); int32_t slength = string->length(); int32_t roffset = offset + clength; diff --git a/icu4c/source/i18n/decNumber.c b/icu4c/source/i18n/decNumber.c index ab690782f32..b4c147bfaa1 100644 --- a/icu4c/source/i18n/decNumber.c +++ b/icu4c/source/i18n/decNumber.c @@ -1,7 +1,7 @@ /* ------------------------------------------------------------------ */ /* Decimal Number arithmetic module */ /* ------------------------------------------------------------------ */ -/* Copyright (c) IBM Corporation, 2000-2010. All rights reserved. */ +/* Copyright (c) IBM Corporation, 2000-2011. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ @@ -181,6 +181,7 @@ #include "cmemory.h" /* for uprv_malloc, etc., in ICU */ #include "decNumber.h" /* base number library */ #include "decNumberLocal.h" /* decNumber local types, etc. */ +#include "uassert.h" /* Constants */ /* Public lookup table used by the D2U macro */ @@ -4059,6 +4060,8 @@ static decNumber * decAddOp(decNumber *res, const decNumber *lhs, #endif /* add [A+B*m] or subtract [A+B*(-m)] */ + U_ASSERT(rhs->digits > 0); + U_ASSERT(lhs->digits > 0); res->digits=decUnitAddSub(lhs->lsu, D2U(lhs->digits), rhs->lsu, D2U(rhs->digits), rhsshift, acc, mult) @@ -4971,6 +4974,10 @@ static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, /* (rounded up to a multiple of 8 bytes), and the uLong */ /* accumulator starts offset the appropriate number of units */ /* to the right to avoid overwrite during the unchunking. */ + + /* Make sure no signed int overflow below. This is always true */ + /* if the given numbers have less digits than DEC_MAX_DIGITS. */ + U_ASSERT(iacc <= INT32_MAX/sizeof(uLong)); needbytes=iacc*sizeof(uLong); #if DECDPUN==1 zoff=(iacc+7)/8; /* items to offset by */ @@ -6978,6 +6985,7 @@ static void decSetCoeff(decNumber *dn, decContext *set, const Unit *lsu, if (cut==0) quot=*up; /* is at bottom of unit */ else /* cut>0 */ { /* it's not at bottom of unit */ #if DECDPUN<=4 + U_ASSERT(cut >= 0 && cut <= 4); quot=QUOT10(*up, cut); rem=*up-quot*powers[cut]; #else diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp index df3e7c6fb7b..ba9b9bd4488 100644 --- a/icu4c/source/i18n/plurrule.cpp +++ b/icu4c/source/i18n/plurrule.cpp @@ -27,6 +27,7 @@ #include "ucln_in.h" #include "ustrfmt.h" #include "locutil.h" +#include "uassert.h" #if !UCONFIG_NO_FORMATTING @@ -363,6 +364,7 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode } switch (type) { case tAnd: + U_ASSERT(curAndConstraint != NULL); curAndConstraint = curAndConstraint->add(); break; case tOr: @@ -380,19 +382,24 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode curAndConstraint = orNode->add(); break; case tIs: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->rangeHigh=-1; break; case tNot: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->notIn=TRUE; break; case tIn: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; curAndConstraint->integerOnly = TRUE; break; case tWithin: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; break; case tNumber: + U_ASSERT(curAndConstraint != NULL); if ( (curAndConstraint->op==AndConstraint::MOD)&& (curAndConstraint->opNum == -1 ) ) { curAndConstraint->opNum=getNumberValue(token); @@ -407,6 +414,7 @@ PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode } break; case tMod: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->op=AndConstraint::MOD; break; case tKeyword: diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index 604e926312c..c2af07be2c7 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -204,6 +204,7 @@ void RegexCompile::compile( if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class && fC.fQuoted == FALSE && // char is not escaped && fC.fChar != (UChar32)-1) { // char is not EOF + U_ASSERT(tableEl->fCharClass <= 137); if (RegexStaticSets::gStaticSets->fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { // Table row specified a character class, or set of characters, // and the current char matches it. diff --git a/icu4c/source/i18n/tmutfmt.cpp b/icu4c/source/i18n/tmutfmt.cpp index 9a40fc427d5..dc355a0365e 100644 --- a/icu4c/source/i18n/tmutfmt.cpp +++ b/icu4c/source/i18n/tmutfmt.cpp @@ -17,6 +17,7 @@ #include "hash.h" #include "uresimp.h" #include "unicode/msgfmt.h" +#include "uassert.h" #define LEFT_CURLY_BRACKET ((UChar)0x007B) #define RIGHT_CURLY_BRACKET ((UChar)0x007D) @@ -614,6 +615,7 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, char parentLocale[ULOC_FULLNAME_CAPACITY]; uprv_strcpy(parentLocale, localeName); int32_t locNameLen; + U_ASSERT(countToPatterns != NULL); while ((locNameLen = uloc_getParent(parentLocale, parentLocale, ULOC_FULLNAME_CAPACITY, &status)) >= 0){ // look for pattern for srcPluralCount in locale tree @@ -677,9 +679,9 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, uprv_strcat(pLocale, "_"); searchInLocaleChain(style, gUnitsTag, pLocale, srcTimeUnitField, srcPluralCount, searchPluralCount, countToPatterns, err); - if (countToPatterns != NULL) { - MessageFormat** formatters = (MessageFormat**)countToPatterns->get(srcPluralCount); - if (formatters != NULL && formatters[style] != NULL) return; + MessageFormat** formatters = (MessageFormat**)countToPatterns->get(srcPluralCount); + if (formatters != NULL && formatters[style] != NULL) { + return; } } diff --git a/icu4c/source/i18n/tzgnames.cpp b/icu4c/source/i18n/tzgnames.cpp index 3f178fbbf6f..293eae1b2cc 100644 --- a/icu4c/source/i18n/tzgnames.cpp +++ b/icu4c/source/i18n/tzgnames.cpp @@ -215,6 +215,7 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, } } if (U_SUCCESS(status)) { + U_ASSERT(fResults != NULL); GMatchInfo *gmatch = (GMatchInfo *)uprv_malloc(sizeof(GMatchInfo)); if (gmatch == NULL) { status = U_MEMORY_ALLOCATION_ERROR; diff --git a/icu4c/source/i18n/tznames_impl.cpp b/icu4c/source/i18n/tznames_impl.cpp index c23a3587071..ba2d0a10a21 100644 --- a/icu4c/source/i18n/tznames_impl.cpp +++ b/icu4c/source/i18n/tznames_impl.cpp @@ -164,6 +164,7 @@ TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { if (U_FAILURE(status)) { return; } + U_ASSERT(fLazyContents != NULL); UChar *s = const_cast(key); fLazyContents->addElement(s, status); fLazyContents->addElement(value, status); @@ -845,6 +846,7 @@ ZNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, } } if (U_SUCCESS(status)) { + U_ASSERT(fResults != NULL); ZMatchInfo *zmatch = (ZMatchInfo *)uprv_malloc(sizeof(ZMatchInfo)); if (zmatch == NULL) { status = U_MEMORY_ALLOCATION_ERROR; @@ -1038,6 +1040,7 @@ TimeZoneNamesImpl::getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode status = U_MEMORY_ALLOCATION_ERROR; } if (U_SUCCESS(status)) { + U_ASSERT(mzIDs != NULL); for (int32_t i = 0; U_SUCCESS(status) && i < mappings->size(); i++) { OlsonToMetaMappingEntry *map = (OlsonToMetaMappingEntry *)mappings->elementAt(i); diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index 3bb10699389..a4a6cb55919 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -301,6 +301,7 @@ void collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) { return; } int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuffer); + U_ASSERT(length >= offsetBufferSize || offsetStore != NULL); if(length >= offsetBufferSize) { int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE; int32_t *newBuffer = reinterpret_cast(uprv_malloc(newCapacity * 4)); @@ -1946,7 +1947,7 @@ inline UBool isAtStartPrevIterate(collIterate *data) { } //return (collIter_bos(data)) || return (data->pos == data->string) || - ((data->flags & UCOL_ITER_INNORMBUF) && + ((data->flags & UCOL_ITER_INNORMBUF) && (data->pos != NULL) && *(data->pos - 1) == 0 && data->fcdPosition == NULL); } diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp index ae9269fbde2..e33a59e47c3 100644 --- a/icu4c/source/i18n/ucol_elm.cpp +++ b/icu4c/source/i18n/ucol_elm.cpp @@ -38,6 +38,7 @@ #include "ucol_cnt.h" #include "unicode/caniter.h" #include "cmemory.h" +#include "uassert.h" U_NAMESPACE_USE @@ -1772,6 +1773,7 @@ uprv_uca_addFCD4AccentedContractions(tempUCATable *t, uprv_uca_setMapCE(t, el, status); uprv_uca_addAnElement(t, el, status); } + el->cPoints=NULL; /* don't leak reference to stack */ } static void diff --git a/icu4c/source/i18n/umsg.cpp b/icu4c/source/i18n/umsg.cpp index 8b2bb66836e..33015dd467a 100644 --- a/icu4c/source/i18n/umsg.cpp +++ b/icu4c/source/i18n/umsg.cpp @@ -634,6 +634,7 @@ int32_t umsg_autoQuoteApostrophe(const UChar* pattern, *ec = U_ILLEGAL_ARGUMENT_ERROR; return -1; } + U_ASSERT(destCapacity >= 0); if (patternLength == -1) { patternLength = u_strlen(pattern); @@ -695,6 +696,7 @@ int32_t umsg_autoQuoteApostrophe(const UChar* pattern, break; } + U_ASSERT(len >= 0); MAppend(c); } diff --git a/icu4c/source/i18n/unum.cpp b/icu4c/source/i18n/unum.cpp index fde9621a3a0..7d7fe9cd171 100644 --- a/icu4c/source/i18n/unum.cpp +++ b/icu4c/source/i18n/unum.cpp @@ -410,6 +410,7 @@ unum_parseDecimal(const UNumberFormat* fmt, uprv_strncpy(outBuf, sp.data(), sp.size()); *status = U_STRING_NOT_TERMINATED_WARNING; } else { + U_ASSERT(outBufLength > 0); uprv_strcpy(outBuf, sp.data()); } return sp.size(); diff --git a/icu4c/source/i18n/uregex.cpp b/icu4c/source/i18n/uregex.cpp index f096ac126a7..8618baa936c 100644 --- a/icu4c/source/i18n/uregex.cpp +++ b/icu4c/source/i18n/uregex.cpp @@ -1406,9 +1406,10 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, } else { UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, - &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), &possibleOverflowError); + dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), + &possibleOverflowError); } - + U_ASSERT(destIdx >= 0); // scan the replacement text, looking for substitutions ($n) and \escapes. int32_t replIdx = 0; @@ -1496,7 +1497,8 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, } // Finally, append the capture group data to the destination. - destIdx += uregex_group((URegularExpression*)regexp, groupNum, &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status); + destIdx += uregex_group((URegularExpression*)regexp, groupNum, + dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status); if (*status == U_BUFFER_OVERFLOW_ERROR) { // Ignore buffer overflow when extracting the group. We need to // continue on to get full size of the untruncated result. We will @@ -1626,6 +1628,8 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, } for (;;) { + U_ASSERT(destIdx >= 0); + if (srcIdx == regexp->fTextLength) { break; } @@ -1634,6 +1638,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, regexp->fTextLength = srcIdx; break; } + if (destIdx < destCap) { dest[destIdx] = c; } else { @@ -1686,7 +1691,7 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, if (destIdx < destCap) { *destBuf += destIdx; *destCapacity -= destIdx; - } else { + } else if (*destBuf != NULL) { *destBuf += destCap; *destCapacity = 0; } diff --git a/icu4c/source/i18n/uspoof_wsconf.cpp b/icu4c/source/i18n/uspoof_wsconf.cpp index 2417512de2b..6c64d77eb3b 100644 --- a/icu4c/source/i18n/uspoof_wsconf.cpp +++ b/icu4c/source/i18n/uspoof_wsconf.cpp @@ -397,11 +397,13 @@ cleanup: uprv_free(input); int32_t i; - for (i=0; isize(); i++) { - BuilderScriptSet *bsset = static_cast(scriptSets->elementAt(i)); - delete bsset; + if (scriptSets != NULL) { + for (i=0; isize(); i++) { + BuilderScriptSet *bsset = static_cast(scriptSets->elementAt(i)); + delete bsset; + } + delete scriptSets; } - delete scriptSets; utrie2_close(anyCaseTrie); utrie2_close(lowerCaseTrie); return; diff --git a/icu4c/source/i18n/zonemeta.cpp b/icu4c/source/i18n/zonemeta.cpp index 9c6b26e681b..0d90498cf2e 100644 --- a/icu4c/source/i18n/zonemeta.cpp +++ b/icu4c/source/i18n/zonemeta.cpp @@ -446,6 +446,8 @@ ZoneMeta::getSingleCountry(const UnicodeString &tzid, UnicodeString &country) { country.setToBogus(); return country; } + U_ASSERT(gSingleZoneCountries != NULL); + U_ASSERT(gMultiZonesCountries != NULL); } // Check if it was already cached @@ -775,6 +777,7 @@ ZoneMeta::initAvailableMetaZoneIDs () { uhash_close(metaZoneIDTable); } if (U_SUCCESS(status)) { + U_ASSERT(metaZoneIDs != NULL); metaZoneIDs->setDeleter(uprv_free); UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status); diff --git a/icu4c/source/io/ufile.c b/icu4c/source/io/ufile.c index 62d9551234c..e49d5b95dc5 100644 --- a/icu4c/source/io/ufile.c +++ b/icu4c/source/io/ufile.c @@ -143,7 +143,7 @@ u_fopen(const char *filename, fclose(systemFile); } - return result; + return result; /* not a file leak */ } U_CAPI UFILE* U_EXPORT2 diff --git a/icu4c/source/io/ustdio.c b/icu4c/source/io/ustdio.c index 14bdb9867c4..df4c4f1c37c 100644 --- a/icu4c/source/io/ustdio.c +++ b/icu4c/source/io/ustdio.c @@ -425,7 +425,7 @@ ufile_fill_uchar_buffer(UFILE *f) /* shift the buffer if it isn't empty */ if(dataSize != 0) { - uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); + uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */ } -- 2.40.0