From 0f4572a7e68a2c699451d94db0cf97f5e8cf1e7f Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Tue, 21 Mar 2017 23:08:37 +0000 Subject: [PATCH] ICU-13018 uloc_get/setKeywordValue have stricter input conditions; setKeywordValue uses CharString to assemble updated output X-SVN-Rev: 39891 --- icu4c/source/common/uloc.cpp | 420 +++++++++++++++------------ icu4c/source/common/unicode/uloc.h | 32 +- icu4c/source/test/cintltst/cloctst.c | 106 +++++-- 3 files changed, 342 insertions(+), 216 deletions(-) diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 4d98def1977..4d854bbcca3 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -44,10 +44,11 @@ #include "uarrsort.h" #include "uenumimp.h" #include "uassert.h" +#include "charstr.h" #include /* for sprintf */ -using namespace icu; +U_NAMESPACE_USE /* ### Declarations **************************************************/ @@ -560,6 +561,10 @@ static int32_t getShortestSubtagLength(const char *localeID) { } /* ### Keywords **************************************************/ +#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) +#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) +/* Punctuation/symbols allowed in legacy key values */ +#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/') #define ULOC_KEYWORD_BUFFER_LEN 25 #define ULOC_MAX_NO_KEYWORDS 25 @@ -596,20 +601,26 @@ locale_getKeywordsStart(const char *localeID) { */ static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) { - int32_t i; - int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); + int32_t keywordNameLen = 0; - if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; + for (; *keywordName != 0; keywordName++) { + if (!UPRV_ISALPHANUM(*keywordName)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + buf[keywordNameLen++] = uprv_tolower(*keywordName); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } } - - /* normalize the keyword name */ - for(i = 0; i < keywordNameLen; i++) { - buf[i] = uprv_tolower(keywordName[i]); + if (keywordNameLen == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ + return 0; } - buf[i] = 0; + buf[keywordNameLen] = 0; /* terminate */ return keywordNameLen; } @@ -837,87 +848,108 @@ uloc_getKeywordValue(const char* localeID, const char* nextSeparator = NULL; char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t i = 0; int32_t result = 0; if(status && U_SUCCESS(*status) && localeID) { char tempBuffer[ULOC_FULLNAME_CAPACITY]; const char* tmpLocaleID; + if (keywordName == NULL || keywordName[0] == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + locale_canonKeywordName(keywordNameBuffer, keywordName, status); + if(U_FAILURE(*status)) { + return 0; + } + if (_hasBCP47Extension(localeID)) { _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); } else { tmpLocaleID=localeID; } - startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ + startSearchHere = locale_getKeywordsStart(tmpLocaleID); if(startSearchHere == NULL) { /* no keywords, return at once */ return 0; } - locale_canonKeywordName(keywordNameBuffer, keywordName, status); - if(U_FAILURE(*status)) { - return 0; - } - /* find the first keyword */ while(startSearchHere) { - startSearchHere++; - /* skip leading spaces (allowed?) */ + const char* keyValueTail; + int32_t keyValueLen; + + startSearchHere++; /* skip @ or ; */ + nextSeparator = uprv_strchr(startSearchHere, '='); + if(!nextSeparator) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ + return 0; + } + /* strip leading & trailing spaces (TC decided to tolerate these) */ while(*startSearchHere == ' ') { startSearchHere++; } - nextSeparator = uprv_strchr(startSearchHere, '='); - /* need to normalize both keyword and keyword name */ - if(!nextSeparator) { - break; + keyValueTail = nextSeparator; + while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') { + keyValueTail--; } - if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (startSearchHere == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ + return 0; + } + keyValueLen = 0; + while (startSearchHere < keyValueTail) { + if (!UPRV_ISALPHANUM(*startSearchHere)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); + } else { /* keyword name too long for internal buffer */ *status = U_INTERNAL_PROGRAM_ERROR; return 0; + } } - for(i = 0; i < nextSeparator - startSearchHere; i++) { - localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); - } - /* trim trailing spaces */ - while(startSearchHere[i-1] == ' ') { - i--; - U_ASSERT(i>=0); - } - localeKeywordNameBuffer[i] = 0; + localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ startSearchHere = uprv_strchr(nextSeparator, ';'); if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { - nextSeparator++; + /* current entry matches the keyword. */ + nextSeparator++; /* skip '=' */ + /* First strip leading & trailing spaces (TC decided to tolerate these) */ while(*nextSeparator == ' ') { - nextSeparator++; + nextSeparator++; } - /* we actually found the keyword. Copy the value */ - if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { - while(*(startSearchHere-1) == ' ') { - startSearchHere--; - } - uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); - result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); - } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ - i = (int32_t)uprv_strlen(nextSeparator); - while(nextSeparator[i - 1] == ' ') { - i--; - } - uprv_strncpy(buffer, nextSeparator, i); - result = u_terminateChars(buffer, bufferCapacity, i, status); - } else { - /* give a bigger buffer, please */ - *status = U_BUFFER_OVERFLOW_ERROR; - if(startSearchHere) { - result = (int32_t)(startSearchHere - nextSeparator); - } else { - result = (int32_t)uprv_strlen(nextSeparator); - } + keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator); + while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') { + keyValueTail--; } + /* Now copy the value, but check well-formedness */ + if (nextSeparator == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ + return 0; + } + keyValueLen = 0; + while (nextSeparator < keyValueTail) { + if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return 0; + } + if (keyValueLen < bufferCapacity) { + /* Should we lowercase value to return here? Tests expect as-is. */ + buffer[keyValueLen++] = *nextSeparator++; + } else { /* keep advancing so we return correct length in case of overflow */ + keyValueLen++; + nextSeparator++; + } + } + result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status); return result; } } @@ -936,46 +968,59 @@ uloc_setKeywordValue(const char* keywordName, int32_t keywordValueLen; int32_t bufLen; int32_t needLen = 0; - int32_t foundValueLen; - int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1]; char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t i = 0; int32_t rc; char* nextSeparator = NULL; char* nextEqualsign = NULL; char* startSearchHere = NULL; char* keywordStart = NULL; - char *insertHere = NULL; + CharString updatedKeysAndValues; + int32_t updatedKeysAndValuesLen; + UBool handledInputKeyAndValue = FALSE; + char keyValuePrefix = '@'; + if(U_FAILURE(*status)) { return -1; } - if(bufferCapacity>1) { - bufLen = (int32_t)uprv_strlen(buffer); - } else { + if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } + bufLen = (int32_t)uprv_strlen(buffer); if(bufferCapacity= ULOC_KEYWORD_BUFFER_LEN) { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (keywordStart == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ return 0; } - for(i = 0; i < nextEqualsign - keywordStart; i++) { - localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); - } - /* trim trailing spaces */ - while(keywordStart[i-1] == ' ') { - i--; + keyValueLen = 0; + while (keywordStart < keyValueTail) { + if (!UPRV_ISALPHANUM(*keywordStart)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } } - U_ASSERT(i>=0 && i nextEqualsign && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + if (nextEqualsign == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */ + return 0; + } + rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); if(rc == 0) { - nextEqualsign++; - while(*nextEqualsign == ' ') { - nextEqualsign++; - } - /* we actually found the keyword. Change the value */ - if (nextSeparator) { - keywordAtEnd = 0; - foundValueLen = (int32_t)(nextSeparator - nextEqualsign); - } else { - keywordAtEnd = 1; - foundValueLen = (int32_t)uprv_strlen(nextEqualsign); - } - if(keywordValue) { /* adding a value - not removing */ - if(foundValueLen == keywordValueLen) { - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - return bufLen; /* no change in size */ - } else if(foundValueLen > keywordValueLen) { - int32_t delta = foundValueLen - keywordValueLen; - if(nextSeparator) { /* RH side */ - uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); - } - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - bufLen -= delta; - buffer[bufLen]=0; - return bufLen; - } else { /* FVL < KVL */ - int32_t delta = keywordValueLen - foundValueLen; - if((bufLen+delta) >= bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return bufLen+delta; - } - if(nextSeparator) { /* RH side */ - uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); - } - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - bufLen += delta; - buffer[bufLen]=0; - return bufLen; - } - } else { /* removing a keyword */ - if(keywordAtEnd) { - /* zero out the ';' or '@' just before startSearchhere */ - keywordStart[-1] = 0; - return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ - } else { - uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); - keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; - return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); - } + /* Current entry matches the input keyword. Update the entry */ + if(keywordValueLen > 0) { /* updating a value */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + } /* else removing this entry, don't emit anything */ + handledInputKeyAndValue = TRUE; + } else { + /* input keyword sorts earlier than current entry, add before current entry */ + if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* insert new entry at this location */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; } - } else if(rc<0){ /* end match keyword */ - /* could insert at this location. */ - insertHere = keywordStart; + /* copy the current entry */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status); + } + if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* append new entry at the end, it sorts later than existing entries */ + updatedKeysAndValues.append(keyValuePrefix, *status); + /* skip keyValuePrefix update, no subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; } keywordStart = nextSeparator; } /* end loop searching */ - if(!keywordValue) { - return bufLen; /* removal of non-extant keyword - no change */ - } - - /* we know there is at least one keyword. */ - needLen = bufLen+1+keywordNameLen+1+keywordValueLen; + /* Any error from updatedKeysAndValues.append above would be internal and not due to + * problems with the passed-in locale. So if we did encounter problems with the + * passed-in locale above, those errors took precedence and overrode any error + * status from updatedKeysAndValues.append, and also caused a return of 0. If there + * are errors here they are from updatedKeysAndValues.append; they do cause an + * error return but the passed-in locale is unmodified and the original bufLen is + * returned. + */ + if (!handledInputKeyAndValue || U_FAILURE(*status)) { + /* if input key/value specified removal of a keyword not present in locale, or + * there was an error in CharString.append, leave original locale alone. */ + return bufLen; + } + + updatedKeysAndValuesLen = updatedKeysAndValues.length(); + /* needLen = length of the part before '@' + length of updated key-value part including '@' */ + needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; if(needLen >= bufferCapacity) { *status = U_BUFFER_OVERFLOW_ERROR; return needLen; /* no change */ } - - if(insertHere) { - uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); - keywordStart = insertHere; - } else { - keywordStart = buffer+bufLen; - *keywordStart = ';'; - keywordStart++; - } - uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); - keywordStart += keywordNameLen; - *keywordStart = '='; - keywordStart++; - uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ - keywordStart+=keywordValueLen; - if(insertHere) { - *keywordStart = ';'; - keywordStart++; + if (updatedKeysAndValuesLen > 0) { + uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); } buffer[needLen]=0; return needLen; @@ -2543,9 +2592,6 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value) return bcpType; } -#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) -#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) - static UBool isWellFormedLegacyKey(const char* legacyKey) { @@ -2588,11 +2634,10 @@ uloc_toLegacyKey(const char* keyword) // Checks if the specified locale key is well-formed with the legacy locale syntax. // // Note: - // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. - // However, a key should not contain '=' obviously. For now, all existing - // keys are using ASCII alphabetic letters only. We won't add any new key - // that is not compatible with the BCP 47 syntax. Therefore, we assume - // a valid key consist from [0-9a-zA-Z], no symbols. + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Keys can only consist of [0-9a-zA-Z]. if (isWellFormedLegacyKey(keyword)) { return keyword; } @@ -2608,12 +2653,11 @@ uloc_toLegacyType(const char* keyword, const char* value) // Checks if the specified locale type is well-formed with the legacy locale syntax. // // Note: - // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. - // However, a type should not contain '=' obviously. For now, all existing - // types are using ASCII alphabetic letters with a few symbol letters. We won't - // add any new type that is not compatible with the BCP 47 syntax except timezone - // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain - // '-' '_' '/' in the middle. + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values + // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") if (isWellFormedLegacyType(value)) { return value; } diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h index f330aa4d56c..6f94920d373 100644 --- a/icu4c/source/common/unicode/uloc.h +++ b/icu4c/source/common/unicode/uloc.h @@ -855,10 +855,12 @@ uloc_openKeywords(const char* localeID, * Get the value for a keyword. Locale name does not need to be normalized. * * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") - * @param keywordName name of the keyword for which we want the value. Case insensitive. + * @param keywordName name of the keyword for which we want the value; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. * @param buffer receiving buffer * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. + * @param status containing error code: e.g. buffer not big enough or ill-formed localeID + * or keywordName parameters. * @return the length of keyword value * @stable ICU 2.8 */ @@ -875,18 +877,26 @@ uloc_getKeywordValue(const char* localeID, * For removing all keywords, use uloc_getBaseName(). * * NOTE: Unlike almost every other ICU function which takes a - * buffer, this function will NOT truncate the output text. If a - * BUFFER_OVERFLOW_ERROR is received, it means that the original - * buffer is untouched. This is done to prevent incorrect or possibly - * even malformed locales from being generated and used. + * buffer, this function will NOT truncate the output text, and will + * not update the buffer with unterminated text setting a status of + * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received, + * it means a terminated version of the updated locale ID would not fit + * in the buffer, and the original buffer is untouched. This is done to + * prevent incorrect or possibly even malformed locales from being generated + * and used. * - * @param keywordName name of the keyword to be set. Case insensitive. + * @param keywordName name of the keyword to be set; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. - * @param buffer input buffer containing locale to be modified. + * NULL, will result in the keyword being removed; no error is given if + * that keyword does not exist. Otherwise, must consist only of + * [A-Za-z0-9] and [/_+-]. + * @param buffer input buffer containing well-formed locale ID to be + * modified. * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. + * @param status containing error code: e.g. buffer not big enough + * or ill-formed keywordName or keywordValue parameters, or ill-formed + * locale ID in buffer on input. * @return the length needed for the buffer * @see uloc_getKeywordValue * @stable ICU 3.2 diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index 9263dfc1025..2e7de580ad6 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -1828,27 +1828,38 @@ static void TestKeywordVariantParsing(void) static const struct { const char *localeID; const char *keyword; - const char *expectedValue; + const char *expectedValue; /* NULL if failure is expected */ } testCases[] = { - { "de_DE@ C o ll A t i o n = Phonebook ", "c o ll a t i o n", "Phonebook" }, + { "de_DE@ C o ll A t i o n = Phonebook ", "c o ll a t i o n", NULL }, /* malformed key name */ { "de_DE", "collation", ""}, { "de_DE@collation=PHONEBOOK", "collation", "PHONEBOOK" }, { "de_DE@currency = euro; CoLLaTion = PHONEBOOk", "collatiON", "PHONEBOOk" }, }; - UErrorCode status = U_ZERO_ERROR; - + UErrorCode status; int32_t i = 0; int32_t resultLen = 0; char buffer[256]; for(i = 0; i < UPRV_LENGTHOF(testCases); i++) { *buffer = 0; + status = U_ZERO_ERROR; resultLen = uloc_getKeywordValue(testCases[i].localeID, testCases[i].keyword, buffer, 256, &status); (void)resultLen; /* Suppress set but not used warning. */ - if(uprv_strcmp(testCases[i].expectedValue, buffer) != 0) { - log_err("Expected to extract \"%s\" from \"%s\" for keyword \"%s\". Got \"%s\" instead\n", - testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, buffer); + if (testCases[i].expectedValue) { + /* expect success */ + if (U_FAILURE(status)) { + log_err("Expected to extract \"%s\" from \"%s\" for keyword \"%s\". Instead got status %s\n", + testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, u_errorName(status)); + } else if (uprv_strcmp(testCases[i].expectedValue, buffer) != 0) { + log_err("Expected to extract \"%s\" from \"%s\" for keyword \"%s\". Instead got \"%s\"\n", + testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, buffer); + } + } else if (U_SUCCESS(status)) { + /* expect failure */ + log_err("Expected failure but got success from \"%s\" for keyword \"%s\". Got \"%s\"\n", + testCases[i].localeID, testCases[i].keyword, buffer); + } } } @@ -1901,7 +1912,40 @@ static const struct { /* 4. removal of only item */ { "de@collation=phonebook", "collation", NULL, "de" }, #endif - { "de@collation=phonebook", "Currency", "CHF", "de@collation=phonebook;currency=CHF" } + { "de@collation=phonebook", "Currency", "CHF", "de@collation=phonebook;currency=CHF" }, + /* cases with legal extra spacing */ + /*31*/{ "en_US@ calendar = islamic", "calendar", "japanese", "en_US@calendar=japanese" }, + /*32*/{ "en_US@ calendar = gregorian ; collation = phonebook", "calendar", "japanese", "en_US@calendar=japanese;collation=phonebook" }, + /*33*/{ "en_US@ calendar = islamic", "currency", "CHF", "en_US@calendar=islamic;currency=CHF" }, + /*34*/{ "en_US@ currency = CHF", "calendar", "japanese", "en_US@calendar=japanese;currency=CHF" }, + /* cases in which setKeywordValue expected to fail (implied by NULL for expected); locale need not be canonical */ + /*35*/{ "en_US@calendar=gregorian;", "calendar", "japanese", NULL }, + /*36*/{ "en_US@calendar=gregorian;=", "calendar", "japanese", NULL }, + /*37*/{ "en_US@calendar=gregorian;currency=", "calendar", "japanese", NULL }, + /*38*/{ "en_US@=", "calendar", "japanese", NULL }, + /*39*/{ "en_US@=;", "calendar", "japanese", NULL }, + /*40*/{ "en_US@= ", "calendar", "japanese", NULL }, + /*41*/{ "en_US@ =", "calendar", "japanese", NULL }, + /*42*/{ "en_US@ = ", "calendar", "japanese", NULL }, + /*43*/{ "en_US@=;calendar=gregorian", "calendar", "japanese", NULL }, + /*44*/{ "en_US@= calen dar = gregorian", "calendar", "japanese", NULL }, + /*45*/{ "en_US@= calendar = greg orian", "calendar", "japanese", NULL }, + /*46*/{ "en_US@=;cal...endar=gregorian", "calendar", "japanese", NULL }, + /*47*/{ "en_US@=;calendar=greg...orian", "calendar", "japanese", NULL }, + /*48*/{ "en_US@calendar=gregorian", "cale ndar", "japanese", NULL }, + /*49*/{ "en_US@calendar=gregorian", "calendar", "japa..nese", NULL }, + /* cases in which getKeywordValue and setKeyword expected to fail (implied by NULL for value and expected) */ + /*50*/{ "en_US@=", "calendar", NULL, NULL }, + /*51*/{ "en_US@=;", "calendar", NULL, NULL }, + /*52*/{ "en_US@= ", "calendar", NULL, NULL }, + /*53*/{ "en_US@ =", "calendar", NULL, NULL }, + /*54*/{ "en_US@ = ", "calendar", NULL, NULL }, + /*55*/{ "en_US@=;calendar=gregorian", "calendar", NULL, NULL }, + /*56*/{ "en_US@= calen dar = gregorian", "calendar", NULL, NULL }, + /*57*/{ "en_US@= calendar = greg orian", "calendar", NULL, NULL }, + /*58*/{ "en_US@=;cal...endar=gregorian", "calendar", NULL, NULL }, + /*59*/{ "en_US@=;calendar=greg...orian", "calendar", NULL, NULL }, + /*60*/{ "en_US@calendar=gregorian", "cale ndar", NULL, NULL }, }; @@ -1914,31 +1958,59 @@ static void TestKeywordSet(void) char cbuffer[1024]; for(i = 0; i < UPRV_LENGTHOF(kwSetTestCases); i++) { - UErrorCode status = U_ZERO_ERROR; - memset(buffer,'%',1023); - strcpy(buffer, kwSetTestCases[i].l); + UErrorCode status = U_ZERO_ERROR; + memset(buffer,'%',1023); + strcpy(buffer, kwSetTestCases[i].l); + if (kwSetTestCases[i].x != NULL) { uloc_canonicalize(kwSetTestCases[i].l, cbuffer, 1023, &status); if(strcmp(buffer,cbuffer)) { log_verbose("note: [%d] wasn't canonical, should be: '%s' not '%s'. Won't check for canonicity in output.\n", i, cbuffer, buffer); } - /* sanity check test case results for canonicity */ + /* sanity check test case results for canonicity */ uloc_canonicalize(kwSetTestCases[i].x, cbuffer, 1023, &status); if(strcmp(kwSetTestCases[i].x,cbuffer)) { log_err("%s:%d: ERROR: kwSetTestCases[%d].x = '%s', should be %s (must be canonical)\n", __FILE__, __LINE__, i, kwSetTestCases[i].x, cbuffer); } + status = U_ZERO_ERROR; resultLen = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, 1023, &status); if(U_FAILURE(status)) { - log_err("Err on test case %d: got error %s\n", i, u_errorName(status)); - continue; - } - if(strcmp(buffer,kwSetTestCases[i].x) || ((int32_t)strlen(buffer)!=resultLen)) { - log_err("FAIL: #%d: %s + [%s=%s] -> %s (%d) expected %s (%d)\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k, + log_err("Err on test case %d for setKeywordValue: got error %s\n", i, u_errorName(status)); + } else if(strcmp(buffer,kwSetTestCases[i].x) || ((int32_t)strlen(buffer)!=resultLen)) { + log_err("FAIL: #%d setKeywordValue: %s + [%s=%s] -> %s (%d) expected %s (%d)\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, resultLen, kwSetTestCases[i].x, strlen(buffer)); } else { log_verbose("pass: #%d: %s + [%s=%s] -> %s\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v,buffer); } + + if (kwSetTestCases[i].v != NULL && kwSetTestCases[i].v[0] != 0) { + status = U_ZERO_ERROR; + resultLen = uloc_getKeywordValue(kwSetTestCases[i].x, kwSetTestCases[i].k, buffer, 1023, &status); + if(U_FAILURE(status)) { + log_err("Err on test case %d for getKeywordValue: got error %s\n", i, u_errorName(status)); + } else if (resultLen != uprv_strlen(kwSetTestCases[i].v) || uprv_strcmp(buffer, kwSetTestCases[i].v) != 0) { + log_err("FAIL: #%d getKeywordValue: got %s (%d) expected %s (%d)\n", i, buffer, resultLen, + kwSetTestCases[i].v, uprv_strlen(kwSetTestCases[i].v)); + } + } + } else { + /* test cases expected to result in error */ + status = U_ZERO_ERROR; + resultLen = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, 1023, &status); + if(U_SUCCESS(status)) { + log_err("Err on test case %d for setKeywordValue: expected to fail but succeeded, got %s (%d)\n", i, buffer, resultLen); + } + + if (kwSetTestCases[i].v == NULL) { + status = U_ZERO_ERROR; + strcpy(cbuffer, kwSetTestCases[i].l); + resultLen = uloc_getKeywordValue(cbuffer, kwSetTestCases[i].k, buffer, 1023, &status); + if(U_SUCCESS(status)) { + log_err("Err on test case %d for getKeywordValue: expected to fail but succeeded\n", i); + } + } + } } } -- 2.40.0