From 321f4671909a6174b8a9bf894fcaf30afe666326 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 13 Sep 2017 16:15:28 +0000 Subject: [PATCH] ICU-13337 public string option for not resetting an Edits object: U_EDITS_NO_RESET X-SVN-Rev: 40394 --- icu4c/source/common/filterednormalizer2.cpp | 2 +- icu4c/source/common/norm2allmodes.h | 2 +- icu4c/source/common/normalizer2.cpp | 2 +- icu4c/source/common/ucasemap.cpp | 1 + icu4c/source/common/unicode/casemap.h | 40 ++++++++++++--------- icu4c/source/common/unicode/normalizer2.h | 10 +++--- icu4c/source/common/unicode/stringoptions.h | 12 ++++++- icu4c/source/common/ustr_imp.h | 5 --- icu4c/source/common/ustrcase.cpp | 1 + icu4c/source/test/intltest/strcase.cpp | 21 ++++++++--- 10 files changed, 62 insertions(+), 34 deletions(-) diff --git a/icu4c/source/common/filterednormalizer2.cpp b/icu4c/source/common/filterednormalizer2.cpp index f627b601ce0..1a0914d3f7b 100644 --- a/icu4c/source/common/filterednormalizer2.cpp +++ b/icu4c/source/common/filterednormalizer2.cpp @@ -22,11 +22,11 @@ #include "unicode/edits.h" #include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/unorm.h" #include "cpputils.h" -#include "ustr_imp.h" // U_EDITS_NO_RESET U_NAMESPACE_BEGIN diff --git a/icu4c/source/common/norm2allmodes.h b/icu4c/source/common/norm2allmodes.h index 3305f05f64f..96b0ebe0828 100644 --- a/icu4c/source/common/norm2allmodes.h +++ b/icu4c/source/common/norm2allmodes.h @@ -20,10 +20,10 @@ #include "unicode/edits.h" #include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" #include "unicode/unistr.h" #include "cpputils.h" #include "normalizer2impl.h" -#include "ustr_imp.h" // U_EDITS_NO_RESET U_NAMESPACE_BEGIN diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp index 8915c1ddc31..ef0f7d56b03 100644 --- a/icu4c/source/common/normalizer2.cpp +++ b/icu4c/source/common/normalizer2.cpp @@ -22,6 +22,7 @@ #include "unicode/edits.h" #include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" #include "unicode/unistr.h" #include "unicode/unorm.h" #include "cstring.h" @@ -30,7 +31,6 @@ #include "normalizer2impl.h" #include "uassert.h" #include "ucln_cmn.h" -#include "ustr_imp.h" // U_EDITS_NO_RESET using icu::Normalizer2Impl; diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index 57bf3a52a4e..6e550aadcd8 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -22,6 +22,7 @@ #include "unicode/brkiter.h" #include "unicode/casemap.h" #include "unicode/edits.h" +#include "unicode/stringoptions.h" #include "unicode/ubrk.h" #include "unicode/uloc.h" #include "unicode/ustring.h" diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h index 581f1ab532a..6a29a426605 100644 --- a/icu4c/source/common/unicode/casemap.h +++ b/icu4c/source/common/unicode/casemap.h @@ -36,7 +36,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -48,7 +48,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -71,7 +72,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -83,7 +84,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -112,7 +114,7 @@ public: * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. @@ -132,7 +134,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -161,7 +164,7 @@ public: * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. @@ -174,7 +177,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -197,7 +201,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -209,7 +213,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -232,7 +237,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -244,7 +249,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -273,7 +279,7 @@ public: * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. @@ -293,7 +299,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -321,7 +328,7 @@ public: * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. @@ -334,7 +341,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. diff --git a/icu4c/source/common/unicode/normalizer2.h b/icu4c/source/common/unicode/normalizer2.h index 631e29bafef..97337c5300e 100644 --- a/icu4c/source/common/unicode/normalizer2.h +++ b/icu4c/source/common/unicode/normalizer2.h @@ -228,14 +228,15 @@ public: * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). * Otherwise currently converts to & from UTF-16 and does not support edits. * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src Source UTF-8 string. * @param sink A ByteSink to which the normalized UTF-8 result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be nullptr. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with @@ -545,14 +546,15 @@ public: * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). * Otherwise currently converts to & from UTF-16 and does not support edits. * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src Source UTF-8 string. * @param sink A ByteSink to which the normalized UTF-8 result string is written. * sink.Flush() is called at the end. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be nullptr. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with diff --git a/icu4c/source/common/unicode/stringoptions.h b/icu4c/source/common/unicode/stringoptions.h index d8b29fd40d8..f2de96e9634 100644 --- a/icu4c/source/common/unicode/stringoptions.h +++ b/icu4c/source/common/unicode/stringoptions.h @@ -134,6 +134,17 @@ */ #define U_TITLECASE_ADJUST_TO_CASED 0x400 +/** + * Option for string transformation functions to not first reset the Edits object. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_EDITS_NO_RESET 0x2000 + /** * Omit unchanged text when recording how source substrings * relate to changed and unchanged result substrings. @@ -182,7 +193,6 @@ // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 // ustr_imp.h #define _STRNCMP_STYLE 0x1000 -// ustr_imp.h #define U_EDITS_NO_RESET 0x2000 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 #endif // __STRINGOPTIONS_H__ diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index 3c2d8574f8d..9815915ff52 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -25,11 +25,6 @@ */ #define _STRNCMP_STYLE 0x1000 -/** - * Internal option for string transformation functions to not first reset the Edits object. - */ -#define U_EDITS_NO_RESET 0x2000 - /** * Compare two strings in code point order or code unit order. * Works in strcmp style (both lengths -1), diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index bd910cf0dc5..a96c57cef2e 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -24,6 +24,7 @@ #include "unicode/brkiter.h" #include "unicode/casemap.h" #include "unicode/edits.h" +#include "unicode/stringoptions.h" #include "unicode/ustring.h" #include "unicode/ucasemap.h" #include "unicode/ubrk.h" diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp index 854f4ec9140..35513052461 100644 --- a/icu4c/source/test/intltest/strcase.cpp +++ b/icu4c/source/test/intltest/strcase.cpp @@ -1272,18 +1272,23 @@ void StringCaseTest::TestCaseMapWithEdits() { TRUE, errorCode); #endif - edits.reset(); - length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, + // No explicit nor automatic edits.reset(). Edits should be appended. + length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I, u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length)); static const EditChange foldExpectedChanges[] = { + // From titlecasing. + { FALSE, 1, 1 }, + { TRUE, 1, 1 }, + { FALSE, 10, 10 }, + // From case folding. { TRUE, 1, 1 }, { TRUE, 1, 2 }, { FALSE, 3, 3 }, { TRUE, 1, 1 }, { FALSE, 2, 2 } }; - TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)", + TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)", edits.getFineIterator(), edits.getFineIterator(), foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), TRUE, errorCode); @@ -1348,12 +1353,18 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() { TRUE, errorCode); #endif - edits.reset(); - length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, + // No explicit nor automatic edits.reset(). Edits should be appended. + length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | + U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString::fromUTF8(StringPiece(dest, length))); static const EditChange foldExpectedChanges[] = { + // From titlecasing. + { FALSE, 1, 1 }, + { TRUE, 1, 1 }, + { FALSE, 10, 10 }, + // From case folding. { TRUE, 1, 2 }, { TRUE, 2, 2 }, { FALSE, 3, 3 }, -- 2.40.0