From: Markus Scherer Date: Thu, 8 Jun 2017 20:35:40 +0000 (+0000) Subject: ICU-13234 collect string & character options bits in new stringoptions.h X-Git-Tag: milestone-60-0-1~36 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=06a03303cbf1a2e39acc131e3896d49bf6958e5d;p=icu ICU-13234 collect string & character options bits in new stringoptions.h X-SVN-Rev: 40162 --- diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp index 5e3103eca70..5ab29e42e8d 100644 --- a/icu4c/source/common/normalizer2impl.cpp +++ b/icu4c/source/common/normalizer2impl.cpp @@ -23,7 +23,7 @@ #include "unicode/bytestream.h" #include "unicode/edits.h" #include "unicode/normalizer2.h" -#include "unicode/ucasemap.h" // U_OMIT_UNCHANGED_TEXT +#include "unicode/stringoptions.h" #include "unicode/udata.h" #include "unicode/ustring.h" #include "unicode/utf16.h" diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h index e15bae6604d..0240641132d 100644 --- a/icu4c/source/common/ucase.h +++ b/icu4c/source/common/ucase.h @@ -61,7 +61,7 @@ enum { /** * Bit mask for getting just the options from a string compare options word * that are relevant for case-insensitive string comparison. - * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. + * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. * @internal */ #define _STRCASECMP_OPTIONS_MASK 0xffff @@ -69,7 +69,7 @@ enum { /** * Bit mask for getting just the options from a string compare options word * that are relevant for case folding (of a single string or code point). - * See uchar.h. + * See stringoptions.h. * @internal */ #define _FOLD_CASE_OPTIONS_MASK 0xff diff --git a/icu4c/source/common/ucasemap_imp.h b/icu4c/source/common/ucasemap_imp.h index 79204226b00..5a670964f63 100644 --- a/icu4c/source/common/ucasemap_imp.h +++ b/icu4c/source/common/ucasemap_imp.h @@ -11,15 +11,6 @@ #include "unicode/ucasemap.h" #include "ucase.h" -#ifndef U_COMPARE_IGNORE_CASE -/* see also unorm.h */ -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - */ -#define U_COMPARE_IGNORE_CASE 0x10000 -#endif - /** * Internal API, used by u_strcasecmp() etc. * Compare strings case-insensitively, diff --git a/icu4c/source/common/unicode/stringoptions.h b/icu4c/source/common/unicode/stringoptions.h new file mode 100644 index 00000000000..975e193609d --- /dev/null +++ b/icu4c/source/common/unicode/stringoptions.h @@ -0,0 +1,133 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// stringoptions.h +// created: 2017jun08 Markus W. Scherer + +#ifndef __STRINGOPTIONS_H__ +#define __STRINGOPTIONS_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Bit set option bit constants for various string and character processing functions. + */ + +/** + * Option value for case folding: Use default mappings defined in CaseFolding.txt. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). + * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +/** + * Do not lowercase non-initial parts of words when titlecasing. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will titlecase the first cased character + * of a word and lowercase all other characters. + * With this option, the other characters will not be modified. + * + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @see UnicodeString::toTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_LOWERCASE 0x100 + +/** + * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; + * titlecase exactly the characters at breaks from the iterator. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will take each break iterator index, + * adjust it by looking for the next cased character, and titlecase that one. + * Other characters are lowercased. + * + * This follows Unicode 4 & 5 section 3.13 Default Case Operations: + * + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex + * #29, "Text Boundaries." Between each pair of word boundaries, find the first + * cased character F. If F exists, map F to default_title(F); then map each + * subsequent character C to default_lower(C). + * + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @see UnicodeString::toTitle + * @see U_TITLECASE_NO_LOWERCASE + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 + +#ifndef U_HIDE_DRAFT_API + +/** + * Omit unchanged text when recording how source substrings + * relate to changed and unchanged result substrings. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_OMIT_UNCHANGED_TEXT 0x4000 + +#endif // U_HIDE_DRAFT_API + +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 + +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @stable ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 + +/** + * Option bit for unorm_compare: + * Both input strings are assumed to fulfill FCD conditions. + * @stable ICU 2.2 + */ +#define UNORM_INPUT_IS_FCD 0x20000 + +// Related definitions elsewhere. +// Options that are not meaningful in the same functions +// can share the same bits. +// +// Public: +// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 +// +// Internal: (may change or be removed) +// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff +// ucase.h #define _FOLD_CASE_OPTIONS_MASK 0xff +// ustr_imp.h #define _STRNCMP_STYLE 0x1000 +// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 + +#endif // __STRINGOPTIONS_H__ diff --git a/icu4c/source/common/unicode/ucasemap.h b/icu4c/source/common/unicode/ucasemap.h index 15f508af20c..7c69bdc2076 100644 --- a/icu4c/source/common/unicode/ucasemap.h +++ b/icu4c/source/common/unicode/ucasemap.h @@ -23,6 +23,7 @@ #include "unicode/utypes.h" #include "unicode/localpointer.h" +#include "unicode/stringoptions.h" #include "unicode/ustring.h" /** @@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); U_STABLE void U_EXPORT2 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); -/** - * Do not lowercase non-initial parts of words when titlecasing. - * Option bit for titlecasing APIs that take an options bit set. - * - * By default, titlecasing will titlecase the first cased character - * of a word and lowercase all other characters. - * With this option, the other characters will not be modified. - * - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @see UnicodeString::toTitle - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_LOWERCASE 0x100 - -/** - * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; - * titlecase exactly the characters at breaks from the iterator. - * Option bit for titlecasing APIs that take an options bit set. - * - * By default, titlecasing will take each break iterator index, - * adjust it by looking for the next cased character, and titlecase that one. - * Other characters are lowercased. - * - * This follows Unicode 4 & 5 section 3.13 Default Case Operations: - * - * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex - * #29, "Text Boundaries." Between each pair of word boundaries, find the first - * cased character F. If F exists, map F to default_title(F); then map each - * subsequent character C to default_lower(C). - * - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @see UnicodeString::toTitle - * @see U_TITLECASE_NO_LOWERCASE - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 - -/** - * Omit unchanged text when case-mapping or normalizing with Edits. - * - * @see CaseMap - * @see Edits - * @draft ICU 60 - */ -#define U_OMIT_UNCHANGED_TEXT 0x4000 - #if !UCONFIG_NO_BREAK_ITERATION /** diff --git a/icu4c/source/common/unicode/uchar.h b/icu4c/source/common/unicode/uchar.h index bca55b1562c..3613374d9a4 100644 --- a/icu4c/source/common/unicode/uchar.h +++ b/icu4c/source/common/unicode/uchar.h @@ -26,6 +26,7 @@ #define UCHAR_H #include "unicode/utypes.h" +#include "unicode/stringoptions.h" U_CDECL_BEGIN @@ -3569,27 +3570,6 @@ u_toupper(UChar32 c); U_STABLE UChar32 U_EXPORT2 u_totitle(UChar32 c); -/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ -#define U_FOLD_CASE_DEFAULT 0 - -/** - * Option value for case folding: - * - * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I - * and dotless i appropriately for Turkic languages (tr, az). - * - * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that - * are to be included for default mappings and - * excluded for the Turkic-specific mappings. - * - * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that - * are to be excluded for default mappings and - * included for the Turkic-specific mappings. - * - * @stable ICU 2.0 - */ -#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 - /** * The given character is mapped to its case folding equivalent according to * UnicodeData.txt and CaseFolding.txt; diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index c4bf0d44f96..445d57c911a 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -38,16 +38,6 @@ struct UConverter; // unicode/ucnv.h -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also ustring.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - #ifndef USTRING_H /** * \ingroup ustring_ustrlen diff --git a/icu4c/source/common/unicode/unorm2.h b/icu4c/source/common/unicode/unorm2.h index c6d3494d705..a9bd02f2563 100644 --- a/icu4c/source/common/unicode/unorm2.h +++ b/icu4c/source/common/unicode/unorm2.h @@ -32,6 +32,7 @@ #include "unicode/utypes.h" #include "unicode/localpointer.h" +#include "unicode/stringoptions.h" #include "unicode/uset.h" /** @@ -526,30 +527,6 @@ unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); U_STABLE UBool U_EXPORT2 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); -/** - * Option bit for unorm_compare: - * Both input strings are assumed to fulfill FCD conditions. - * @stable ICU 2.2 - */ -#define UNORM_INPUT_IS_FCD 0x20000 - -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - * @stable ICU 2.2 - */ -#define U_COMPARE_IGNORE_CASE 0x10000 - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and ustring.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - /** * Compares two strings for canonical equivalence. * Further options include case-insensitive comparison and diff --git a/icu4c/source/common/unicode/ustring.h b/icu4c/source/common/unicode/ustring.h index 31b05566bc6..3daa28e555e 100644 --- a/icu4c/source/common/unicode/ustring.h +++ b/icu4c/source/common/unicode/ustring.h @@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1, U_STABLE int32_t U_EXPORT2 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - /** * Compare two strings case-insensitively using full case folding. * This is equivalent to diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp index 199554d9402..db6719434ff 100644 --- a/icu4c/source/test/intltest/tstnorm.cpp +++ b/icu4c/source/test/intltest/tstnorm.cpp @@ -13,7 +13,7 @@ #include "unicode/uchar.h" #include "unicode/errorcode.h" #include "unicode/normlzr.h" -#include "unicode/ucasemap.h" // U_OMIT_UNCHANGED_TEXT +#include "unicode/stringoptions.h" #include "unicode/uniset.h" #include "unicode/usetiter.h" #include "unicode/schriter.h"