ICU-13234 collect string & character options bits in new stringoptions.h

author Markus Scherer <markus.icu@gmail.com>

Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)
author Markus Scherer <markus.icu@gmail.com>
Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)
diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp

index 5e3103eca7030f3fca239a5ebfbbbaec7aa40997..5ab29e42e8dd44ea35cb1d57b9d5b468a6f62255 100644 (file)
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@@ -23,7 +23,7 @@
  #include "unicode/bytestream.h"
  #include "unicode/edits.h"
  #include "unicode/normalizer2.h"
-#include "unicode/ucasemap.h"  // U_OMIT_UNCHANGED_TEXT
+#include "unicode/stringoptions.h"
  #include "unicode/udata.h"
  #include "unicode/ustring.h"
  #include "unicode/utf16.h"
diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h

index e15bae6604daef02f06df71ec5594666ce56411d..0240641132d643161a8fd1131119677887dc8efd 100644 (file)
--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@@ -61,7 +61,7 @@ enum {
  /**
   * Bit mask for getting just the options from a string compare options word
   * that are relevant for case-insensitive string comparison.
- * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
+ * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
   * @internal
   */
  #define _STRCASECMP_OPTIONS_MASK 0xffff
@@ -69,7 +69,7 @@ enum {
  /**
   * Bit mask for getting just the options from a string compare options word
   * that are relevant for case folding (of a single string or code point).
- * See uchar.h.
+ * See stringoptions.h.
   * @internal
   */
  #define _FOLD_CASE_OPTIONS_MASK 0xff
diff --git a/icu4c/source/common/ucasemap_imp.h b/icu4c/source/common/ucasemap_imp.h

index 79204226b00900aed4ea5418d5fa2d5aae8a9c24..5a670964f634f8ea17d5719a1dbe63ed5c7e6d4c 100644 (file)
--- a/icu4c/source/common/ucasemap_imp.h
+++ b/icu4c/source/common/ucasemap_imp.h
@@ -11,15 +11,6 @@
  #include "unicode/ucasemap.h"
  #include "ucase.h"
  
-#ifndef U_COMPARE_IGNORE_CASE
-/* see also unorm.h */
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-#endif
-
  /**
   * Internal API, used by u_strcasecmp() etc.
   * Compare strings case-insensitively,
diff --git a/icu4c/source/common/unicode/stringoptions.h b/icu4c/source/common/unicode/stringoptions.h

new file mode 100644 (file)

index 0000000..975e193
--- /dev/null
+++ b/icu4c/source/common/unicode/stringoptions.h
@@ -0,0 +1,133 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Option bit constants for various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
+ * were to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Since Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @draft ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+#endif  // U_HIDE_DRAFT_API
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD          0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 0xff
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif  // __STRINGOPTIONS_H__
diff --git a/icu4c/source/common/unicode/ucasemap.h b/icu4c/source/common/unicode/ucasemap.h

index 15f508af20c2b15f02c7dabcfd627b88314d85d5..7c69bdc20764e0c8da99780565f869f2c9bcde23 100644 (file)
--- a/icu4c/source/common/unicode/ucasemap.h
+++ b/icu4c/source/common/unicode/ucasemap.h
@@ -23,6 +23,7 @@
  
  #include "unicode/utypes.h"
  #include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
  #include "unicode/ustring.h"
  
  /**
@@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
  U_STABLE void U_EXPORT2
  ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
  
-/**
- * Do not lowercase non-initial parts of words when titlecasing.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will titlecase the first cased character
- * of a word and lowercase all other characters.
- * With this option, the other characters will not be modified.
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_LOWERCASE 0x100
-
-/**
- * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
- * titlecase exactly the characters at breaks from the iterator.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will take each break iterator index,
- * adjust it by looking for the next cased character, and titlecase that one.
- * Other characters are lowercased.
- *
- * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @see U_TITLECASE_NO_LOWERCASE
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
-
-/**
- * Omit unchanged text when case-mapping or normalizing with Edits.
- *
- * @see CaseMap
- * @see Edits
- * @draft ICU 60
- */
-#define U_OMIT_UNCHANGED_TEXT 0x4000
-
  #if !UCONFIG_NO_BREAK_ITERATION
  
  /**
diff --git a/icu4c/source/common/unicode/uchar.h b/icu4c/source/common/unicode/uchar.h

index bca55b1562c4e174f15a007fd6f65c79668cedd5..3613374d9a43a5df7e8f302db61f3fb281e94e6c 100644 (file)
--- a/icu4c/source/common/unicode/uchar.h
+++ b/icu4c/source/common/unicode/uchar.h
@@ -26,6 +26,7 @@
  #define UCHAR_H
  
  #include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
  
  U_CDECL_BEGIN
  
@@ -3569,27 +3570,6 @@ u_toupper(UChar32 c);
  U_STABLE UChar32 U_EXPORT2
  u_totitle(UChar32 c);
  
-/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
-#define U_FOLD_CASE_DEFAULT 0
-
-/**
- * Option value for case folding:
- *
- * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
- * and dotless i appropriately for Turkic languages (tr, az).
- *
- * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
- * are to be included for default mappings and
- * excluded for the Turkic-specific mappings.
- *
- * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
- * are to be excluded for default mappings and
- * included for the Turkic-specific mappings.
- *
- * @stable ICU 2.0
- */
-#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
-
  /**
   * The given character is mapped to its case folding equivalent according to
   * UnicodeData.txt and CaseFolding.txt;
diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h

index c4bf0d44f96ffa08f8bb0a6f5d6afc19d41cd286..445d57c911a72f967b9267f1aa9f1fd93925c05a 100644 (file)
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@@ -38,16 +38,6 @@
  
  struct UConverter;          // unicode/ucnv.h
  
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also ustring.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
  #ifndef USTRING_H
  /**
   * \ingroup ustring_ustrlen
diff --git a/icu4c/source/common/unicode/unorm2.h b/icu4c/source/common/unicode/unorm2.h

index c6d3494d7057f53599e1ee7995b7520f9e0e091a..a9bd02f256361f3438cb75972d93d668e223b331 100644 (file)
--- a/icu4c/source/common/unicode/unorm2.h
+++ b/icu4c/source/common/unicode/unorm2.h
@@ -32,6 +32,7 @@
  
  #include "unicode/utypes.h"
  #include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
  #include "unicode/uset.h"
  
  /**
@@ -526,30 +527,6 @@ unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
  U_STABLE UBool U_EXPORT2
  unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
  
-/**
- * Option bit for unorm_compare:
- * Both input strings are assumed to fulfill FCD conditions.
- * @stable ICU 2.2
- */
-#define UNORM_INPUT_IS_FCD          0x20000
-
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @stable ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and ustring.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
  /**
   * Compares two strings for canonical equivalence.
   * Further options include case-insensitive comparison and
diff --git a/icu4c/source/common/unicode/ustring.h b/icu4c/source/common/unicode/ustring.h

index 31b05566bc628d45f40e66f3821f8f1caad0d095..3daa28e555e5db2774f3d2c51f74662699916334 100644 (file)
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1,
  U_STABLE int32_t U_EXPORT2
  u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
  
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
  /**
   * Compare two strings case-insensitively using full case folding.
   * This is equivalent to
diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp

index 199554d9402c00fb11a3dcc12f60644a0a4a9d38..db6719434ff3d6fbf2d0031156b103a10b4712be 100644 (file)
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@@ -13,7 +13,7 @@
  #include "unicode/uchar.h"
  #include "unicode/errorcode.h"
  #include "unicode/normlzr.h"
-#include "unicode/ucasemap.h"  // U_OMIT_UNCHANGED_TEXT
+#include "unicode/stringoptions.h"
  #include "unicode/uniset.h"
  #include "unicode/usetiter.h"
  #include "unicode/schriter.h"
author	Markus Scherer <markus.icu@gmail.com>
	Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Thu, 8 Jun 2017 20:35:40 +0000 (20:35 +0000)
icu4c/source/common/normalizer2impl.cpp		patch \| blob \| history
icu4c/source/common/ucase.h		patch \| blob \| history
icu4c/source/common/ucasemap_imp.h		patch \| blob \| history
icu4c/source/common/unicode/stringoptions.h	[new file with mode: 0644]	patch \| blob
icu4c/source/common/unicode/ucasemap.h		patch \| blob \| history
icu4c/source/common/unicode/uchar.h		patch \| blob \| history
icu4c/source/common/unicode/unistr.h		patch \| blob \| history
icu4c/source/common/unicode/unorm2.h		patch \| blob \| history
icu4c/source/common/unicode/ustring.h		patch \| blob \| history
icu4c/source/test/intltest/tstnorm.cpp		patch \| blob \| history