ICU-7118 document that compare() is often more efficient than getSortKey()

author Markus Scherer <markus.icu@gmail.com>

Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)
author Markus Scherer <markus.icu@gmail.com>
Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)
diff --git a/icu4c/source/i18n/unicode/coll.h b/icu4c/source/i18n/unicode/coll.h

index a7932759de4b74b46c25ba3df1f61a0c9ba7ce89..1d2ff5b37f50fc399b5cc4cbfc31eec3aace2630 100644 (file)
--- a/icu4c/source/i18n/unicode/coll.h
+++ b/icu4c/source/i18n/unicode/coll.h
@@ -135,20 +135,12 @@ class CollationKey;
  * \endcode
  * </pre>
  * \htmlonly</blockquote>\endhtmlonly
-* <p>
-* For comparing strings exactly once, the <code>compare</code> method
-* provides the best performance. When sorting a list of strings however, it
-* is generally necessary to compare each string multiple times. In this case,
-* sort keys provide better performance. The <code>getSortKey</code> methods
+*
+* The <code>getSortKey</code> methods
  * convert a string to a series of bytes that can be compared bitwise against
  * other sort keys using <code>strcmp()</code>. Sort keys are written as
-* zero-terminated byte strings. They consist of several substrings, one for
-* each collation strength level, that are delimited by 0x01 bytes.
-* If the string code points are appended for UCOL_IDENTICAL, then they are
-* processed for correct code point order comparison and may contain 0x01
-* bytes but not zero bytes.
-* </p>
-* <p>
+* zero-terminated byte strings.
+*
  * Another set of APIs returns a <code>CollationKey</code> object that wraps
  * the sort key bytes instead of returning the bytes themselves.
  * </p>
@@ -482,11 +474,14 @@ public:
      /**
       * Transforms the string into a series of characters that can be compared
       * with CollationKey::compareTo. It is not possible to restore the original
-     * string from the chars in the sort key.  The generated sort key handles
-     * only a limited number of ignorable characters.
+     * string from the chars in the sort key.
       * <p>Use CollationKey::equals or CollationKey::compare to compare the
       * generated sort keys.
       * If the source string is null, a null collation key will be returned.
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source the source string to be transformed into a sort key.
       * @param key the collation key to be filled in
       * @param status the error code status.
@@ -501,11 +496,14 @@ public:
      /**
       * Transforms the string into a series of characters that can be compared
       * with CollationKey::compareTo. It is not possible to restore the original
-     * string from the chars in the sort key.  The generated sort key handles
-     * only a limited number of ignorable characters.
+     * string from the chars in the sort key.
       * <p>Use CollationKey::equals or CollationKey::compare to compare the
       * generated sort keys.
       * <p>If the source string is null, a null collation key will be returned.
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source the source string to be transformed into a sort key.
       * @param sourceLength length of the collation key
       * @param key the collation key to be filled in
@@ -980,6 +978,10 @@ public:
       * Get the sort key as an array of bytes from a UnicodeString.
       * Sort key byte arrays are zero-terminated and can be compared using
       * strcmp().
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source string to be processed.
       * @param result buffer to store result in. If NULL, number of bytes needed
       *        will be returned.
@@ -996,6 +998,10 @@ public:
       * Get the sort key as an array of bytes from a UChar buffer.
       * Sort key byte arrays are zero-terminated and can be compared using
       * strcmp().
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source string to be processed.
       * @param sourceLength length of string to be processed.
       *        If -1, the string is 0 terminated and length will be decided by the
diff --git a/icu4c/source/i18n/unicode/tblcoll.h b/icu4c/source/i18n/unicode/tblcoll.h

index 64f092c63f2be0cf37c05b17f457775ae76e967b..40d5090217414f00f8a6e0925cf974bf1197fb3b 100644 (file)
--- a/icu4c/source/i18n/unicode/tblcoll.h
+++ b/icu4c/source/i18n/unicode/tblcoll.h
@@ -6,7 +6,7 @@
  */
  
  /**
- * \file 
+ * \file
   * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
   */
  
@@ -343,34 +343,38 @@ public:
                                           UErrorCode &status) const;
  
      /**
-    * Transforms a specified region of the string into a series of characters
-    * that can be compared with CollationKey.compare. Use a CollationKey when
-    * you need to do repeated comparisions on the same string. For a single
-    * comparison the compare method will be faster.
-    * @param source the source string.
-    * @param key the transformed key of the source string.
-    * @param status the error code status.
-    * @return the transformed key.
-    * @see CollationKey
-    * @stable ICU 2.0
-    */
+     * Transforms the string into a series of characters
+     * that can be compared with CollationKey.compare().
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
+     * @param source the source string.
+     * @param key the transformed key of the source string.
+     * @param status the error code status.
+     * @return the transformed key.
+     * @see CollationKey
+     * @stable ICU 2.0
+     */
      virtual CollationKey& getCollationKey(const UnicodeString& source,
                                            CollationKey& key,
                                            UErrorCode& status) const;
  
      /**
-    * Transforms a specified region of the string into a series of characters
-    * that can be compared with CollationKey.compare. Use a CollationKey when
-    * you need to do repeated comparisions on the same string. For a single
-    * comparison the compare method will be faster.
-    * @param source the source string.
-    * @param sourceLength the length of the source string.
-    * @param key the transformed key of the source string.
-    * @param status the error code status.
-    * @return the transformed key.
-    * @see CollationKey
-    * @stable ICU 2.0
-    */
+     * Transforms a specified region of the string into a series of characters
+     * that can be compared with CollationKey.compare().
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
+     * @param source the source string.
+     * @param sourceLength the length of the source string.
+     * @param key the transformed key of the source string.
+     * @param status the error code status.
+     * @return the transformed key.
+     * @see CollationKey
+     * @stable ICU 2.0
+     */
      virtual CollationKey& getCollationKey(const UChar *source,
                                            int32_t sourceLength,
                                            CollationKey& key,
@@ -609,6 +613,10 @@ public:
  
      /**
       * Get the sort key as an array of bytes from a UnicodeString.
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source string to be processed.
       * @param result buffer to store result in. If NULL, number of bytes needed
       *        will be returned.
@@ -622,6 +630,10 @@ public:
  
      /**
       * Get the sort key as an array of bytes from a UChar buffer.
+     *
+     * Note that sort keys are often less efficient than simply calling compare().
+     * For more details, see the ICU User Guide.
+     *
       * @param source string to be processed.
       * @param sourceLength length of string to be processed. If -1, the string
       *        is 0 terminated and length will be decided by the function.
diff --git a/icu4c/source/i18n/unicode/ucol.h b/icu4c/source/i18n/unicode/ucol.h

index 8919b094ff7d29625108f14cd8ad554a53c8f073..8aecc38c2541f1729043244c006dca97f15ec1ba 100644 (file)
--- a/icu4c/source/i18n/unicode/ucol.h
+++ b/icu4c/source/i18n/unicode/ucol.h
@@ -970,6 +970,9 @@ ucol_normalizeShortDefinitionString(const char *source,
   * Get a sort key for a string from a UCollator.
   * Sort keys may be compared using <TT>strcmp</TT>.
   *
+ * Note that sort keys are often less efficient than simply comparing the strings directly.
+ * For more details, see the ICU User Guide.
+ *
   * Like ICU functions that write to an output buffer, the buffer contents
   * is undefined if the buffer capacity (resultLength parameter) is too small.
   * Unlike ICU functions that write a string to an output buffer,
author	Markus Scherer <markus.icu@gmail.com>
	Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Tue, 9 Sep 2014 22:05:13 +0000 (22:05 +0000)
icu4c/source/i18n/unicode/coll.h		patch \| blob \| history
icu4c/source/i18n/unicode/tblcoll.h		patch \| blob \| history
icu4c/source/i18n/unicode/ucol.h		patch \| blob \| history