ICU-12380 Size reductions for transliterator registry (and some speedup)

author Peter Edberg <pedberg@unicode.org>

Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)

committer Peter Edberg <pedberg@unicode.org>

Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)
author Peter Edberg <pedberg@unicode.org>
Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)
committer Peter Edberg <pedberg@unicode.org>
Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)
diff --git a/icu4c/source/common/hash.h b/icu4c/source/common/hash.h

index b411a4305bfe02b2c6407870708a9c0f614d8544..c6be46720156d50eb6d8b5a71cc8ff4d9fa89275 100644 (file)
--- a/icu4c/source/common/hash.h
+++ b/icu4c/source/common/hash.h
@@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {
  
      inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
  
+    inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
+
  public:
      /**
       * Construct a hashtable
@@ -41,6 +43,14 @@ public:
      */
      Hashtable(UBool ignoreKeyCase, UErrorCode& status);
  
+    /**
+     * Construct a hashtable with a caller-specified initial size
+     * @param ignoreKeyCase If true, keys are case insensitive.
+     * @param size Requested initial size, passed through to uhash_initSize()
+     * @param status Error code
+    */
+    Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+
      /**
       * Construct a hashtable
       * @param keyComp Comparator for comparing the keys
@@ -76,9 +86,9 @@ public:
      int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
  
      void* get(const UnicodeString& key) const;
-    
+
      int32_t geti(const UnicodeString& key) const;
-    
+
      void* remove(const UnicodeString& key);
  
      int32_t removei(const UnicodeString& key);
@@ -92,9 +102,9 @@ public:
       * @see uhash_nextElement
       */
      const UHashElement* nextElement(int32_t& pos) const;
-    
+
      UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
-    
+
      UValueComparator* setValueComparator(UValueComparator* valueComp);
  
      UBool equals(const Hashtable& that) const;
@@ -107,7 +117,7 @@ private:
   * Implementation
   ********************************************************************/
  
-inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp, 
+inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                              UValueComparator *valueComp, UErrorCode& status) {
      if (U_FAILURE(status)) {
          return;
@@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
      }
  }
  
-inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, 
+inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
+                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
+    if (U_SUCCESS(status)) {
+        hash = &hashObj;
+        uhash_setKeyDeleter(hash, uprv_deleteUObject);
+    }
+}
+
+inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
                   UErrorCode& status) : hash(0) {
      init( uhash_hashUnicodeString, keyComp, valueComp, status);
  }
+
  inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
   : hash(0)
  {
@@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
              status);
  }
  
+inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
+ : hash(0)
+{
+    initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
+                        : uhash_hashUnicodeString,
+            ignoreKeyCase ? uhash_compareCaselessUnicodeString
+                        : uhash_compareUnicodeString,
+            NULL, size,
+            status);
+}
+
  inline Hashtable::Hashtable(UErrorCode& status)
   : hash(0)
  {
@@ -200,7 +234,7 @@ inline void Hashtable::removeAll(void) {
  inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
      return uhash_setKeyComparator(hash, keyComp);
  }
-    
+
  inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
      return uhash_setValueComparator(hash, valueComp);
  }
diff --git a/icu4c/source/common/uhash.cpp b/icu4c/source/common/uhash.cpp

index b7326f238cc1e18058335c20a53bf620d41ed5c0..a80e7b8ff27b42822c89132c2de98cc6debecc20 100644 (file)
--- a/icu4c/source/common/uhash.cpp
+++ b/icu4c/source/common/uhash.cpp
@@ -79,14 +79,14 @@
   * prime number while being less than a power of two.
   */
  static const int32_t PRIMES[] = {
-    13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+    7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
      65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
      16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
      1073741789, 2147483647 /*, 4294967291 */
  };
  
  #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
-#define DEFAULT_PRIME_INDEX 3
+#define DEFAULT_PRIME_INDEX 4
  
  /* These ratios are tuned to the PRIMES array such that a resize
   * places the table back into the zone of non-resizing.  That is,
@@ -231,7 +231,7 @@ _uhash_allocate(UHashtable *hash,
  
      emptytok.pointer = NULL; /* Only one of these two is needed */
      emptytok.integer = 0;    /* but we don't know which one. */
-    
+
      limit = p + hash->length;
      while (p < limit) {
          p->key = emptytok;
@@ -247,7 +247,7 @@ _uhash_allocate(UHashtable *hash,
  
  static UHashtable*
  _uhash_init(UHashtable *result,
-              UHashFunction *keyHash, 
+              UHashFunction *keyHash,
                UKeyComparator *keyComp,
                UValueComparator *valueComp,
                int32_t primeIndex,
@@ -275,7 +275,7 @@ _uhash_init(UHashtable *result,
  }
  
  static UHashtable*
-_uhash_create(UHashFunction *keyHash, 
+_uhash_create(UHashFunction *keyHash,
                UKeyComparator *keyComp,
                UValueComparator *valueComp,
                int32_t primeIndex,
@@ -415,7 +415,7 @@ _uhash_rehash(UHashtable *hash, UErrorCode *status) {
  
      if (U_FAILURE(*status)) {
          hash->elements = old;
-        hash->length = oldLength;       
+        hash->length = oldLength;
          return;
      }
  
@@ -536,7 +536,7 @@ _uhash_put(UHashtable *hash,
   ********************************************************************/
  
  U_CAPI UHashtable* U_EXPORT2
-uhash_open(UHashFunction *keyHash, 
+uhash_open(UHashFunction *keyHash,
             UKeyComparator *keyComp,
             UValueComparator *valueComp,
             UErrorCode *status) {
@@ -545,7 +545,7 @@ uhash_open(UHashFunction *keyHash,
  }
  
  U_CAPI UHashtable* U_EXPORT2
-uhash_openSize(UHashFunction *keyHash, 
+uhash_openSize(UHashFunction *keyHash,
                 UKeyComparator *keyComp,
                 UValueComparator *valueComp,
                 int32_t size,
@@ -562,7 +562,7 @@ uhash_openSize(UHashFunction *keyHash,
  
  U_CAPI UHashtable* U_EXPORT2
  uhash_init(UHashtable *fillinResult,
-           UHashFunction *keyHash, 
+           UHashFunction *keyHash,
             UKeyComparator *keyComp,
             UValueComparator *valueComp,
             UErrorCode *status) {
@@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
      return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
  }
  
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *fillinResult,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status) {
+
+    // Find the smallest index i for which PRIMES[i] >= size (clamped to the last entry).
+    int32_t i = 0;
+    while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
+        ++i;
+    }
+    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
+}
+
  U_CAPI void U_EXPORT2
  uhash_close(UHashtable *hash) {
      if (hash == NULL) {
@@ -604,7 +620,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
      hash->keyComparator = fn;
      return result;
  }
-U_CAPI UValueComparator *U_EXPORT2 
+U_CAPI UValueComparator *U_EXPORT2
  uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
      UValueComparator *result = hash->valueComparator;
      hash->valueComparator = fn;
@@ -630,7 +646,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
      UErrorCode status = U_ZERO_ERROR;
      _uhash_internalSetResizePolicy(hash, policy);
      hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
-    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);    
+    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
      _uhash_rehash(hash, &status);
  }
  
@@ -853,7 +869,7 @@ uhash_hashIChars(const UHashTok key) {
      return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
  }
  
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
      int32_t count1, count2, pos, i;
  
@@ -886,14 +902,14 @@ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
      if(count1!=count2){
          return FALSE;
      }
-    
+
      pos=UHASH_FIRST;
      for(i=0; i<count1; i++){
          const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
          const UHashTok key1 = elem1->key;
          const UHashTok val1 = elem1->value;
          /* here the keys are not compared, instead the key form hash1 is used to fetch
-         * value from hash2. If the hashes are equal then then both hashes should 
+         * value from hash2. If the hashes are equal then both hashes should
           * contain equal values for the same key!
           */
          const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
diff --git a/icu4c/source/common/uhash.h b/icu4c/source/common/uhash.h

index 6369f8e34afdf4352c43556d121d60a43c65d68c..b59d2711bb29d03c0ac7380f0a56cd83071c951e 100644 (file)
--- a/icu4c/source/common/uhash.h
+++ b/icu4c/source/common/uhash.h
@@ -154,7 +154,7 @@ struct UHashtable {
                                     * If NULL won't do anything */
  
      /* Size parameters */
-  
+
      int32_t     count;      /* The number of key-value pairs in this table.
                               * 0 <= count <= length.  In practice we
                               * never let count == length (see code). */
@@ -162,12 +162,12 @@ struct UHashtable {
                               * and values.  Must be prime. */
  
      /* Rehashing thresholds */
-    
+
      int32_t     highWaterMark;  /* If count > highWaterMark, rehash */
      int32_t     lowWaterMark;   /* If count < lowWaterMark, rehash */
      float       highWaterRatio; /* 0..1; high water as a fraction of length */
      float       lowWaterRatio;  /* 0..1; low water as a fraction of length */
-    
+
      int8_t      primeIndex;     /* Index into our prime table for length.
                                   * length == PRIMES[primeIndex] */
      UBool       allocated; /* Was this UHashtable allocated? */
@@ -190,7 +190,7 @@ U_CDECL_END
   * @return A pointer to a UHashtable, or 0 if an error occurred.
   * @see uhash_openSize
   */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
  uhash_open(UHashFunction *keyHash,
             UKeyComparator *keyComp,
             UValueComparator *valueComp,
@@ -207,7 +207,7 @@ uhash_open(UHashFunction *keyHash,
   * @return A pointer to a UHashtable, or 0 if an error occurred.
   * @see uhash_open
   */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
  uhash_openSize(UHashFunction *keyHash,
                 UKeyComparator *keyComp,
                 UValueComparator *valueComp,
@@ -224,18 +224,37 @@ uhash_openSize(UHashFunction *keyHash,
   * @return A pointer to a UHashtable, or 0 if an error occurred.
   * @see uhash_openSize
   */
-U_CAPI UHashtable* U_EXPORT2 
+U_CAPI UHashtable* U_EXPORT2
  uhash_init(UHashtable *hash,
             UHashFunction *keyHash,
             UKeyComparator *keyComp,
             UValueComparator *valueComp,
             UErrorCode *status);
  
+/**
+ * Initialize an existing UHashtable with a requested initial size.
+ * @param hash The existing UHashtable to initialize.
+ * @param keyHash A pointer to the key hashing function.  Must not be NULL.
+ * @param keyComp A pointer to the key comparison function.  Must not be NULL.
+ * @param valueComp A pointer to the function that compares values.  May be NULL.
+ * @param size The initial capacity of this hash table.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UHashtable, or 0 if an error occurred.
+ * @see uhash_openSize
+ */
+U_CAPI UHashtable* U_EXPORT2
+uhash_initSize(UHashtable *hash,
+               UHashFunction *keyHash,
+               UKeyComparator *keyComp,
+               UValueComparator *valueComp,
+               int32_t size,
+               UErrorCode *status);
+
  /**
   * Close a UHashtable, releasing the memory used.
   * @param hash The UHashtable to close. If hash is NULL no operation is performed.
   */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uhash_close(UHashtable *hash);
  
  
@@ -246,7 +265,7 @@ uhash_close(UHashtable *hash);
   * @param fn the function to be used hash keys; must not be NULL
   * @return the previous key hasher; non-NULL
   */
-U_CAPI UHashFunction *U_EXPORT2 
+U_CAPI UHashFunction *U_EXPORT2
  uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
  
  /**
@@ -256,7 +275,7 @@ uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
   * @param fn the function to be used compare keys; must not be NULL
   * @return the previous key comparator; non-NULL
   */
-U_CAPI UKeyComparator *U_EXPORT2 
+U_CAPI UKeyComparator *U_EXPORT2
  uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
  
  /**
@@ -266,7 +285,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
   * @param fn the function to be used compare keys; must not be NULL
   * @return the previous key comparator; non-NULL
   */
-U_CAPI UValueComparator *U_EXPORT2 
+U_CAPI UValueComparator *U_EXPORT2
  uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
  
  /**
@@ -279,7 +298,7 @@ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
   * @param fn the function to be used delete keys, or NULL
   * @return the previous key deleter; may be NULL
   */
-U_CAPI UObjectDeleter *U_EXPORT2 
+U_CAPI UObjectDeleter *U_EXPORT2
  uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
  
  /**
@@ -292,7 +311,7 @@ uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
   * @param fn the function to be used delete values, or NULL
   * @return the previous value deleter; may be NULL
   */
-U_CAPI UObjectDeleter *U_EXPORT2 
+U_CAPI UObjectDeleter *U_EXPORT2
  uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
  
  /**
@@ -302,7 +321,7 @@ uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
   * @param hash The UHashtable to set
   * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
   */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
  
  /**
@@ -310,7 +329,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
   * @param hash The UHashtable to query.
   * @return The number of key-value pairs stored in hash.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_count(const UHashtable *hash);
  
  /**
@@ -326,7 +345,7 @@ uhash_count(const UHashtable *hash);
   * @return The previous value, or NULL if none.
   * @see uhash_get
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_put(UHashtable *hash,
            void *key,
            void *value,
@@ -344,7 +363,7 @@ uhash_put(UHashtable *hash,
   * @return The previous value, or NULL if none.
   * @see uhash_get
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_iput(UHashtable *hash,
             int32_t key,
             void* value,
@@ -362,7 +381,7 @@ uhash_iput(UHashtable *hash,
   * @return The previous value, or 0 if none.
   * @see uhash_get
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_puti(UHashtable *hash,
             void* key,
             int32_t value,
@@ -380,7 +399,7 @@ uhash_puti(UHashtable *hash,
   * @return The previous value, or 0 if none.
   * @see uhash_get
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_iputi(UHashtable *hash,
             int32_t key,
             int32_t value,
@@ -393,8 +412,8 @@ uhash_iputi(UHashtable *hash,
   * @param key A pointer key stored in a hashtable
   * @return The requested item, or NULL if not found.
   */
-U_CAPI void* U_EXPORT2 
-uhash_get(const UHashtable *hash, 
+U_CAPI void* U_EXPORT2
+uhash_get(const UHashtable *hash,
            const void *key);
  
  /**
@@ -404,7 +423,7 @@ uhash_get(const UHashtable *hash,
   * @param key An integer key stored in a hashtable
   * @return The requested item, or NULL if not found.
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_iget(const UHashtable *hash,
             int32_t key);
  
@@ -415,7 +434,7 @@ uhash_iget(const UHashtable *hash,
   * @param key A pointer key stored in a hashtable
   * @return The requested item, or 0 if not found.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_geti(const UHashtable *hash,
             const void* key);
  /**
@@ -425,7 +444,7 @@ uhash_geti(const UHashtable *hash,
   * @param key An integer key stored in a hashtable
   * @return The requested item, or 0 if not found.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_igeti(const UHashtable *hash,
             int32_t key);
  
@@ -435,7 +454,7 @@ uhash_igeti(const UHashtable *hash,
   * @param key A key stored in a hashtable
   * @return The item removed, or NULL if not found.
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_remove(UHashtable *hash,
               const void *key);
  
@@ -445,7 +464,7 @@ uhash_remove(UHashtable *hash,
   * @param key An integer key stored in a hashtable
   * @return The item removed, or NULL if not found.
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_iremove(UHashtable *hash,
                int32_t key);
  
@@ -455,7 +474,7 @@ uhash_iremove(UHashtable *hash,
   * @param key An key stored in a hashtable
   * @return The item removed, or 0 if not found.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_removei(UHashtable *hash,
                const void* key);
  
@@ -465,7 +484,7 @@ uhash_removei(UHashtable *hash,
   * @param key An integer key stored in a hashtable
   * @return The item removed, or 0 if not found.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_iremovei(UHashtable *hash,
                 int32_t key);
  
@@ -473,7 +492,7 @@ uhash_iremovei(UHashtable *hash,
   * Remove all items from a UHashtable.
   * @param hash The target UHashtable.
   */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uhash_removeAll(UHashtable *hash);
  
  /**
@@ -487,7 +506,7 @@ uhash_removeAll(UHashtable *hash);
   * @param key A key stored in a hashtable
   * @return a hash element, or NULL if the key is not found.
   */
-U_CAPI const UHashElement* U_EXPORT2 
+U_CAPI const UHashElement* U_EXPORT2
  uhash_find(const UHashtable *hash, const void* key);
  
  /**
@@ -510,7 +529,7 @@ uhash_find(const UHashtable *hash, const void* key);
   * @return a hash element, or NULL if no further key-value pairs
   * exist in the table.
   */
-U_CAPI const UHashElement* U_EXPORT2 
+U_CAPI const UHashElement* U_EXPORT2
  uhash_nextElement(const UHashtable *hash,
                    int32_t *pos);
  
@@ -525,7 +544,7 @@ uhash_nextElement(const UHashtable *hash,
   * modified.
   * @return the value that was removed.
   */
-U_CAPI void* U_EXPORT2 
+U_CAPI void* U_EXPORT2
  uhash_removeElement(UHashtable *hash, const UHashElement* e);
  
  /********************************************************************
@@ -537,7 +556,7 @@ uhash_removeElement(UHashtable *hash, const UHashElement* e);
   * @param i The given integer
   * @return a UHashTok for an integer.
   */
-/*U_CAPI UHashTok U_EXPORT2 
+/*U_CAPI UHashTok U_EXPORT2
  uhash_toki(int32_t i);*/
  
  /**
@@ -545,7 +564,7 @@ uhash_toki(int32_t i);*/
   * @param p The given pointer
   * @return a UHashTok for a pointer.
   */
-/*U_CAPI UHashTok U_EXPORT2 
+/*U_CAPI UHashTok U_EXPORT2
  uhash_tokp(void* p);*/
  
  /********************************************************************
@@ -559,7 +578,7 @@ uhash_tokp(void* p);*/
   * @param key The string (const UChar*) to hash.
   * @return A hash code for the key.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_hashUChars(const UHashTok key);
  
  /**
@@ -569,7 +588,7 @@ uhash_hashUChars(const UHashTok key);
   * @param key The string (const char*) to hash.
   * @return A hash code for the key.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_hashChars(const UHashTok key);
  
  /**
@@ -589,7 +608,7 @@ uhash_hashIChars(const UHashTok key);
   * @param key2 The string for comparison
   * @return true if key1 and key2 are equal, return false otherwise.
   */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_compareUChars(const UHashTok key1, const UHashTok key2);
  
  /**
@@ -599,7 +618,7 @@ uhash_compareUChars(const UHashTok key1, const UHashTok key2);
   * @param key2 The string for comparison
   * @return true if key1 and key2 are equal, return false otherwise.
   */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_compareChars(const UHashTok key1, const UHashTok key2);
  
  /**
@@ -609,7 +628,7 @@ uhash_compareChars(const UHashTok key1, const UHashTok key2);
   * @param key2 The string for comparison
   * @return true if key1 and key2 are equal, return false otherwise.
   */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_compareIChars(const UHashTok key1, const UHashTok key2);
  
  /********************************************************************
@@ -621,7 +640,7 @@ uhash_compareIChars(const UHashTok key1, const UHashTok key2);
   * @param key The string (const char*) to hash.
   * @return A hash code for the key.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_hashUnicodeString(const UElement key);
  
  /**
@@ -630,7 +649,7 @@ uhash_hashUnicodeString(const UElement key);
   * @param key The string (const char*) to hash.
   * @return A hash code for the key.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_hashCaselessUnicodeString(const UElement key);
  
  /********************************************************************
@@ -642,7 +661,7 @@ uhash_hashCaselessUnicodeString(const UElement key);
   * @param key The string (const char*) to hash.
   * @return A hash code for the key.
   */
-U_CAPI int32_t U_EXPORT2 
+U_CAPI int32_t U_EXPORT2
  uhash_hashLong(const UHashTok key);
  
  /**
@@ -651,7 +670,7 @@ uhash_hashLong(const UHashTok key);
   * @param Key2 The integer for comparison
   * @return true if key1 and key2 are equal, return false otherwise
   */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_compareLong(const UHashTok key1, const UHashTok key2);
  
  /********************************************************************
@@ -662,7 +681,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
   * Deleter for Hashtable objects.
   * @param obj The object to be deleted
   */
-U_CAPI void U_EXPORT2 
+U_CAPI void U_EXPORT2
  uhash_deleteHashtable(void *obj);
  
  /* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
@@ -673,7 +692,7 @@ uhash_deleteHashtable(void *obj);
   * @param hash2
   * @return true if the hashtables are equal and false if not.
   */
-U_CAPI UBool U_EXPORT2 
+U_CAPI UBool U_EXPORT2
  uhash_equals(const UHashtable* hash1, const UHashtable* hash2);
  
  
diff --git a/icu4c/source/i18n/anytrans.cpp b/icu4c/source/i18n/anytrans.cpp

index e7d5375d693b141570be4cbf342966a9d9c11965..d06469e2ae274633bede136323d502704373f0b2 100644 (file)
--- a/icu4c/source/i18n/anytrans.cpp
+++ b/icu4c/source/i18n/anytrans.cpp
@@ -31,9 +31,13 @@
  
  static const UChar TARGET_SEP = 45; // '-'
  static const UChar VARIANT_SEP = 47; // '/'
-static const UChar ANY[] = {65,110,121,0}; // "Any"
+static const UChar ANY[] = {0x41,0x6E,0x79,0}; // "Any"
  static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
-static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
+static const UChar LATIN_PIVOT[] = {0x2D,0x4C,0x61,0x74,0x6E,0x3B,0x4C,0x61,0x74,0x6E,0x2D,0}; // "-Latn;Latn-"
+
+// initial size for an Any-XXXX transform's cache of script-XXXX transforms
+// (will grow as necessary, but we don't expect to have source text with more than 7 scripts)
+#define ANY_TRANS_CACHE_INIT_SIZE 7
  
  //------------------------------------------------------------
  
@@ -186,7 +190,7 @@ AnyTransliterator::AnyTransliterator(const UnicodeString& id,
      Transliterator(id, NULL),
      targetScript(theTargetScript)
  {
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
      if (U_FAILURE(ec)) {
          return;
      }
@@ -212,7 +216,7 @@ AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
  {
      // Don't copy the cache contents
      UErrorCode ec = U_ZERO_ERROR;
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
      if (U_FAILURE(ec)) {
          return;
      }
@@ -286,7 +290,7 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
      }
      if (t == NULL) {
          UErrorCode ec = U_ZERO_ERROR;
-        UnicodeString sourceName(uscript_getName(source), -1, US_INV);
+        UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
          UnicodeString id(sourceName);
          id.append(TARGET_SEP).append(target);
  
diff --git a/icu4c/source/i18n/transreg.cpp b/icu4c/source/i18n/transreg.cpp

index 3dadf792b2421a90b5eaaf552085397f490c82c8..d57f13153432b9271d2d61a159f544d2fee0bd84 100644 (file)
--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@@ -46,11 +46,29 @@ static const UChar LOCALE_SEP  = 95; // '_'
  //static const UChar VARIANT_SEP = 0x002F; // '/'
  
  // String constants
-static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
+static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
+static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat
  
  // empty string
  #define NO_VARIANT UnicodeString()
  
+// initial estimate for specDAG size
+// ICU 60 Transliterator::countAvailableSources()
+#define SPECDAG_INIT_SIZE 149
+
+// initial estimate for number of variant names
+#define VARIANT_LIST_INIT_SIZE 11
+#define VARIANT_LIST_MAX_SIZE 31
+
+// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
+// ICU 60 Transliterator::countAvailableIDs()
+#define AVAILABLE_IDS_INIT_SIZE 641
+
+// initial estimate for number of targets for source "Any", "Lat"
+// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
+#define ANY_TARGETS_INIT_SIZE 125
+#define LAT_TARGETS_INIT_SIZE 23
+
  /**
   * Resource bundle key for the RuleBasedTransliterator rule.
   */
@@ -517,10 +535,17 @@ U_CDECL_END
  
  TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
      registry(TRUE, status),
-    specDAG(TRUE, status),
-    availableIDs(status)
+    specDAG(TRUE, SPECDAG_INIT_SIZE, status),
+    variantList(VARIANT_LIST_INIT_SIZE, status),
+    availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
  {
      registry.setValueDeleter(deleteEntry);
+    variantList.setDeleter(uprv_deleteUObject);
+    variantList.setComparer(uhash_compareCaselessUnicodeString);
+    UnicodeString *emptyString = new UnicodeString();
+    if (emptyString != NULL) {
+        variantList.addElement(emptyString, status);
+    }
      availableIDs.setDeleter(uprv_deleteUObject);
      availableIDs.setComparer(uhash_compareCaselessUnicodeString);
      specDAG.setValueDeleter(uhash_deleteHashtable);
@@ -781,9 +806,15 @@ int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& sour
      if (targets == 0) {
          return 0;
      }
-    UVector *variants = (UVector*) targets->get(target);
-    // variants may be 0 if the source/target are invalid
-    return (variants == 0) ? 0 : variants->size();
+    int32_t varMask = targets->geti(target);
+    int32_t varCount = 0;
+    while (varMask > 0) {
+        if (varMask & 1) {
+            varCount++;
+        }
+        varMask >>= 1;
+    }
+    return varCount;
  }
  
  UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
@@ -795,17 +826,25 @@ UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
          result.truncate(0); // invalid source
          return result;
      }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
-        result.truncate(0); // invalid target
-        return result;
-    }
-    UnicodeString *v = (UnicodeString*) variants->elementAt(index);
-    if (v == 0) {
-        result.truncate(0); // invalid index
-    } else {
-        result = *v;
+    int32_t varMask = targets->geti(target);
+    int32_t varCount = 0;
+    int32_t varListIndex = 0;
+    while (varMask > 0) {
+        if (varMask & 1) {
+            if (varCount == index) {
+                UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
+                if (v != NULL) {
+                    result = *v;
+                    return result;
+                }
+                break;
+            }
+            varCount++;
+        }
+        varMask >>= 1;
+        varListIndex++;
      }
+    result.truncate(0); // invalid target or index
      return result;
  }
  
@@ -911,9 +950,9 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
              UnicodeString *newID = (UnicodeString *)ID.clone();
              // Check to make sure newID was created.
              if (newID != NULL) {
-                   // NUL-terminate the ID string
-                   newID->getTerminatedBuffer();
-                   availableIDs.addElement(newID, status);
+                // NUL-terminate the ID string
+                newID->getTerminatedBuffer();
+                availableIDs.addElement(newID, status);
              }
          }
      } else {
@@ -924,9 +963,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
  
  /**
   * Register a source-target/variant in the specDAG.  Variant may be
- * empty, but source and target must not be.  If variant is empty then
- * the special variant NO_VARIANT is stored in slot zero of the
- * UVector of variants.
+ * empty, but source and target must not be.
   */
  void TransliteratorRegistry::registerSTV(const UnicodeString& source,
                                           const UnicodeString& target,
@@ -936,39 +973,38 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source,
      UErrorCode status = U_ZERO_ERROR;
      Hashtable *targets = (Hashtable*) specDAG.get(source);
      if (targets == 0) {
-        targets = new Hashtable(TRUE, status);
-        if (U_FAILURE(status) || targets == 0) {
+        int32_t size = 3;
+        if (source.compare(ANY,3) == 0) {
+            size = ANY_TARGETS_INIT_SIZE;
+        } else if (source.compare(LAT,3) == 0) {
+            size = LAT_TARGETS_INIT_SIZE;
+        }
+        targets = new Hashtable(TRUE, size, status);
+        if (U_FAILURE(status) || targets == NULL) {
              return;
          }
-        targets->setValueDeleter(uprv_deleteUObject);
          specDAG.put(source, targets, status);
      }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
-        variants = new UVector(uprv_deleteUObject,
-                               uhash_compareCaselessUnicodeString, status);
-        if (variants == 0) {
+    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+    if (variantListIndex < 0) {
+        if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
+            // can't handle any more variants
              return;
          }
-        targets->put(target, variants, status);
-    }
-    // assert(NO_VARIANT == "");
-    // We add the variant string.  If it is the special "no variant"
-    // string, that is, the empty string, we add it at position zero.
-    if (!variants->contains((void*) &variant)) {
-       UnicodeString *tempus; // Used for null pointer check.
-        if (variant.length() > 0) {
-               tempus = new UnicodeString(variant);
-               if (tempus != NULL) {
-                       variants->addElement(tempus, status);
-               }
-        } else {
-               tempus = new UnicodeString();  // = NO_VARIANT
-               if (tempus != NULL) {
-                       variants->insertElementAt(tempus, 0, status);
-               }
+        UnicodeString *variantEntry = new UnicodeString(variant);
+        if (variantEntry != NULL) {
+            variantList.addElement(variantEntry, status);
+            if (U_SUCCESS(status)) {
+                variantListIndex = variantList.size() - 1;
+            }
+        }
+        if (variantListIndex < 0) {
+            return;
          }
      }
+    int32_t addMask = 1 << variantListIndex;
+    int32_t varMask = targets->geti(target);
+    targets->puti(target, varMask | addMask, status);
  }
  
  /**
@@ -979,17 +1015,24 @@ void TransliteratorRegistry::removeSTV(const UnicodeString& source,
                                         const UnicodeString& variant) {
      // assert(source.length() > 0);
      // assert(target.length() > 0);
-//    UErrorCode status = U_ZERO_ERROR;
+    UErrorCode status = U_ZERO_ERROR;
      Hashtable *targets = (Hashtable*) specDAG.get(source);
-    if (targets == 0) {
+    if (targets == NULL) {
          return; // should never happen for valid s-t/v
      }
-    UVector *variants = (UVector*) targets->get(target);
-    if (variants == 0) {
+    int32_t varMask = targets->geti(target);
+    if (varMask == 0) {
          return; // should never happen for valid s-t/v
      }
-    variants->removeElement((void*) &variant);
-    if (variants->size() == 0) {
+    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+    if (variantListIndex < 0) {
+        return; // should never happen for valid s-t/v
+    }
+    int32_t remMask = 1 << variantListIndex;
+    varMask &= (~remMask);
+    if (varMask != 0) {
+        targets->puti(target, varMask, status);
+    } else {
          targets->remove(target); // should delete variants
          if (targets->count() == 0) {
              specDAG.remove(source); // should delete targets
@@ -1281,8 +1324,8 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
              UVector* rbts = new UVector(entry->u.dataVector->size(), status);
              // Check for null pointer
              if (rbts == NULL) {
-               status = U_MEMORY_ALLOCATION_ERROR;
-               return NULL;
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
              }
              int32_t passNumber = 1;
              for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
diff --git a/icu4c/source/i18n/transreg.h b/icu4c/source/i18n/transreg.h

index 6fc35c8247b341e263a74c697308399f8542a975..041244e1b02d774d0ec81381d5b829f133fba64f 100644 (file)
--- a/icu4c/source/i18n/transreg.h
+++ b/icu4c/source/i18n/transreg.h
@@ -440,13 +440,15 @@ class TransliteratorRegistry : public UMemory {
  
      /**
       * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
-     * target => (UVector: variant)) The UVector of variants is never
-     * empty.  For a source-target with no variant, the special
-     * variant NO_VARIANT (the empty string) is stored in slot zero of
-     * the UVector.
+     * target => variant bitmask)
       */
      Hashtable specDAG;
  
+    /**
+     * Vector of all variant names
+     */
+    UVector variantList;
+
      /**
       * Vector of public full IDs.
       */
diff --git a/icu4c/source/test/cintltst/utransts.c b/icu4c/source/test/cintltst/utransts.c

index 40bd2d6ebe0ed9820a28aa9c3c674bd7f1c64cba..66d6580d99357cc255e3196c69fd585d61df3de0 100644 (file)
--- a/icu4c/source/test/cintltst/utransts.c
+++ b/icu4c/source/test/cintltst/utransts.c
@@ -33,6 +33,7 @@ static void TestRegisterUnregister(void);
  static void TestExtractBetween(void);
  static void TestUnicodeIDs(void);
  static void TestGetRulesAndSourceSet(void);
+static void TestDataVariantsCompounds(void);
  
  static void _expectRules(const char*, const char*, const char*);
  static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
@@ -51,6 +52,7 @@ addUTransTest(TestNode** root) {
      TEST(TestExtractBetween);
      TEST(TestUnicodeIDs);
      TEST(TestGetRulesAndSourceSet);
+    TEST(TestDataVariantsCompounds);
  }
  
  /*------------------------------------------------------------------
@@ -638,6 +640,65 @@ static void TestGetRulesAndSourceSet() {
      }
  }
  
+typedef struct { /* one test case for TestDataVariantsCompounds */
+    const char * transID;    /* transliterator ID; unescaped and passed to utrans_openU */
+    const char * sourceText; /* escaped input text that gets transliterated */
+    const char * targetText; /* escaped text the transliterated result is compared against */
+} TransIDSourceTarg;
+
+static const TransIDSourceTarg dataVarCompItems[] = { /* cases iterated by TestDataVariantsCompounds */
+    { "Simplified-Traditional",
+       "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
+       "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
+    { "Halfwidth-Fullwidth",
+      "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
+      "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
+    { "Han-Latin/Names; Latin-Bopomofo",
+       "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
+       "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
+    { "Greek-Latin",
+      "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+      "A \\u0100I H\\u0100I RH" },
+    { "Greek-Latin/BGN",
+      "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+      "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
+    { "Greek-Latin/UNGEGN",
+      "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
+      "A A A R" },
+    { NULL, NULL, NULL } /* terminator: the test loop stops at the first NULL transID */
+};
+
+/* Capacity, in bytes, of the char buffers used to print mismatching text in failure messages. */
+enum { kBBufMax = 384 };
+
+/**
+ * For every entry of dataVarCompItems: open the transliterator named by
+ * transID, transliterate sourceText in place, and compare the result with
+ * targetText. An entry whose transliterator cannot be opened is reported as a
+ * data error and skipped; a transliteration failure or a mismatch is reported
+ * with log_err.
+ */
+static void TestDataVariantsCompounds() {
+    const TransIDSourceTarg* itemsPtr;
+    for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UChar utrid[kUBufMax];
+        int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax);
+        UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status);
+        if (U_FAILURE(status)) {
+            /* The message names the function that was actually called (utrans_openU). */
+            log_data_err("FAIL: utrans_openU(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status));
+            continue;
+        }
+        UChar text[kUBufMax];
+        int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
+        int32_t textLim = textLen;
+        /* Transliterates text in place; textLen is updated to the result length. */
+        utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
+        if (U_FAILURE(status)) {
+            log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
+        } else {
+            UChar expect[kUBufMax];
+            int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
+            if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
+                char btext[kBBufMax], bexpect[kBBufMax];
+                /*
+                 * u_austrncpy adds a terminator only when there is room, so
+                 * passing the UChar count as the limit left these buffers
+                 * unterminated (and truncated for multi-byte output). Pass the
+                 * buffer capacity instead and terminate explicitly before the
+                 * buffers are printed with %s.
+                 * NOTE(review): this relies on text/expect being NUL-terminated,
+                 * which presumably holds whenever they are shorter than
+                 * kUBufMax -- confirm.
+                 */
+                u_austrncpy(btext, text, kBBufMax - 1);
+                btext[kBBufMax - 1] = 0;
+                u_austrncpy(bexpect, expect, kBBufMax - 1);
+                bexpect[kBBufMax - 1] = 0;
+                log_err("FAIL: utrans_transUChars(%s),\n       expect %s\n       get    %s\n", itemsPtr->transID, bexpect, btext);
+            }
+        }
+        utrans_close(utrans);
+    }
+}
  
  static void _expectRules(const char* crules,
                    const char* cfrom,
author	Peter Edberg <pedberg@unicode.org>
	Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)
committer	Peter Edberg <pedberg@unicode.org>
	Wed, 20 Sep 2017 00:39:40 +0000 (00:39 +0000)
icu4c/source/common/hash.h		patch \| blob \| history
icu4c/source/common/uhash.cpp		patch \| blob \| history
icu4c/source/common/uhash.h		patch \| blob \| history
icu4c/source/i18n/anytrans.cpp		patch \| blob \| history
icu4c/source/i18n/transreg.cpp		patch \| blob \| history
icu4c/source/i18n/transreg.h		patch \| blob \| history
icu4c/source/test/cintltst/utransts.c		patch \| blob \| history