ICU-11610 rename usearch hash() to hashFromCE32(), improve implementation

author Markus Scherer <markus.icu@gmail.com>

Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)
author Markus Scherer <markus.icu@gmail.com>
Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)
diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp

index 96414dbf4d90ab38be5ecad097101c45c79e913f..a2d83ed2f8fc1ca9b3b5ffd2e2b3e2a21b4e5bf3 100644 (file)
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2015 IBM and others. All rights reserved.
  **********************************************************************
  *   Date        Name        Description
  *  07/02/2001   synwee      Creation.
@@ -75,18 +75,22 @@ inline uint32_t getMask(UCollationStrength strength)
  }
  
  /**
-* This is to squeeze the 21bit ces into a 256 table
-* @param ce collation element
-* @return collapsed version of the collation element
+* @param ce 32-bit collation element
+* @return hash code
  */
  static
-inline int hash(uint32_t ce)
+inline int hashFromCE32(uint32_t ce)
  {
-    // the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work
-    // well with the new collation where most of the latin 1 characters
-    // are of the value xx000xxx. their hashes will most of the time be 0
-    // to be discussed on the hash algo.
-    return UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_;
+    int hc = (int)(
+            ((((((ce >> 24) * 37) +
+            (ce >> 16)) * 37) +
+            (ce >> 8)) * 37) +
+            ce);
+    hc %= MAX_TABLE_SIZE_;
+    if (hc < 0) {
+        hc += MAX_TABLE_SIZE_;
+    }
+    return hc;
  }
  
  U_CDECL_BEGIN
@@ -492,22 +496,22 @@ inline void setShiftTable(int16_t   shift[], int16_t backshift[],
      for (count = 0; count < cesize; count ++) {
          // number of ces from right of array to the count
          int temp = defaultforward - count - 1;
-        shift[hash(cetable[count])] = temp > 1 ? temp : 1;
+        shift[hashFromCE32(cetable[count])] = temp > 1 ? temp : 1;
      }
-    shift[hash(cetable[cesize])] = 1;
+    shift[hashFromCE32(cetable[cesize])] = 1;
      // for ignorables we just shift by one. see test examples.
-    shift[hash(0)] = 1;
+    shift[hashFromCE32(0)] = 1;
  
      for (count = 0; count < MAX_TABLE_SIZE_; count ++) {
          backshift[count] = defaultbackward;
      }
      for (count = cesize; count > 0; count --) {
          // the original value count does not seem to work
-        backshift[hash(cetable[count])] = count > expansionsize ?
+        backshift[hashFromCE32(cetable[count])] = count > expansionsize ?
                                            (int16_t)(count - expansionsize) : 1;
      }
-    backshift[hash(cetable[0])] = 1;
-    backshift[hash(0)] = 1;
+    backshift[hashFromCE32(cetable[0])] = 1;
+    backshift[hashFromCE32(0)] = 1;
  }
  
  /**
@@ -730,7 +734,7 @@ inline int32_t shiftForward(UStringSearch *strsrch,
  {
      UPattern *pattern = &(strsrch->pattern);
      if (ce != UCOL_NULLORDER) {
-        int32_t shift = pattern->shift[hash(ce)];
+        int32_t shift = pattern->shift[hashFromCE32(ce)];
          // this is to adjust for characters in the middle of the
          // substring for matching that failed.
          int32_t adjust = pattern->cesLength - patternceindex;
@@ -1971,7 +1975,7 @@ inline int32_t reverseShift(UStringSearch *strsrch,
      }
      else {
          if (ce != UCOL_NULLORDER) {
-            int32_t shift = strsrch->pattern.backShift[hash(ce)];
+            int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)];
  
              // this is to adjust for characters in the middle of the substring
              // for matching that failed.
author	Markus Scherer <markus.icu@gmail.com>
	Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Fri, 29 May 2015 21:51:00 +0000 (21:51 +0000)