ICU-21699 Fix CjkBreakEngine performance issue

author allenwtsu <allenwtsu@google.com>
Mon, 10 Jan 2022 14:07:13 +0000 (22:07 +0800)

committer Frank Yung-Fong Tang <ftang@google.com>
Wed, 12 Jan 2022 00:46:32 +0000 (16:46 -0800)

diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp

index 6b6d4297ad4e88011f6441c8e3e9994a15b87023..35d3cd48a7a6f6bfbb0e220f03cfb75883d3d79f 100644 (file)
--- a/icu4c/source/common/dictbe.cpp
+++ b/icu4c/source/common/dictbe.cpp
@@ -1370,7 +1370,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
          if (utextPos > prevUTextPos) {
              // Boundaries are added to foundBreaks output in ascending order.
              U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
-            if (!(foundBreaks.contains(utextPos) || utextPos == rangeStart)) {
+            if (utextPos != rangeStart) {
                  foundBreaks.push(utextPos, status);
                  correctedNumBreaks++;
              }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/breakiter/CjkBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/breakiter/CjkBreakEngine.java

index a14c745e509946330ebe55a108980bab2126dc75..0404e031cc2367b7eea27c7d730d4269bea2ab30 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/breakiter/CjkBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/breakiter/CjkBreakEngine.java
@@ -209,12 +209,14 @@ public class CjkBreakEngine extends DictionaryBreakEngine {
          }
  
          int correctedNumBreaks = 0;
+        int previous = -1;
          for (int i = numBreaks - 1; i >= 0; i--) {
              int pos = charPositions[t_boundary[i]] + startPos;
-            if (!(foundBreaks.contains(pos) || pos == startPos)) {
-                foundBreaks.push(charPositions[t_boundary[i]] + startPos);
+            if (pos > previous && pos != startPos) {
+                foundBreaks.push(pos);
                  correctedNumBreaks++;
              }
+            previous = pos;
          }
  
          if (!foundBreaks.isEmpty() && foundBreaks.peek() == endPos) {
author	allenwtsu <allenwtsu@google.com>
	Mon, 10 Jan 2022 14:07:13 +0000 (22:07 +0800)
committer	Frank Yung-Fong Tang <ftang@google.com>
	Wed, 12 Jan 2022 00:46:32 +0000 (16:46 -0800)
icu4c/source/common/dictbe.cpp		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/impl/breakiter/CjkBreakEngine.java		patch | blob | history