granicus.if.org Git - icu/commitdiff
ICU-13569 rbbi char class merging now working, tests passing.
author Andy Heninger <andy.heninger@gmail.com>
Fri, 9 Feb 2018 18:53:28 +0000 (18:53 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Fri, 9 Feb 2018 18:53:28 +0000 (18:53 +0000)
X-SVN-Rev: 40881

icu4c/source/common/rbbirb.cpp
icu4c/source/common/rbbisetb.cpp
icu4c/source/common/rbbisetb.h
icu4c/source/test/intltest/rbbitst.cpp

index 3becda31278a06fef462fe0bbd6f4b52d9af69b3..a0f92da619f15271170763ef31be78070128ff5e 100644 (file)
@@ -359,10 +359,7 @@ void RBBIRuleBuilder::optimizeTables() {
 
     leftClass = 3;
     rightClass = 4;
-    printf("Optimizing tables ...\n");
     while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
-        printf("Merging duplicate columns (%d, %d)\n", leftClass, rightClass);
-        
         fSetBuilder->mergeCategories(leftClass, rightClass);
         fForwardTables->removeColumn(rightClass);
         fReverseTables->removeColumn(rightClass);
index 67bb460acaa5e968f0b8c2bafed1e9622c36762b..108d127d45f5996e07f6eb8150f3f2baa44248b0 100644 (file)
@@ -274,9 +274,11 @@ void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
     U_ASSERT(left >= 1);
     U_ASSERT(right > left);
     for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
-        if (rd->fNum == right) {
-            rd->fNum = left;
-        } else if (rd->fNum > right) {
+        int32_t rangeNum = rd->fNum & ~DICT_BIT;
+        int32_t rangeDict = rd->fNum & DICT_BIT;
+        if (rangeNum == right) {
+            rd->fNum = left | rangeDict;
+        } else if (rangeNum > right) {
             rd->fNum--;
         }
     }
@@ -465,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
             lastPrintedGroupNum = groupNum;
             RBBIDebugPrintf("%2i  ", groupNum);
 
-            if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
+            if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
 
             for (i=0; i<rlRange->fIncludesSets->size(); i++) {
                 RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@@ -658,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
 void RangeDescriptor::setDictionaryFlag() {
     int i;
 
-    for (i=0; i<this->fIncludesSets->size(); i++) {
-        RBBINode       *usetNode    = (RBBINode *)fIncludesSets->elementAt(i);
-        UnicodeString   setName;
-        RBBINode       *setRef = usetNode->fParent;
-        if (setRef != NULL) {
+    static const char16_t *dictionary = u"dictionary";
+    for (i=0; i<fIncludesSets->size(); i++) {
+        RBBINode *usetNode  = (RBBINode *)fIncludesSets->elementAt(i);
+        RBBINode *setRef = usetNode->fParent;
+        if (setRef != nullptr) {
             RBBINode *varRef = setRef->fParent;
-            if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
-                setName = varRef->fText;
+            if (varRef && varRef->fType == RBBINode::varRef) {
+                const UnicodeString *setName = &varRef->fText;
+                if (setName->compare(dictionary, -1) == 0) {
+                    fNum |= RBBISetBuilder::DICT_BIT;
+                    break;
+                }
             }
         }
-        if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) {   // TODO:  no string literals.
-            this->fNum |= 0x4000;
-            break;
-        }
     }
 }
 
index 3f0ec1a8a0c462de4d922cf357654934b62f6ce5..a7a91b3b375b75aa625b33c5a58c26c19801d2c5 100644 (file)
@@ -99,6 +99,8 @@ public:
      */
     void     mergeCategories(int32_t left, int32_t right);
 
+    static constexpr int32_t DICT_BIT = 0x4000;
+
 #ifdef RBBI_DEBUG
     void     printSets();
     void     printRanges();
index c6e0f457fbe10f8d36ce171af0885a336a934c61..b28723f4564c015c85bc6e313b466ace4bf797d3 100644 (file)
@@ -4471,7 +4471,10 @@ void RBBITest::TestTableRedundancies() {
                 "($s1 | $s2 | $s3)*; \n" };
     RuleBasedBreakIterator *lbi = 
         (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
+    //lbi->dumpTables();
     rules = lbi->getRules();
+    delete lbi;
+
     UParseError pe {};
     RuleBasedBreakIterator *bi =
     //         (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);