From fd77c49a2b6a74455fad8390b51d701234c7aed6 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Wed, 14 Feb 2018 23:44:50 +0000 Subject: [PATCH] ICU-13569 RBBI state table minimization, Java now works. X-SVN-Rev: 40916 --- .../src/com/ibm/icu/text/RBBIRuleBuilder.java | 26 +- .../src/com/ibm/icu/text/RBBISetBuilder.java | 5 + .../com/ibm/icu/text/RBBITableBuilder.java | 254 ++++++++++-------- .../com/ibm/icu/dev/test/rbbi/RBBITest.java | 16 +- 4 files changed, 166 insertions(+), 135 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java index 87ea903bd25..76921c564c3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java @@ -372,25 +372,29 @@ class RBBIRuleBuilder { builder.flattenData(os); } - static class ClassPair { - int left = 3; - int right = 0; + static class IntPair { + int first = 0; + int second = 0; + IntPair() {}; + IntPair(int f, int s) { + first = f; + second = s; + } } void optimizeTables() { - ClassPair duplPair = new ClassPair(); - + IntPair duplPair = new IntPair(3, 0); while (fForwardTables.findDuplCharClassFrom(duplPair)) { - fSetBuilder.mergeCategories(duplPair); - fForwardTables.removeColumn(duplPair.right); - fReverseTables.removeColumn(duplPair.right); - fSafeFwdTables.removeColumn(duplPair.right); - fSafeRevTables.removeColumn(duplPair.right); + fSetBuilder.mergeCategories(duplPair.first, duplPair.second); + fForwardTables.removeColumn(duplPair.second); + fReverseTables.removeColumn(duplPair.second); + fSafeFwdTables.removeColumn(duplPair.second); + fSafeRevTables.removeColumn(duplPair.second); } fForwardTables.removeDuplicateStates(); fReverseTables.removeDuplicateStates(); fSafeFwdTables.removeDuplicateStates(); fSafeRevTables.removeDuplicateStates(); - + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java index 9f5a8a50a2c..ada22580101 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java @@ -305,6 +305,10 @@ class RBBISetBuilder { } } + /** + * Merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the right category (table column) will be added to the left. + */ void mergeCategories(int left, int right) { assert(left >= 1); assert(right > left); @@ -319,6 +323,7 @@ class RBBISetBuilder { } --fGroupCount; } + //----------------------------------------------------------------------------------- // // getTrieSize() Return the size that will be required to serialize the Trie. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java index 4d71e76f812..9130ad81b58 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java @@ -10,6 +10,7 @@ package com.ibm.icu.text; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -20,6 +21,7 @@ import java.util.TreeSet; import com.ibm.icu.impl.Assert; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; +import com.ibm.icu.text.RBBIRuleBuilder.IntPair; // // class RBBITableBuilder is part of the RBBI rule compiler. @@ -832,128 +834,148 @@ class RBBITableBuilder { -// -// findDuplCharClassFrom() -// -boolean findDuplCharClassFrom(RBBIRuleBuilder.ClassPair classPair) { - int numStates = fDStates.size(); - int numCols = fRB.fSetBuilder.getNumCharCategories(); - - uint16_t table_base; - uint16_t table_dupl; - for (; baseCategory < numCols-1; ++baseCategory) { - for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) { - for (int state=0; state duplState) { - newVal = existingVal - 1; - } - sd.fDtran.setElementAt(newVal, col); - } - if (sd.fAccepting == duplState) { - sd.fAccepting = keepState; - } else if (sd.fAccepting > duplState) { - sd.fAccepting--; - } - if (sd.fLookAhead == duplState) { - sd.fLookAhead = keepState; - } else if (sd.fLookAhead > duplState) { - sd.fLookAhead--; - } - } -} + /** + * Remove a duplicate state (row) from the state table. All references to the deleted state are + * redirected to "keepState", the first encountered of the duplicated pair of states. + * @param keepState The first of the duplicate pair of states, the one to be kept. + * @param duplState The second of the duplicate pair, the one to be removed. + * @internal + */ + void removeState(int keepState, int duplState) { + assert(keepState < duplState); + assert(duplState < fDStates.size()); + fDStates.remove(duplState); -/* - * RemoveDuplicateStates - */ -void removeDuplicateStates() { - int firstState = 3; - int duplicateState = 0; - while (findDuplicateState(firstState, duplicateState)) { - // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState); - removeState(firstState, duplicateState); - } -} + int numStates = fDStates.size(); + int numCols = fRB.fSetBuilder.getNumCharCategories(); + for (int state=0; state duplState) { + newVal = existingVal - 1; + } + sd.fDtran[col] = newVal; + } + if (sd.fAccepting == duplState) { + sd.fAccepting = keepState; + } else if (sd.fAccepting > duplState) { + sd.fAccepting--; + } + if (sd.fLookAhead == duplState) { + sd.fLookAhead = keepState; + } else if (sd.fLookAhead > duplState) { + sd.fLookAhead--; + } + } + } + + + /** + * Check for, and remove duplicate states (table rows). + * @internal + */ + void removeDuplicateStates() { + IntPair dupls = new IntPair(3, 0); + while (findDuplicateState(dupls)) { + // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); + removeState(dupls.first, dupls.second); + } + } //----------------------------------------------------------------------------- diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java index e0aff62172a..e1dd1e3b285 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java @@ -590,10 +590,10 @@ public class RBBITest extends TestFmwk { // Ignore column (char class) 0 while checking; it's special, and may have duplicates. for (int c1=1; c1