int index = findOrInsertNodeForCEs(strength);
long node = nodes.elementAti(index);
- // If the index is for a "weaker" tailored node,
+ // If the index is for a "weaker" node,
// then skip backwards over this and further "weaker" nodes.
while(strengthFromNode(node) > strength) {
index = previousIndexFromNode(node);
if(strength >= Collator.TERTIARY) {
index = findCommonNode(index, Collator.TERTIARY);
}
+ // findCommonNode() stayed on the stronger node or moved to
+ // an explicit common-weight node of the reset-before strength.
node = nodes.elementAti(index);
if(strengthFromNode(node) == strength) {
// Found a same-strength node with an explicit weight.
"reset secondary-before secondary ignorable not possible" :
"reset tertiary-before completely ignorable not possible");
}
- assert(weight16 >= Collation.COMMON_WEIGHT16);
+ assert(weight16 > Collation.BEFORE_WEIGHT16);
+ // Reset to just before this node.
+ // Insert the preceding same-level explicit weight if it is not there already.
+ // Which explicit weight immediately precedes this one?
+ weight16 = getWeight16Before(index, node, strength);
+ // Does this preceding weight have a node?
+ int previousWeight16;
int previousIndex = previousIndexFromNode(node);
- if(weight16 == Collation.COMMON_WEIGHT16) {
- // Reset to just before this same-strength common-weight node.
+ for(int i = previousIndex;; i = previousIndexFromNode(node)) {
+ node = nodes.elementAti(i);
+ int previousStrength = strengthFromNode(node);
+ if(previousStrength < strength) {
+ assert(weight16 >= Collation.COMMON_WEIGHT16 || i == previousIndex);
+ // Either the reset element has an above-common weight and
+ // the parent node provides the implied common weight,
+ // or the reset element has a weight<=common in the node
+ // right after the parent, and we need to insert the preceding weight.
+ previousWeight16 = Collation.COMMON_WEIGHT16;
+ break;
+ } else if(previousStrength == strength && !isTailoredNode(node)) {
+ previousWeight16 = weight16FromNode(node);
+ break;
+ }
+ // Skip weaker nodes and same-level tailored nodes.
+ }
+ if(previousWeight16 == weight16) {
+ // The preceding weight has a node,
+ // maybe with following weaker or tailored nodes.
+ // Reset to the last of them.
index = previousIndex;
} else {
- // A non-common weight is only possible from a root CE.
- // Find the higher-level weights, which must all be explicit,
- // and then find the preceding weight for this level.
- long previousWeight16 = 0;
- int previousWeightIndex = -1;
- int i = index;
- if(strength == Collator.SECONDARY) {
- long p;
- do {
- i = previousIndexFromNode(node);
- node = nodes.elementAti(i);
- if(strengthFromNode(node) == Collator.SECONDARY && !isTailoredNode(node) &&
- previousWeightIndex < 0) {
- previousWeightIndex = i;
- previousWeight16 = weight16FromNode(node);
- }
- } while(strengthFromNode(node) > Collator.PRIMARY);
- assert(!isTailoredNode(node));
- p = weight32FromNode(node);
- weight16 = rootElements.getSecondaryBefore(p, weight16);
- } else {
- long p;
- int s;
- do {
- i = previousIndexFromNode(node);
- node = nodes.elementAti(i);
- if(strengthFromNode(node) == Collator.TERTIARY && !isTailoredNode(node) &&
- previousWeightIndex < 0) {
- previousWeightIndex = i;
- previousWeight16 = weight16FromNode(node);
- }
- } while(strengthFromNode(node) > Collator.SECONDARY);
- assert(!isTailoredNode(node));
- if(strengthFromNode(node) == Collator.SECONDARY) {
- s = weight16FromNode(node);
- do {
- i = previousIndexFromNode(node);
- node = nodes.elementAti(i);
- } while(strengthFromNode(node) > Collator.PRIMARY);
- assert(!isTailoredNode(node));
- } else {
- assert(!nodeHasBefore2(node));
- s = Collation.COMMON_WEIGHT16;
- }
- p = weight32FromNode(node);
- weight16 = rootElements.getTertiaryBefore(p, s, weight16);
- assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0);
- }
- // Find or insert the new explicit weight before the current one.
- if(previousWeightIndex >= 0 && weight16 == previousWeight16) {
- // Tailor after the last node between adjacent root nodes.
- index = previousIndex;
- } else {
- node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
- index = insertNodeBetween(previousIndex, index, node);
- }
+ // Insert a node with the preceding weight, reset to that.
+ node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
+ index = insertNodeBetween(previousIndex, index, node);
}
} else {
// Found a stronger node with implied strength-common weight.
- long hasBefore3 = 0;
- if(strength == Collator.SECONDARY) {
- assert(!nodeHasBefore2(node));
- // Move the HAS_BEFORE3 flag from the parent node
- // to the new secondary common node.
- hasBefore3 = node & HAS_BEFORE3;
- node = (node & ~(long)HAS_BEFORE3) | HAS_BEFORE2;
- } else {
- assert(!nodeHasBefore3(node));
- node |= HAS_BEFORE3;
- }
- nodes.setElementAt(node, index);
- int nextIndex = nextIndexFromNode(node);
- // Insert default nodes with weights 01 and 05, reset to the 01 node.
- node = nodeFromWeight16(Collation.BEFORE_WEIGHT16) | nodeFromStrength(strength);
- index = insertNodeBetween(index, nextIndex, node);
- node = nodeFromWeight16(Collation.COMMON_WEIGHT16) | hasBefore3 |
- nodeFromStrength(strength);
- insertNodeBetween(index, nextIndex, node);
+ int weight16 = getWeight16Before(index, node, strength);
+ index = findOrInsertWeakNode(index, weight16, strength);
}
// Strength of the temporary CE = strength of its reset position.
// Code above raises an error if the before-strength is stronger.
ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength);
}
+ /**
+ * Returns the secondary or tertiary weight preceding the current node's weight.
+ * node=nodes[index].
+ */
+ private int getWeight16Before(int index, long node, int level) {
+ assert(strengthFromNode(node) < level || !isTailoredNode(node));
+ // Collect the root CE weights if this node is for a root CE.
+ // If it is not, then return the low non-primary boundary for a tailored CE.
+ int t;
+ if(strengthFromNode(node) == Collator.TERTIARY) {
+ t = weight16FromNode(node);
+ } else {
+ t = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight.
+ }
+ while(strengthFromNode(node) > Collator.SECONDARY) {
+ index = previousIndexFromNode(node);
+ node = nodes.elementAti(index);
+ }
+ if(isTailoredNode(node)) {
+ return Collation.BEFORE_WEIGHT16;
+ }
+ int s;
+ if(strengthFromNode(node) == Collator.SECONDARY) {
+ s = weight16FromNode(node);
+ } else {
+ s = Collation.COMMON_WEIGHT16; // Stronger node with implied common weight.
+ }
+ while(strengthFromNode(node) > Collator.PRIMARY) {
+ index = previousIndexFromNode(node);
+ node = nodes.elementAti(index);
+ }
+ if(isTailoredNode(node)) {
+ return Collation.BEFORE_WEIGHT16;
+ }
+ // [p, s, t] is a root CE. Return the preceding weight for the requested level.
+ long p = weight32FromNode(node);
+ int weight16;
+ if(level == Collator.SECONDARY) {
+ weight16 = rootElements.getSecondaryBefore(p, s);
+ } else {
+ weight16 = rootElements.getTertiaryBefore(p, s, t);
+ assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0);
+ }
+ return weight16;
+ }
+
private long getSpecialResetPosition(CharSequence str) {
assert(str.length() == 2);
long ce;
// down to the requested level/strength.
// Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
assert((ce & 0xc0) == 0);
- int index = findOrInsertNodeForPrimary(ce >>> 32 );
+ int index = findOrInsertNodeForPrimary(ce >>> 32);
if(strength >= Collator.SECONDARY) {
int lower32 = (int)ce;
index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY);
/** Finds or inserts the node for a secondary or tertiary weight. */
private int findOrInsertWeakNode(int index, int weight16, int level) {
assert(0 <= index && index < nodes.size());
+ assert(Collator.SECONDARY <= level && level <= Collator.TERTIARY);
- assert(weight16 == 0 || weight16 >= Collation.COMMON_WEIGHT16);
- // Only reset-before inserts common weights.
if(weight16 == Collation.COMMON_WEIGHT16) {
return findCommonNode(index, level);
}
+
+ // If this will be the first below-common weight for the parent node,
+ // then we will also need to insert a common weight after it.
+ long node = nodes.elementAti(index);
+ assert(strengthFromNode(node) < level); // parent node is stronger
+ if(weight16 != 0 && weight16 < Collation.COMMON_WEIGHT16) {
+ int hasThisLevelBefore = level == Collator.SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3;
+ if((node & hasThisLevelBefore) == 0) {
+ // The parent node has an implied level-common weight.
+ long commonNode =
+ nodeFromWeight16(Collation.COMMON_WEIGHT16) | nodeFromStrength(level);
+ if(level == Collator.SECONDARY) {
+ // Move the HAS_BEFORE3 flag from the parent node
+ // to the new secondary common node.
+ commonNode |= node & HAS_BEFORE3;
+ node &= ~(long)HAS_BEFORE3;
+ }
+ nodes.setElementAt(node | hasThisLevelBefore, index);
+ // Insert below-common-weight node.
+ int nextIndex = nextIndexFromNode(node);
+ node = nodeFromWeight16(weight16) | nodeFromStrength(level);
+ index = insertNodeBetween(index, nextIndex, node);
+ // Insert common-weight node.
+ insertNodeBetween(index, nextIndex, commonNode);
+ // Return index of below-common-weight node.
+ return index;
+ }
+ }
+
// Find the root CE's weight for this level.
// Postpone insertion if not found:
// Insert the new root node before the next stronger node,
// or before the next root node with the same strength and a larger weight.
- long node = nodes.elementAti(index);
int nextIndex;
while((nextIndex = nextIndexFromNode(node)) != 0) {
node = nodes.elementAti(nextIndex);
/**
* Finds the node which implies or contains a common=05 weight of the given strength
- * (secondary or tertiary).
+ * (secondary or tertiary), if the current node is stronger.
* Skips weaker nodes and tailored nodes if the current node is stronger
* and is followed by an explicit-common-weight node.
* Always returns the input index if that node is no stronger than the given strength.
index = nextIndexFromNode(node);
node = nodes.elementAti(index);
assert(!isTailoredNode(node) && strengthFromNode(node) == strength &&
- weight16FromNode(node) == Collation.BEFORE_WEIGHT16);
+ weight16FromNode(node) < Collation.COMMON_WEIGHT16);
// Skip to the explicit common node.
do {
index = nextIndexFromNode(node);
node = nodes.elementAti(index);
assert(strengthFromNode(node) >= strength);
- } while(isTailoredNode(node) || strengthFromNode(node) > strength);
+ } while(isTailoredNode(node) || strengthFromNode(node) > strength ||
+ weight16FromNode(node) < Collation.COMMON_WEIGHT16);
assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16);
return index;
}
CollationWeights secondaries = new CollationWeights();
CollationWeights tertiaries = new CollationWeights();
long[] nodesArray = nodes.getBuffer();
+ if(DEBUG) {
+ System.out.println("\nCollationBuilder.makeTailoredCEs()");
+ }
for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) {
int i = rootPrimaryIndexes.elementAti(rpi);
// Gap at the beginning of the tertiary CE range.
t = rootElements.getTertiaryBoundary() - 0x100;
tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK;
- } else if(t == Collation.BEFORE_WEIGHT16) {
- tLimit = Collation.COMMON_WEIGHT16;
} else if(!pIsTailored && !sIsTailored) {
// p and s are root weights.
tLimit = rootElements.getTertiaryAfter(pIndex, s, t);
+ } else if(t == Collation.BEFORE_WEIGHT16) {
+ tLimit = Collation.COMMON_WEIGHT16;
} else {
// [p, s] is tailored.
assert(t == Collation.COMMON_WEIGHT16);
// Gap at the beginning of the secondary CE range.
s = rootElements.getSecondaryBoundary() - 0x100;
sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16);
- } else if(s == Collation.BEFORE_WEIGHT16) {
- sLimit = Collation.COMMON_WEIGHT16;
} else if(!pIsTailored) {
// p is a root primary.
sLimit = rootElements.getSecondaryAfter(pIndex, s);
+ } else if(s == Collation.BEFORE_WEIGHT16) {
+ sLimit = Collation.COMMON_WEIGHT16;
} else {
// p is a tailored primary.
assert(s == Collation.COMMON_WEIGHT16);
secondaries.initForSecondary();
if(!secondaries.allocWeights(s, sLimit, sCount)) {
// C++ U_BUFFER_OVERFLOW_ERROR
+ if(DEBUG) {
+ System.out.printf("!secondaries.allocWeights(%x, %x, sCount=%d)\n",
+ alignWeightRight(s), alignWeightRight(sLimit),
+ alignWeightRight(sCount));
+ }
throw new UnsupportedOperationException("secondary tailoring gap too small");
}
sIsTailored = true;
/** At most 1M nodes, limited by the 20 bits in node bit fields. */
private static final int MAX_INDEX = 0xfffff;
/**
- * Node bit 6 is set on a primary node if there are tailored nodes
- * with secondary values below the common secondary weight (05),
- * from a reset-secondary-before (&[before 2]).
+ * Node bit 6 is set on a primary node if there are nodes
+ * with secondary values below the common secondary weight (05).
*/
private static final int HAS_BEFORE2 = 0x40;
/**
- * Node bit 5 is set on a primary or secondary node if there are tailored nodes
- * with tertiary values below the common tertiary weight (05),
- * from a reset-tertiary-before (&[before 3]).
+ * Node bit 5 is set on a primary or secondary node if there are nodes
+ * with tertiary values below the common tertiary weight (05).
*/
private static final int HAS_BEFORE3 = 0x20;
/**
* A node of a given strength normally implies "common" weights on weaker levels.
*
* A node with HAS_BEFORE2 must be immediately followed by
- * a secondary node with BEFORE_WEIGHT16, then a secondary tailored node,
+ * a secondary node with an explicit below-common weight, then a secondary tailored node,
* and later an explicit common-secondary node.
- * (&[before 2] resets to the BEFORE_WEIGHT16 node so that
+ * The below-common weight can be a root weight,
+ * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weight
+ * or before the lowest root weight.
+ * (&[before 2] resets to an explicit secondary node so that
* the following addRelation(secondary) tailors right after that.
* If we did not have this node and instead were to reset on the primary node,
* then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.)
*
- * All secondary tailored nodes between these two explicit ones
- * will be assigned lower-than-common secondary weights.
* If the flag is not set, then there are no explicit secondary nodes
* with the common or lower weights.
*
} else {
index = findPrimary(p) + 1;
previousSec = Collation.BEFORE_WEIGHT16;
- sec = Collation.COMMON_WEIGHT16;
+ sec = (int)getFirstSecTerForPrimary(index) >>> 16;
}
assert(s >= sec);
while(s > sec) {
} else {
index = findPrimary(p) + 1;
previousTer = Collation.BEFORE_WEIGHT16;
- secTer = Collation.COMMON_SEC_AND_TER_CE;
+ secTer = getFirstSecTerForPrimary(index);
}
long st = ((long)s << 16) | t;
while(st > secTer) {
/**
* Returns the secondary weight after [p, s] where index=findPrimary(p)
* except use index=0 for p=0.
+ *
+ * <p>Must return a weight for every root [p, s] as well as for every weight
+ * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16.
+ *
+ * <p>Exception: [0, 0] is handled by the CollationBuilder:
+ * Both its lower and upper boundaries are special.
*/
int getSecondaryAfter(int index, int s) {
+ long secTer;
int secLimit;
if(index == 0) {
// primary = 0
+ assert(s != 0);
index = (int)elements[IX_FIRST_SECONDARY_INDEX];
+ secTer = elements[index];
// Gap at the end of the secondary CE range.
secLimit = 0x10000;
} else {
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
- ++index;
+ secTer = getFirstSecTerForPrimary(index + 1);
+ // If this is an explicit sec/ter unit, then it will be read once more.
// Gap for secondaries of primary CEs.
secLimit = getSecondaryBoundary();
}
for(;;) {
- long secTer = elements[index];
- if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
int sec = (int)(secTer >> 16);
if(sec > s) { return sec; }
- ++index;
+ secTer = elements[++index];
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
}
}
/**
* Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
* except use index=0 for p=0.
+ *
+ * <p>Must return a weight for every root [p, s, t] as well as for every weight
+ * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16.
+ *
+ * <p>Exception: [0, 0, 0] is handled by the CollationBuilder:
+ * Both its lower and upper boundaries are special.
*/
int getTertiaryAfter(int index, int s, int t) {
+ long secTer;
int terLimit;
if(index == 0) {
// primary = 0
if(s == 0) {
+ assert(t != 0);
index = (int)elements[IX_FIRST_TERTIARY_INDEX];
// Gap at the end of the tertiary CE range.
terLimit = 0x4000;
// Gap for tertiaries of primary/secondary CEs.
terLimit = getTertiaryBoundary();
}
+ secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
} else {
assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
- ++index;
+ secTer = getFirstSecTerForPrimary(index + 1);
+ // If this is an explicit sec/ter unit, then it will be read once more.
terLimit = getTertiaryBoundary();
}
long st = (((long)s & 0xffffffffL) << 16) | t;
for(;;) {
- long secTer = elements[index];
+ if(secTer > st) {
+ assert((secTer >> 16) == s);
+ return (int)secTer & 0xffff;
+ }
+ secTer = elements[++index];
// No tertiary greater than t for this primary+secondary.
if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
secTer &= ~SEC_TER_DELTA_FLAG;
- if(secTer > st) { return (int)secTer & 0xffff; }
- ++index;
}
}
+ /**
+ * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.
+ */
+ private long getFirstSecTerForPrimary(int index) {
+ long secTer = elements[index];
+ if((secTer & SEC_TER_DELTA_FLAG) == 0) {
+ // No sec/ter delta.
+ return Collation.COMMON_SEC_AND_TER_CE;
+ }
+ secTer &= ~SEC_TER_DELTA_FLAG;
+ if(secTer > Collation.COMMON_SEC_AND_TER_CE) {
+ // Implied sec/ter.
+ return Collation.COMMON_SEC_AND_TER_CE;
+ }
+ // Explicit sec/ter below common/common.
+ return secTer;
+ }
+
/**
* Finds the largest index i where elements[i]<=p.
* Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).